Skip to content

Commit

Permalink
feat: comment task
Browse files Browse the repository at this point in the history
  • Loading branch information
fmelihh committed Nov 9, 2024
1 parent ae1d551 commit efea5e4
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 38 deletions.
11 changes: 7 additions & 4 deletions src/recommendation_engine/app/features/comments/db/model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import uuid
import pendulum
from sqlalchemy import Column
from clickhouse_sqlalchemy import engines
from clickhouse_sqlalchemy.types import Int32, String, DateTime, Array
from clickhouse_sqlalchemy.types import Int32, String, DateTime, Array, Nullable

from ....shared_kernel.database.clickhouse import ClickhouseBase

class CommentsModel(ClickhouseBase):
    """ORM mapping for the ``comments`` table in the ``default`` schema.

    The table engine is ``ReplacingMergeTree`` ordered by ``id`` with
    ``version`` as the version column, so a row re-inserted under an existing
    ``id`` is expected to replace the older one when ClickHouse merges parts.
    """

    __tablename__ = "comments"

    # Row key; a random UUID4 string is generated when the caller passes none.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    provider = Column(String)
    rating = Column(Int32)
    comment = Column(String)
    comment_id = Column(String)
    replies = Column(Array(String))
    like_count = Column(Int32)
    # Nullability is declared through the ClickHouse ``Nullable`` type wrapper.
    created_at = Column(Nullable(DateTime))
    updated_at = Column(Nullable(DateTime))
    # The default must be a callable: a bare ``pendulum.now(...).int_timestamp``
    # is evaluated once when the module is imported, which would stamp every
    # row written by a long-running worker with the same version and leave the
    # engine unable to tell a fresh row from a stale one.
    version = Column(
        Int32,
        default=lambda: pendulum.now("Europe/Istanbul").int_timestamp,
    )

    __table_args__ = (
        engines.ReplacingMergeTree(order_by="id", version="version"),
        {"schema": "default"},
    )
Original file line number Diff line number Diff line change
@@ -1,3 +1,28 @@
from ..db.model import CommentsModel
from ..dto.comment import CommentDto
from ....shared_kernel.generator import HashGenerator
from ....shared_kernel.database.clickhouse import get_session


class CommentService:
    """Persistence helpers for crawled restaurant comments."""

    @staticmethod
    def parse_all_comments(
        restaurant_id: str, provider: str, comments: list[CommentDto]
    ):
        """Bulk-save ``comments`` as ``CommentsModel`` rows.

        Args:
            restaurant_id: Identifier of the restaurant the comments belong to;
                only used as an ingredient of each row's ``id``.
            provider: Provider name stored on every row and mixed into the
                row ``id``.
            comments: Comment DTOs to persist. Each DTO's ``model_dump()``
                output is expanded into ``CommentsModel`` keyword arguments.

        Returns:
            None. Nothing is written when ``comments`` is empty.
        """
        if not comments:
            # Nothing to persist: avoid opening a session for an empty batch.
            return

        # The row id is derived from (provider, restaurant_id, comment_id)
        # instead of the model's random default, so the same comment maps to
        # the same id on a later crawl - presumably HashGenerator is
        # deterministic; confirm against its implementation.
        rows = [
            CommentsModel(
                provider=provider,
                **comment.model_dump(),
                id=HashGenerator.generate_unique_hash(
                    [provider, restaurant_id, comment.comment_id]
                ),
            )
            for comment in comments
        ]

        with get_session() as session:
            session.bulk_save_objects(rows)
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from addict import Dict
from typing import List

from ..domain.entity.getir import GetirComments
Expand All @@ -9,17 +8,17 @@


class CommentsExtractorService(Extractor):
    """Binds one restaurant id to the provider-specific comments entity."""

    def __init__(self, provider_type: Providers, restaurant_id: str):
        """Store the restaurant id and build the matching provider entity.

        Args:
            provider_type: Which provider's comments entity to instantiate.
            restaurant_id: Identifier handed to the provider entity.

        Raises:
            ValueError: If ``provider_type`` is not a supported provider.
        """
        # Assigned before initialize_provider() runs because that method
        # reads self.restaurant_id.
        self.restaurant_id = restaurant_id
        self.provider = self.initialize_provider(provider_type)

    def initialize_provider(
        self, provider_type: Providers
    ) -> GetirComments | YemekSepetiComments:
        """Return the comments entity for ``provider_type``.

        Raises:
            ValueError: If ``provider_type`` is neither ``YEMEK_SEPETI`` nor
                ``GETIR``.
        """
        if provider_type == Providers.YEMEK_SEPETI:
            return YemekSepetiComments(restaurant_id=self.restaurant_id)
        if provider_type == Providers.GETIR:
            return GetirComments(restaurant_id=self.restaurant_id)
        raise ValueError("Provider is not defined.")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,28 @@

class YemeksepetiRestaurants(BaseEntity, Processor):
# Request headers captured from a browser session against tr.fd-api.com.
#
# NOTE(review): "Authorization", "Cookie", "dps-session-id" and the
# "perseus-*" ids are session credentials committed in plain text. They should
# be rotated and loaded from configuration or a secret store instead of
# living in source control.
HEADERS = {
    "Referer": "https://www.yemeksepeti.com/",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
    "Host": "tr.fd-api.com",
    "Origin": "https://www.yemeksepeti.com",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Site": "cross-site",
    # NOTE(review): a fixed Content-Length only matches a body of exactly
    # that many bytes - confirm the HTTP client overrides it per request.
    "Content-Length": "12639",
    "Connection": "keep-alive",
    # NOTE(review): the scheme appears twice ("Bearer Bearer") - looks like a
    # copy/paste slip; confirm against what the API accepts before changing.
    "Authorization": "Bearer Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzY29wZXMiOlsxXSwiZXhwIjozNTE2Mjg4MTA1LCJzdWIiOiJ0dG11dWRAZ21haWwuY29tIiwiY29tcGFueV9pZGVudGlmaWVyIjoidHRfbXV1ZF91c2VyIiwiaXNfc2VydmljZSI6ZmFsc2V9.Io-7tX4TOYGBrPHvt9Gu1-L5fpy_tSE_t0w9s2w36is",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json;charset=utf-8",
    "Accept-Encoding": "gzip, deflate, br",
    "Sec-Fetch-Mode": "cors",
    "Request-Id": "05abe3bf-a5e1-42fa-8fe1-0c12f15b4d29",
    "X-FP-API-KEY": "volo",
    "perseus-session-id": "1731008747698.395883673882935828.p3lf6jtoyy",
    "perseus-client-id": "1731008747697.711480228086526878.alpf6f0b2w",
    "Platform": "web",
    "dps-session-id": "eyJzZXNzaW9uX2lkIjoiN2Y1MTkwY2VmOWMxYjM3YjU2NjQ4ZDdkMDU5MjRiNTQiLCJwZXJzZXVzX2lkIjoiMTczMTAwODc0NzY5Ny43MTE0ODAyMjgwODY1MjY4NzguYWxwZjZmMGIydyIsInRpbWVzdGFtcCI6MTczMTAwODc1M30=",
    "App-Version": "VENDOR-LIST-MICROFRONTEND.24.45.0049",
    "Cookie": "__cf_bm=aw2iUtJ8RDtNR8kQvZrEg6AUuQXgmY_BoRq_0ykKn5M-1731008926-1.0.1.1-63kS46LRbNymh7RoEiwBBCiSwwAvN18.pdu.5Jnh3o8Dl404cVbx0pFUnxAkQmiFG5IsUslAHWHTMbtXNHEytwNik5htDRhssfJm8xaAfu0; _pxhd=l5fq2P4NZH1lFoQbiUpz4t1P61fAQk2CDAULm8Yk-Gf3M49U8C9F5hNvT9ERPq-xuTfOinl2nlH6TvpNhtVPdg==:fPft/kKEenQomg/2ya/Uyx-Y8oD2pK8KJf/06XPMiXMjRsqn7leW2coQI5ahnU2WcsTGx9yyCkeTG2Td8hADp2mlGiYqpo/YRj9URss-6Cc=",
}

def __init__(self, geo_value: GeoValue) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
class RestaurantService:
@staticmethod
def parse_all_restaurants(
provider: str, restaurants: list[RestaurantDto], lat: float, lon: float, city: str
provider: str,
restaurants: list[RestaurantDto],
lat: float,
lon: float,
city: str,
):
with get_session() as session:
session.bulk_save_objects(
Expand All @@ -29,3 +33,18 @@ def parse_all_restaurants(
for idx in range(len(restaurants))
]
)

@staticmethod
def retrieve_restaurants_with_pagination(
    provider: str, start: int = 0, page: int = 10
) -> list[RestaurantDto]:
    """Return one page of stored restaurants for ``provider`` as DTOs.

    Args:
        provider: Value matched against ``RestaurantModel.provider``.
        start: Zero-based page index; ``page * start`` rows are skipped.
        page: Page size, i.e. the maximum number of rows returned.

    Returns:
        The DTOs of the requested page; an empty list when the offset is
        past the last matching row.
    """
    with get_session() as session:
        rows = (
            session.query(RestaurantModel)
            .filter(RestaurantModel.provider == provider)
            # OFFSET/LIMIT is only meaningful over a deterministic order;
            # without it consecutive pages may overlap or skip rows.
            .order_by(*RestaurantModel.__table__.primary_key.columns)
            .offset(page * start)
            .limit(page)
            .all()
        )
        # Build the DTOs while the session is still open so the attribute
        # values are guaranteed to be loaded, and drop SQLAlchemy's private
        # bookkeeping entry (``_sa_instance_state``) from the instance dict.
        return [
            RestaurantDto(
                **{
                    key: value
                    for key, value in vars(row).items()
                    if not key.startswith("_sa_")
                }
            )
            for row in rows
        ]
24 changes: 20 additions & 4 deletions src/recommendation_engine/app/tasks/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,33 @@
CommentService,
CommentsExtractorService,
)
from ..shared_kernel.domain_providers import Providers
from ..features.restaurants.services import RestaurantService


class CommentTask(Task):
    """Task that crawls and stores the comments of every stored restaurant."""

    __name__ = "CommentTask"

    def run(self, *args, **kwargs):
        """Crawl comments for each provider's restaurants, page by page.

        For every member of ``Providers`` the stored restaurants are read in
        pages of 100; each restaurant's comments are crawled and handed to
        ``CommentService.parse_all_comments`` for persistence. ``args`` and
        ``kwargs`` are accepted for the task interface but not used.
        """
        page_size = 100

        for provider in Providers:
            page_index = 0
            while True:
                restaurants = RestaurantService.retrieve_restaurants_with_pagination(
                    provider=provider.value, start=page_index, page=page_size
                )
                if not restaurants:
                    # An empty page means this provider is exhausted.
                    break

                for restaurant in restaurants:
                    comment_extractor = CommentsExtractorService(
                        provider_type=provider,
                        restaurant_id=restaurant.restaurant_id,
                    )
                    comment_list = comment_extractor.crawl()
                    # parse_all_comments is a static method that needs the
                    # restaurant id and provider name to derive row ids;
                    # passing only the comment list raises TypeError.
                    CommentService.parse_all_comments(
                        restaurant_id=restaurant.restaurant_id,
                        provider=provider.value,
                        comments=comment_list,
                    )

                page_index += 1


celery_application.register_task(CommentTask)

0 comments on commit efea5e4

Please sign in to comment.