Skip to content

Commit

Permalink
Log all requests/responses (debug level) (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
elacuesta authored Oct 20, 2021
1 parent 5036227 commit b228a9e
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Page,
PlaywrightContextManager,
Request as PlaywrightRequest,
Response as PlaywrightResponse,
Route,
)
from scrapy import Spider, signals
Expand All @@ -35,6 +36,26 @@
logger = logging.getLogger("scrapy-playwright")


def _make_request_logger(context_name: str) -> Callable:
    """Build a callback that logs Playwright requests at debug level.

    The returned function is meant to be registered as a handler for a
    page's ``"request"`` event. Each log line is tagged with
    ``context_name`` so the originating browser context can be identified.

    Args:
        context_name: Name of the browser context the page belongs to.

    Returns:
        A callable that takes a Playwright request and logs its method,
        URL, resource type and ``referer`` header.
    """

    def _log_request(request: PlaywrightRequest) -> None:
        # Hand the values to the logging machinery instead of using an
        # f-string: the message is then only rendered when a handler
        # actually emits the record, which matters because this callback
        # fires for every request made by the page.
        logger.debug(
            "[Context=%s] Request: <%s %s> (resource type: %s, referrer: %s)",
            context_name,
            request.method.upper(),
            request.url,
            request.resource_type,
            request.headers.get("referer"),
        )

    return _log_request


def _make_response_logger(context_name: str) -> Callable:
    """Build a callback that logs Playwright responses at debug level.

    The returned function is meant to be registered as a handler for a
    page's ``"response"`` event. Each log line is tagged with
    ``context_name`` so the originating browser context can be identified.

    Args:
        context_name: Name of the browser context the page belongs to.

    Returns:
        A callable that takes a Playwright response and logs its status,
        URL and ``referer`` header.
    """

    def _log_response(response: PlaywrightResponse) -> None:
        # NOTE(review): "referer" is read from the *response* headers here;
        # it is normally a request header, so this may always log None.
        # Confirm whether the originating request's headers were intended.
        #
        # Values are passed as logging arguments (not an f-string) so the
        # message is only rendered when the record is actually emitted.
        logger.debug(
            "[Context=%s] Response: <%s %s> (referrer: %s)",
            context_name,
            response.status,
            response.url,
            response.headers.get("referer"),
        )

    return _log_response


class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
def __init__(self, crawler: Crawler) -> None:
super().__init__(settings=crawler.settings, crawler=crawler)
Expand Down Expand Up @@ -107,6 +128,8 @@ async def _create_page(self, request: Request) -> Page:
context = await self._create_browser_context(context_name, context_kwargs)
self.contexts[context_name] = context
page = await context.new_page()
page.on("request", _make_request_logger(context_name))
page.on("response", _make_response_logger(context_name))
self.stats.inc_value("playwright/page_count")
if self.default_navigation_timeout:
page.set_default_navigation_timeout(self.default_navigation_timeout)
Expand Down

0 comments on commit b228a9e

Please sign in to comment.