diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index 1c8abee..7970033 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -18,7 +18,7 @@
 from logging import LoggerAdapter, Logger
 from tempfile import NamedTemporaryFile
 from typing import Any, TypedDict, Literal, TYPE_CHECKING, MutableMapping, Generator
-from urllib.parse import urlparse, unquote, urljoin
+from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit, quote
 from zipfile import ZipFile
 
 import aiohttp
@@ -164,7 +164,7 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
         self.proxy: ProxySettings = {}
         if proxy:
             if isinstance(proxy, str):
-                self.proxy = {'server': proxy}
+                self.proxy = self.__prepare_proxy_playwright(proxy)
             elif isinstance(proxy, dict):
                 self.proxy = {'server': proxy['server'], 'bypass': proxy.get('bypass', ''),
                               'username': proxy.get('username', ''),
@@ -187,6 +187,26 @@
         self._locale: str = ''
         self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
 
+    def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
+        '''Split inline credentials out of a proxy URL into Playwright ProxySettings.'''
+        splitted = urlsplit(proxy)
+        if splitted.username and splitted.password:
+            # Credentials go in the dedicated fields; rebuild the server URL without
+            # the userinfo. netloc.rsplit('@', 1)[-1] keeps host[:port] verbatim and
+            # avoids emitting 'host:None' when the URL carries no explicit port.
+            return {'username': unquote(splitted.username), 'password': unquote(splitted.password),
+                    'server': urlunsplit((splitted.scheme, splitted.netloc.rsplit('@', 1)[-1], splitted.path, splitted.query, splitted.fragment))}
+        return {'server': proxy}
+
+    def __prepare_proxy_aiohttp(self, proxy: ProxySettings) -> str:
+        '''Build a proxy URL with inline credentials, as expected by aiohttp-socks.'''
+        if 'username' in proxy and 'password' in proxy:
+            splitted = urlsplit(proxy['server'])
+            # Percent-encode the credentials so reserved characters (:, @, /) in them
+            # cannot corrupt the rebuilt URL.
+            return urlunsplit((splitted.scheme, f'{quote(proxy["username"], safe="")}:{quote(proxy["password"], safe="")}@{splitted.netloc}', splitted.path, splitted.query, splitted.fragment))
+        return proxy['server']
+
     async def __aenter__(self) -> Capture:
         '''Launch the browser'''
         self._temp_harfile = NamedTemporaryFile(delete=False)
@@ -1395,9 +1415,9 @@ async def get_favicons(self, rendered_url: str, rendered_content: str) -> set[by
         Method inspired by https://github.com/ail-project/ail-framework/blob/master/bin/lib/crawlers.py
         """
         connector = None
-        if self.proxy and self.proxy.get('server'):
+        if self.proxy:
             # NOTE 2024-05-17: switch to async to fetch, the lib uses socks5h by default
-            connector = ProxyConnector.from_url(self.proxy['server'])
+            connector = ProxyConnector.from_url(self.__prepare_proxy_aiohttp(self.proxy))
 
         extracted_favicons = self.__extract_favicons(rendered_content)
         if not extracted_favicons: