From a67cd1deb878bc615dbf474f741fe46930d4edac Mon Sep 17 00:00:00 2001 From: Vlad Pronsky Date: Sun, 11 Feb 2024 01:05:22 +0200 Subject: [PATCH] raise exception on proxy failed --- twscrape/account.py | 2 +- twscrape/queue_client.py | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/twscrape/account.py b/twscrape/account.py index f88463a..172900a 100644 --- a/twscrape/account.py +++ b/twscrape/account.py @@ -49,7 +49,7 @@ def to_rs(self): rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None return rs - def make_client(self, proxy: str | None) -> AsyncClient: + def make_client(self, proxy: str | None = None) -> AsyncClient: proxies = [proxy, os.getenv("TWS_PROXY"), self.proxy] proxies = [x for x in proxies if x is not None] proxy = proxies[0] if proxies else None diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py index 093c755..7009052 100644 --- a/twscrape/queue_client.py +++ b/twscrape/queue_client.py @@ -2,7 +2,8 @@ import os from typing import Any -from httpx import AsyncClient, HTTPStatusError, ProxyError, ReadTimeout, Response +import httpx +from httpx import AsyncClient, Response from .accounts_pool import Account, AccountsPool from .logger import logger @@ -187,7 +188,7 @@ async def _check_rep(self, rep: Response) -> None: try: rep.raise_for_status() - except HTTPStatusError: + except httpx.HTTPStatusError: logger.error(f"Unhandled API response code: {log_msg}") await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes raise HandledError() @@ -196,7 +197,8 @@ async def get(self, url: str, params: ReqParams = None): return await self.req("GET", url, params=params) async def req(self, method: str, url: str, params: ReqParams = None) -> Response | None: - retry_count = 0 + unknown_retry, connection_retry = 0, 0 + while True: ctx = await self._get_ctx() # not need to close client, class implements __aexit__ if ctx is None: @@ -208,7 +210,7 @@ async def req(self, method: str, url: str, params: ReqParams = None) -> Response await self._check_rep(rep) ctx.req_count += 1 # count only successful - retry_count = 0 + unknown_retry, connection_retry = 0, 0 return rep except AbortReqError: # abort all queries @@ -216,11 +218,22 @@ async def req(self, method: str, url: str, params: ReqParams = None) -> Response except HandledError: # retry with new account continue - except (ReadTimeout, ProxyError): + except (httpx.ReadTimeout, httpx.ProxyError): # http transport failed, just retry with same account continue + except httpx.ConnectError as e: + # if proxy missconfigured or ??? + connection_retry += 1 + if connection_retry >= 3: + raise e except Exception as e: - retry_count += 1 - if retry_count >= 3: - logger.warning(f"Unhandled error {type(e)}: {e}") + unknown_retry += 1 + if unknown_retry >= 3: + msg = [ + "Unknown error. Account timeouted for 15 minutes.", + "Create issue please: https://github.com/vladkens/twscrape/issues", + f"If it mistake, you can unlock account now with `twscrape reset_locks`. Err: {type(e)}: {e}", + ] + + logger.warning(" ".join(msg)) await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes