diff --git a/twarc/client.py b/twarc/client.py index 00b30533..bff3c323 100644 --- a/twarc/client.py +++ b/twarc/client.py @@ -51,6 +51,7 @@ def __init__( access_token_secret=None, connection_errors=0, http_errors=0, + max_server_error_retries=30, config=None, profile="", protected=False, @@ -76,6 +77,7 @@ def __init__( self.access_token_secret = access_token_secret self.connection_errors = connection_errors self.http_errors = http_errors + self.max_server_error_retries = max_server_error_retries self.profile = profile self.client = None self.last_response = None @@ -144,7 +146,11 @@ def search( if max_id: params["max_id"] = max_id - resp = self.get(url, params=params) + resp = self.get( + url, + params=params, + max_server_error_retries=self.max_server_error_retries, + ) retrieved_pages += 1 statuses = resp.json()["statuses"] @@ -237,7 +243,11 @@ def premium_search( count = 0 stop = False while not stop: - resp = self.get(url, params=params) + resp = self.get( + url, + params=params, + max_server_error_retries=self.max_server_error_retries, + ) if resp.status_code == 200: data = resp.json() for tweet in data["results"]: @@ -293,7 +303,12 @@ def timeline( params["max_id"] = max_id try: - resp = self.get(url, params=params, allow_404=True) + resp = self.get( + url, + params=params, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) retrieved_pages += 1 except requests.exceptions.HTTPError as e: if e.response.status_code == 404: @@ -355,7 +370,12 @@ def do_lookup(): url = "https://api.twitter.com/1.1/users/lookup.json" params = {id_type: ids_str} try: - resp = self.get(url, params=params, allow_404=True) + resp = self.get( + url, + params=params, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) except requests.exceptions.HTTPError as e: if e.response.status_code == 404: log.warning("no users matching %s", ids_str) @@ -391,7 +411,12 @@ def follower_ids(self, user, max_pages=None): while params["cursor"] != 0: try: - resp = self.get(url, params=params, allow_404=True) + resp = self.get( + url, + params=params, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) retrieved_pages += 1 except requests.exceptions.HTTPError as e: if e.response.status_code == 404: @@ -424,7 +449,12 @@ def friend_ids(self, user, max_pages=None): while params["cursor"] != 0: try: - resp = self.get(url, params=params, allow_404=True) + resp = self.get( + url, + params=params, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) retrieved_pages += 1 except requests.exceptions.HTTPError as e: if e.response.status_code == 404: @@ -658,7 +688,12 @@ def retweets(self, tweet_ids): tweet_id ) try: - resp = self.get(url, params={"count": 100}, allow_404=True) + resp = self.get( + url, + params={"count": 100}, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) for tweet in resp.json(): yield tweet except requests.exceptions.HTTPError as e: @@ -671,7 +706,7 @@ def trends_available(self): """ url = "https://api.twitter.com/1.1/trends/available.json" try: - resp = self.get(url) + resp = self.get(url, max_server_error_retries=self.max_server_error_retries) except requests.exceptions.HTTPError as e: raise e return resp.json() @@ -687,7 +722,12 @@ def trends_place(self, woeid, exclude=None): if exclude: params["exclude"] = exclude try: - resp = self.get(url, params=params, allow_404=True) + resp = self.get( + url, + params=params, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) except requests.exceptions.HTTPError as e: if e.response.status_code == 404: log.info("no region matching WOEID %s", woeid) @@ -701,7 +741,11 @@ def trends_closest(self, lat, lon): url = "https://api.twitter.com/1.1/trends/closest.json" params = {"lat": lat, "long": lon} try: - resp = self.get(url, params=params) + resp = self.get( + url, + params=params, + max_server_error_retries=self.max_server_error_retries, + ) except requests.exceptions.HTTPError as e: raise e return resp.json() @@ -789,7 +833,12 @@ def list_members( while params["cursor"] != 0: try: - resp = self.get(url, params=params, allow_404=True) + resp = self.get( + url, + params=params, + allow_404=True, + max_server_error_retries=self.max_server_error_retries, + ) except requests.exceptions.HTTPError as e: if e.response.status_code == 404: log.error("no matching list") @@ -813,7 +862,9 @@ def oembed(self, tweet_url, **params): url = "https://publish.twitter.com/oembed" params["url"] = tweet_url - resp = self.get(url, params=params) + resp = self.get( + url, params=params, max_server_error_retries=self.max_server_error_retries + ) return resp.json() @@ -1010,7 +1061,7 @@ def validate_keys(self): # Need to explicitly reconnect to confirm the current creds # are used in the session object. self.connect() - self.get(url) + self.get(url, max_server_error_retries=self.max_server_error_retries) return True except requests.HTTPError as e: if e.response.status_code == 401: diff --git a/twarc/decorators.py b/twarc/decorators.py index 93f0f3df..88e526e5 100644 --- a/twarc/decorators.py +++ b/twarc/decorators.py @@ -15,7 +15,7 @@ def rate_limit(f): issue the API call again. """ - def new_f(*args, **kwargs): + def new_f(*args, max_server_error_retries=30, **kwargs): errors = 0 while True: resp = f(*args, **kwargs) @@ -56,7 +56,7 @@ def new_f(*args, **kwargs): resp.url.startswith("https://api.twitter.com/2/tweets/search/all") ): errors += 1 - if errors > 30: + if errors > max_server_error_retries: log.warning("too many errors from Twitter, giving up") resp.raise_for_status() # Shorter wait time than other endpoints for this specific case. Also @@ -76,7 +76,7 @@ def new_f(*args, **kwargs): time.sleep(seconds) elif resp.status_code >= 500: errors += 1 - if errors > 30: + if errors > max_server_error_retries: log.warning("too many errors from Twitter, giving up") resp.raise_for_status() seconds = 60 * errors