Skip to content

Commit

Permalink
🎉 Source Github: add Retry for GraphQL API Resource limitations (#14376)
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey Chvalyuk <[email protected]>
  • Loading branch information
grubberr authored Jul 4, 2022
1 parent efc27a3 commit 38d8d60
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.2.40
dockerImageTag: 0.2.41
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2593,7 +2593,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.2.40"
- dockerImage: "airbyte/source-github:0.2.41"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.40
LABEL io.airbyte.version=0.2.41
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,24 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str,
page = dict(parse.parse_qsl(parsed_link.query)).get("page")
return {"page": page}

def check_graphql_rate_limited(self, response_json) -> bool:
    """Return True when a decoded GraphQL response reports a rate-limit error.

    The payload is inspected for a top-level ``"errors"`` list containing at
    least one entry whose ``"type"`` equals ``"RATE_LIMITED"``.

    :param response_json: the decoded JSON body of the response.
    :return: True if any error entry has type ``RATE_LIMITED``, else False.
    """
    # A decoded JSON body is not guaranteed to be an object (it may be
    # ``null`` or a list); such payloads cannot carry an "errors" key.
    if not isinstance(response_json, dict):
        return False

    errors = response_json.get("errors")
    if not isinstance(errors, list):
        return False

    # Skip malformed entries instead of failing on `.get`.
    return any(isinstance(error, dict) and error.get("type") == "RATE_LIMITED" for error in errors)

def should_retry(self, response: requests.Response) -> bool:
# We don't call `super()` here because we have custom error handling and the GitHub API sometimes returns strange
# errors. The custom error handling in `read_records()` does not require calling `super()` here.
retry_flag = (
# The GitHub GraphQL API has limitations
# https://docs.github.com/en/graphql/overview/resource-limitations
(response.headers.get("X-RateLimit-Resource") == "graphql" and self.check_graphql_rate_limited(response.json()))
# Rate limit HTTP headers
# https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
response.headers.get("X-RateLimit-Remaining") == "0"
or response.headers.get("X-RateLimit-Remaining") == "0"
# Secondary rate limits
# https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
or response.headers.get("Retry-After")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
PullRequestCommentReactions,
PullRequestCommits,
PullRequests,
PullRequestStats,
Releases,
Repositories,
Reviews,
Expand Down Expand Up @@ -113,6 +114,39 @@ def request_callback(request):
assert responses.calls[1].request.url == "https://api.github.com/orgs/airbytehq?per_page=100"


@responses.activate
@patch("time.sleep")
@patch("time.time", return_value=1655804424.0)
def test_graphql_rate_limited(time_mock, sleep_mock):
    """A RATE_LIMITED GraphQL reply is retried and the stream waits before retrying.

    The first mocked reply carries a RATE_LIMITED error; the second is a normal
    reply with no repository data. The stream is expected to issue exactly two
    POST requests and to sleep for more than 300 seconds in total.
    """
    graphql_url = "https://api.github.com/graphql"

    rate_limited_reply = (
        HTTPStatus.OK,
        {"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655804724"},
        json.dumps({"errors": [{"type": "RATE_LIMITED"}]}),
    )
    empty_reply = (
        HTTPStatus.OK,
        {"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655808324"},
        json.dumps({"data": {"repository": None}}),
    )
    # Replies are served in order, one per incoming request.
    queued_replies = [rate_limited_reply, empty_reply]

    responses.add_callback(
        responses.POST,
        graphql_url,
        callback=lambda _request: queued_replies.pop(0),
        content_type="application/json",
    )

    stream = PullRequestStats(repositories=["airbytehq/airbyte"], page_size_for_large_streams=30)
    records = read_full_refresh(stream)

    assert records == []
    assert len(responses.calls) == 2
    for index in range(2):
        assert responses.calls[index].request.url == graphql_url

    # Each recorded call unpacks to (args, kwargs); the first positional
    # argument of `time.sleep` is the requested delay.
    total_sleep = sum(args[0] for args, _kwargs in sleep_mock.call_args_list)
    assert total_sleep > 300


@responses.activate
def test_stream_teams_404():
organization_args = {"organizations": ["org_name"]}
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/github.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa

| Version | Date | Pull Request | Subject |
|:--------|:-----------| :--- |:-------------------------------------------------------------------------------------------------------------|
| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations |
| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" |
| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` |
| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream |
Expand Down

0 comments on commit 38d8d60

Please sign in to comment.