Skip to content

Commit

Permalink
🐛 Source Github: handling empty repos, check method using RepositoryS…
Browse files Browse the repository at this point in the history
…tats stream (#5788)

* update error handling for empty repositories, update check method using RepositoryStats stream

Co-authored-by: ykurochkin <[email protected]>
  • Loading branch information
yevhenii-ldv and ykurochkin authored Sep 2, 2021
1 parent d3b2e8a commit 441d7cd
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "ef69ef6e-aa7f-4af1-a01d-ef775033524e",
"name": "GitHub",
"dockerRepository": "airbyte/source-github",
"dockerImageTag": "0.1.8",
"dockerImageTag": "0.1.9",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/github",
"icon": "github.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
- sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
name: GitHub
dockerRepository: airbyte/source-github
dockerImageTag: 0.1.8
dockerImageTag: 0.1.9
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
- sourceDefinitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.8
LABEL io.airbyte.version=0.1.9
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
PullRequestStats,
Releases,
Repositories,
RepositoryStats,
ReviewComments,
Reviews,
Stargazers,
Expand Down Expand Up @@ -88,15 +89,12 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) ->
authenticator = self._get_authenticator(config["access_token"])
repositories = self._generate_repositories(config=config, authenticator=authenticator)

# We should use the most poorly filled stream to use the `list` method,
# because when using the `next` method, we can get the `StopIteration` error.
projects_stream = Projects(
repository_stats_stream = RepositoryStats(
authenticator=authenticator,
repositories=repositories,
start_date=config["start_date"],
)
for stream in projects_stream.stream_slices(sync_mode=SyncMode.full_refresh):
list(projects_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream))
for stream_slice in repository_stats_stream.stream_slices(sync_mode=SyncMode.full_refresh):
next(repository_stats_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice))
return True, None
except Exception as e:
return False, repr(e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ def read_records(self, stream_slice: Mapping[str, any] = None, **kwargs) -> Iter
# For private repositories `Teams` stream is not available and we get "404 Client Error: Not Found for
# url: https://api.github.com/orgs/sherifnada/teams?per_page=100" error.
error_msg = f"Syncing `Team` stream isn't available for repository `{stream_slice['repository']}`."
elif e.response.status_code == requests.codes.CONFLICT:
error_msg = (
f"Syncing `{self.name}` stream isn't available for repository "
f"`{stream_slice['repository']}`, it seems like this repository is empty."
)
else:
self.logger.error(f"Undefined error while reading records: {error_msg}")
raise e
Expand Down Expand Up @@ -278,6 +283,19 @@ def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[
# Below are full refresh streams


class RepositoryStats(GithubStream):
    """
    Technical stream that is not intended for end users; it is used to check the connection with a repository.
    API docs: https://docs.github.com/en/rest/reference/repos#get-a-repository
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        # The endpoint is built from the "repository" entry of the current slice.
        endpoint = f"repos/{stream_slice['repository']}"
        return endpoint

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # The decoded JSON body is emitted as one single record.
        record = response.json()
        yield record


class Assignees(GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/issues#list-assignees
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/github.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Your token should have at least the `repo` scope. Depending on which streams you

| Version | Date | Pull Request | Subject |
| :------ | :-------- | :----- | :------ |
| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream |
| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams |
| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values |
| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator |
Expand Down

0 comments on commit 441d7cd

Please sign in to comment.