From 441d7cde764b1b5b7b2af26e120d4636d98a1612 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Thu, 2 Sep 2021 16:31:44 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Source=20Github:=20handling=20em?= =?UTF-8?q?pty=20repos,=20check=20method=20using=20RepositoryStats=20strea?= =?UTF-8?q?m=20(#5788)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update error handling for empty repositories, update check method using RepositoryStats stream Co-authored-by: ykurochkin --- .../ef69ef6e-aa7f-4af1-a01d-ef775033524e.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-github/Dockerfile | 2 +- .../source-github/source_github/source.py | 10 ++++------ .../source-github/source_github/streams.py | 18 ++++++++++++++++++ docs/integrations/sources/github.md | 1 + 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json index f10370fa5d52..5d54f24269f0 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "ef69ef6e-aa7f-4af1-a01d-ef775033524e", "name": "GitHub", "dockerRepository": "airbyte/source-github", - "dockerImageTag": "0.1.8", + "dockerImageTag": "0.1.9", "documentationUrl": "https://docs.airbyte.io/integrations/sources/github", "icon": "github.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1b075b9d7a6c..e616d9c29fcd 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -39,7 +39,7 @@ - sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e name: GitHub dockerRepository: airbyte/source-github - dockerImageTag: 0.1.8 + dockerImageTag: 0.1.9 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg - sourceDefinitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index c4b7a6fdfb13..4a7315b0af39 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.8 +LABEL io.airbyte.version=0.1.9 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/source.py b/airbyte-integrations/connectors/source-github/source_github/source.py index 922e4cb27925..75d10581fce4 100644 --- a/airbyte-integrations/connectors/source-github/source_github/source.py +++ b/airbyte-integrations/connectors/source-github/source_github/source.py @@ -50,6 +50,7 @@ PullRequestStats, Releases, Repositories, + RepositoryStats, ReviewComments, Reviews, Stargazers, @@ -88,15 +89,12 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> authenticator = self._get_authenticator(config["access_token"]) repositories = self._generate_repositories(config=config, authenticator=authenticator) - # We should use the most poorly filled stream to use the `list` method, - # because when using the `next` method, we can get the `StopIteration` error. - projects_stream = Projects( + repository_stats_stream = RepositoryStats( authenticator=authenticator, repositories=repositories, - start_date=config["start_date"], ) - for stream in projects_stream.stream_slices(sync_mode=SyncMode.full_refresh): - list(projects_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream)) + for stream_slice in repository_stats_stream.stream_slices(sync_mode=SyncMode.full_refresh): + next(repository_stats_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)) return True, None except Exception as e: return False, repr(e) diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 9885022ecea4..aa652f7ea867 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -129,6 +129,11 @@ def read_records(self, stream_slice: Mapping[str, any] = None, **kwargs) -> Iter # For private repositories `Teams` stream is not available and we get "404 Client Error: Not Found for # url: https://api.github.com/orgs/sherifnada/teams?per_page=100" error. error_msg = f"Syncing `Team` stream isn't available for repository `{stream_slice['repository']}`." + elif e.response.status_code == requests.codes.CONFLICT: + error_msg = ( + f"Syncing `{self.name}` stream isn't available for repository " + f"`{stream_slice['repository']}`, it seems like this repository is empty." + ) else: self.logger.error(f"Undefined error while reading records: {error_msg}") raise e @@ -278,6 +283,19 @@ def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[ # Below are full refresh streams +class RepositoryStats(GithubStream): + """ + This stream is technical and not intended for the user, we use it for checking connection with the repository. + API docs: https://docs.github.com/en/rest/reference/repos#get-a-repository + """ + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + return f"repos/{stream_slice['repository']}" + + def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: + yield response.json() + + class Assignees(GithubStream): """ API docs: https://docs.github.com/en/rest/reference/issues#list-assignees diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 7c6d5259bd6a..4dd5ea945273 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -93,6 +93,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream | | 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams | | 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values | | 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator |