From f321b8318bf32cae7e84addfcaee6436e3498f0c Mon Sep 17 00:00:00 2001
From: Aaron Collier
Date: Mon, 30 Sep 2024 16:34:55 -0700
Subject: [PATCH] Refactoring

---
 dlme_airflow/utils/partition_url_builder.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/dlme_airflow/utils/partition_url_builder.py b/dlme_airflow/utils/partition_url_builder.py
index afbd229..5f7af88 100644
--- a/dlme_airflow/utils/partition_url_builder.py
+++ b/dlme_airflow/utils/partition_url_builder.py
@@ -66,16 +66,11 @@ def _prefetch_page_urls(self):
         ids = []
         while True:
             api_endpoint = self.paging_config['pages_url'].format(offset,self.paging_config['limit'])
-            print(f"Fetching {api_endpoint}")
             data = self._fetch_provider_data(api_endpoint)[self.paging_config['page_data']]
             offset += self.paging_config["limit"]
             harvested = len(data)
 
-            for i in data:
-                if validators.url(i['id']):
-                    ids.append(i['id'])
-                else:
-                    ids.append(f"{self.collection_url}{i['id']}")
+            ids += self._extract_ids(data)
 
             if harvested < self.paging_config["limit"]:
                 break
@@ -90,3 +85,11 @@ def _fetch_provider_data(self, url):
         resp = requests.get(url, headers=headers)
         if resp.status_code == 200:
             return resp.json()
+
+    def _extract_ids(self, data):
+        return [self._format_id(i['id']) for i in data]
+
+    def _format_id(self, id):
+        if validators.url(id):
+            return id
+        return f"{self.collection_url}{id}"