Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
aaron-collier committed Sep 30, 2024
1 parent 670f32e commit daa042c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
19 changes: 13 additions & 6 deletions dlme_airflow/drivers/iiif_json_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,21 @@
from typing import Any, Optional, Generator
from dlme_airflow.utils.partition_url_builder import PartitionBuilder

container = "dataframe"
name = "iiif_json_v3"
version = "0.0.2"
partition_access = True


class IiifV3JsonSource(intake.source.base.DataSource):
def __init__(self, collection_url, dtype=None, metadata=None, wait=None, paging=None):
container = "dataframe"
name = "iiif_json_v3"
version = "0.0.2"
partition_access = True

def __init__(
self,
collection_url,
dtype=None,
metadata=None,
wait=None,
paging=None
):
super(IiifV3JsonSource, self).__init__(metadata=metadata)
self.collection_url = collection_url
self.dtype = dtype
Expand Down
15 changes: 9 additions & 6 deletions dlme_airflow/utils/partition_url_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,11 @@ def _prefetch_page_urls(self):
ids = []
while True:
api_endpoint = self.paging_config['pages_url'].format(offset,self.paging_config['limit'])
print(f"Fetching {api_endpoint}")
data = self._fetch_provider_data(api_endpoint)[self.paging_config['page_data']]
offset += self.paging_config["limit"]
harvested = len(data)

for i in data:
if validators.url(i['id']):
ids.append(i['id'])
else:
ids.append(f"{self.collection_url}{i['id']}")
ids += self._extract_ids(data)

if harvested < self.paging_config["limit"]:
break
Expand All @@ -90,3 +85,11 @@ def _fetch_provider_data(self, url):
resp = requests.get(url, headers=headers)
if resp.status_code == 200:
return resp.json()

def _extract_ids(self, data):
return [self._format_id(i['id']) for i in data]

def _format_id(self, id):
    """Return *id* as-is if it validates as a URL, else prefix it.

    When ``validators.url`` does not accept *id*, the result is
    ``self.collection_url`` immediately followed by *id* (no separator
    is inserted between them).
    """
    looks_like_url = validators.url(id)
    return id if looks_like_url else f"{self.collection_url}{id}"

0 comments on commit daa042c

Please sign in to comment.