Skip to content

Commit

Permalink
Implement save and resolve functions (#6)
Browse files Browse the repository at this point in the history
* Add functions to save assets and resolve links.
* Add config to disable SSL verification from requests. 
* Add tests, docstrings
  • Loading branch information
jkanche authored May 13, 2024
1 parent 11aa004 commit 4b8a330
Show file tree
Hide file tree
Showing 14 changed files with 636 additions and 13 deletions.
3 changes: 3 additions & 0 deletions src/gypsum_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@
from .fetch_metadata_schema import fetch_metadata_schema
from .list_assets import list_assets, list_files, list_projects, list_versions
from .s3_config import public_s3_config
from .save_assets import save_version
from .save_file import save_file
from .resolve_links import resolve_links
6 changes: 5 additions & 1 deletion src/gypsum_client/_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import requests

from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
Expand Down Expand Up @@ -59,7 +61,9 @@ def start_temp_server():
"client_secret": client_secret,
"code": AUTH_CODE,
}
token_req = requests.post(token_url, headers=headers, json=parameters)
token_req = requests.post(
token_url, headers=headers, json=parameters, verify=REQUESTS_MOD["verify"]
)
token_req.raise_for_status()
token = token_req.json()["access_token"]

Expand Down
53 changes: 48 additions & 5 deletions src/gypsum_client/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import re
import shutil
import tempfile
from datetime import datetime, timezone
Expand All @@ -10,6 +11,8 @@
import requests
from filelock import FileLock

from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
Expand Down Expand Up @@ -67,7 +70,7 @@ def _list_for_prefix(
if prefix is not None:
qparams["prefix"] = prefix

req = requests.get(url, params=qparams)
req = requests.get(url, params=qparams, verify=REQUESTS_MOD["verify"])
req.raise_for_status()

resp = req.json()
Expand All @@ -86,7 +89,7 @@ def _list_for_prefix(
def _fetch_json(path: str, url: str):
full_url = f"{url}/file/{quote_plus(path)}"

req = requests.get(full_url)
req = requests.get(full_url, verify=REQUESTS_MOD["verify"])
req.raise_for_status()

return req.json()
Expand Down Expand Up @@ -120,7 +123,12 @@ def _fetch_cacheable_json(


def _save_file(
path: str, destination: str, overwrite: bool, url: str, error: bool = True
path: str,
destination: str,
overwrite: bool,
url: str,
error: bool = True,
verify: Optional[bool] = None,
):
if overwrite is True or not os.path.exists(destination):
os.makedirs(os.path.dirname(destination), exist_ok=True)
Expand All @@ -133,7 +141,10 @@ def _save_file(
try:
full_url = f"{url}/file/{quote_plus(path)}"

req = requests.get(full_url, stream=True)
if verify is None:
verify = REQUESTS_MOD["verify"]

req = requests.get(full_url, stream=True, verify=verify)
req.raise_for_status()

for chunk in req.iter_content(chunk_size=None):
Expand Down Expand Up @@ -176,10 +187,42 @@ def _rename_file(src: str, dest: str):

def _download_and_rename_file(url: str, dest: str):
tmp = tempfile.NamedTemporaryFile(dir=os.path.dirname(dest), delete=False).name
req = requests.get(url, stream=True)
req = requests.get(url, stream=True, verify=REQUESTS_MOD["verify"])

with open(tmp, "wb") as f:
for chunk in req.iter_content():
f.write(chunk)

_rename_file(tmp, dest)


IS_LOCKED = {"locks": {}}


def _acquire_lock(cache: str, project: str, asset: str, version: str):
_key = f"{project}/{asset}/{version}"

if _key in IS_LOCKED["locks"] and IS_LOCKED["locks"][_key] is None:
_path = os.path.join(cache, "status", project, asset, version)
os.makedirs(os.path.dirname(_path), exist_ok=True)

_lock = FileLock(_path)
_lock.acquire()
IS_LOCKED["locks"][_key] = _lock


def _release_lock(project: str, asset: str, version: str):
_key = f"{project}/{asset}/{version}"

if _key in IS_LOCKED["locks"] and IS_LOCKED["locks"][_key] is not None:
_lock = IS_LOCKED["locks"][_key]
_lock.release()
del IS_LOCKED["locks"][_key]


def _sanitize_path(x):
if os.name == "nt":
x = re.sub(r"\\\\", "/", x)

x = re.sub(r"//+", "/", x)
return x
9 changes: 7 additions & 2 deletions src/gypsum_client/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from ._github import github_access_token
from ._utils import _cache_directory, _remove_slash_url, _rest_url
from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
Expand Down Expand Up @@ -125,7 +126,7 @@ def set_access_token(
if user_agent:
headers["User-Agent"] = user_agent

r = requests.get(_url, headers=headers)
r = requests.get(_url, headers=headers, verify=REQUESTS_MOD["verify"])
r.raise_for_status()

_info = r.json()
Expand All @@ -145,7 +146,11 @@ def set_access_token(

headers["Authorization"] = f"Bearer {token}"

token_req = requests.get(f"{_remove_slash_url(github_url)}/user", headers=headers)
token_req = requests.get(
f"{_remove_slash_url(github_url)}/user",
headers=headers,
verify=REQUESTS_MOD["verify"],
)
token_req.raise_for_status()

token_resp = token_req.json()
Expand Down
6 changes: 6 additions & 0 deletions src/gypsum_client/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"


REQUESTS_MOD = {"verify": True}
8 changes: 6 additions & 2 deletions src/gypsum_client/fetch_metadata_database.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
import tempfile
import time
import warnings

import requests
from filelock import FileLock

from ._utils import _cache_directory, _download_and_rename_file
from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
Expand Down Expand Up @@ -96,10 +98,12 @@ def get_last_modified_date(base_url):
mod_time = None
try:
url = base_url + "modified"
response = requests.get(url)
response = requests.get(url, verify=REQUESTS_MOD["verify"])
mod_time = float(response.text)
except Exception as e:
print("Failed to check the last modified timestamp:", str(e))
warnings.warn(
f"Failed to check the last modified timestamp: {str(e)}", UserWarning
)

if mod_time is not None:
LAST_CHECK["req_time"] = curtime
Expand Down
3 changes: 2 additions & 1 deletion src/gypsum_client/fetch_metadata_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from filelock import FileLock

from ._utils import _cache_directory
from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
Expand Down Expand Up @@ -50,7 +51,7 @@ def fetch_metadata_schema(
_lock = FileLock(cache_path)
with _lock:
url = "https://artifactdb.github.io/bioconductor-metadata-index/" + name
response = requests.get(url)
response = requests.get(url, verify=REQUESTS_MOD["verify"])
with open(cache_path, "wb") as f:
f.write(response.content)

Expand Down
7 changes: 6 additions & 1 deletion src/gypsum_client/list_assets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import requests

from ._utils import _list_for_prefix, _rest_url
from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
Expand Down Expand Up @@ -99,7 +100,11 @@ def list_files(
if prefix is not None:
_prefix = f"{_prefix}{prefix}"

req = requests.get(f"{url}/list", params={"recursive": "true", "prefix": _prefix})
req = requests.get(
f"{url}/list",
params={"recursive": "true", "prefix": _prefix},
verify=REQUESTS_MOD["verify"],
)
req.raise_for_status()
resp = req.json()

Expand Down
115 changes: 115 additions & 0 deletions src/gypsum_client/resolve_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import atexit
import os
import shutil
from typing import Optional

from ._utils import (
BUCKET_CACHE_NAME,
_acquire_lock,
_cache_directory,
_release_lock,
_rest_url,
)
from .fetch_assets import fetch_manifest
from .save_file import save_file

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"


def resolve_links(
project: str,
asset: str,
version: str,
cache_dir: Optional[str] = None,
overwrite: str = False,
url: str = _rest_url(),
):
"""Resolve links in the cache directory.
Create hard links (or copies, if filesystem links
are not supported) for linked-from files to their
link destinations.
Args:
project:
Project name.
asset:
Asset name.
version:
Version name.
cache_dir:
Path to the cache directory.
overwrite:
Whether to overwrite existing file in cache.
url:
URL to the gypsum compatible API.
Returns:
True if all links are resolved.
"""
cache_dir = _cache_directory(cache_dir)
_acquire_lock(cache_dir, project, asset, version)

def release_lock_wrapper():
_release_lock(project, asset, version)

atexit.register(release_lock_wrapper)

# destination = os.path.join(cache_dir, BUCKET_CACHE_NAME, project, asset, version)
manifests = {}

self_manifest = fetch_manifest(
project, asset, version, cache_dir=cache_dir, url=url
)
manifests["/".join([project, asset, version])] = self_manifest

for kmf in self_manifest.keys():
entry = self_manifest[kmf]
if entry.get("link") is None:
continue

old_loc = os.path.join(project, asset, version, kmf)
if os.path.exists(old_loc) and not overwrite:
continue

link_data = entry["link"]
if link_data.get("ancestor") is not None:
link_data = link_data["ancestor"]

out = save_file(
link_data["project"],
link_data["asset"],
link_data["version"],
link_data["path"],
cache_dir=cache_dir,
url=url,
overwrite=overwrite,
)
old_path = os.path.join(cache_dir, BUCKET_CACHE_NAME, old_loc)

try:
os.unlink(old_path)
except Exception:
pass

os.makedirs(os.path.dirname(old_path), exist_ok=True)

try:
os.link(out, old_path)
except Exception:
try:
os.symlink(out, old_path)
except Exception:
try:
shutil.copy(out, old_path)
except Exception as e:
raise ValueError(f"Failed to resolve link for '{kmf}': {e}")

return True
3 changes: 2 additions & 1 deletion src/gypsum_client/s3_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from filelock import FileLock

from ._utils import _cache_directory, _rest_url
from .config import REQUESTS_MOD

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
Expand Down Expand Up @@ -68,7 +69,7 @@ def public_s3_config(
CREDS_CACHE["info"][cache_dir] = creds
return creds

req = requests.get(url + "/credentials/s3-api")
req = requests.get(url + "/credentials/s3-api", verify=REQUESTS_MOD["verify"])
creds = req.json()

if cache_dir is None:
Expand Down
Loading

0 comments on commit 4b8a330

Please sign in to comment.