Skip to content

Commit

Permalink
Retries and performance improvements (#9)
Browse files Browse the repository at this point in the history
* add _session_with_retries utility function

* add retries to artifactory calls

* add retries to upstream calls

* greatly reduce requests to artifactory for iterating over collections

* cleanup

* fix tests
  • Loading branch information
briantist authored Aug 8, 2022
1 parent ce0f93c commit db35a31
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 17 deletions.
6 changes: 3 additions & 3 deletions galactory/upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from flask import current_app, abort, Response

from . import constants as C

from .utilities import _session_with_retries

class _CacheEntry:
_raw = {}
Expand Down Expand Up @@ -140,7 +140,7 @@ def _set_cache(self, request, cache) -> None:
@contextmanager
def proxy_download(self, request):
req = self._rewrite_to_upstream(request, self._upstream)
with requests.Session() as s:
with _session_with_retries() as s:
try:
resp = s.send(req, stream=True)
resp.raise_for_status()
Expand All @@ -157,7 +157,7 @@ def proxy(self, request):

if cache.empty or cache.expired:
req = self._rewrite_to_upstream(request, self._upstream)
with requests.Session() as s:
with _session_with_retries() as s:
try:
resp = s.send(req)
resp.raise_for_status()
Expand Down
62 changes: 53 additions & 9 deletions galactory/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,43 @@
from tempfile import SpooledTemporaryFile
from contextlib import contextmanager
from urllib.request import urlopen
from urllib3 import Retry
from requests.adapters import HTTPAdapter
from requests import Session

from flask import url_for, request, current_app
from artifactory import ArtifactoryPath
from dohq_artifactory.auth import XJFrogArtApiAuth


def authorize(request, artifactory_path) -> ArtifactoryPath:
def _session_with_retries(retry=None, auth=None) -> Session:
if retry is None:
retry = Retry(connect=5, read=3, redirect=2, status=6, other=3, backoff_factor=0.1, raise_on_status=False)

adapter = HTTPAdapter(max_retries=retry)
session = Session()
session.auth = auth
session.mount('http://', adapter)
session.mount('https://', adapter)

return session


def authorize(request, artifactory_path, retry=None) -> ArtifactoryPath:
auth = None
apikey = current_app.config['ARTIFACTORY_API_KEY']

if current_app.config['USE_GALAXY_KEY'] and (not current_app.config['PREFER_CONFIGURED_KEY'] or not apikey):
authorization = request.headers.get('Authorization')
if authorization:
apikey = authorization.split(' ')[1]

target = artifactory_path
if apikey:
target = ArtifactoryPath(target, apikey=apikey)
auth = XJFrogArtApiAuth(apikey)

session = _session_with_retries(retry=retry, auth=auth)
return ArtifactoryPath(artifactory_path, session=session)

return target

# TODO: this relies on a paid feature
# We can work around it by parsing the archives as we upload,
Expand All @@ -41,18 +60,43 @@ def load_manifest_from_artifactory(artifact):
return manifest


def discover_collections(repo, namespace=None, name=None, version=None):
def discover_collections(repo, namespace=None, name=None, version=None, fast_detection=True):
for p in repo:
props = p.properties
if fast_detection:
# we're going to use the naming convention to eliminate candidates early,
# to avoid excessive additional requests for properties and stat that slow
# down the listing immensely as the number of collections grows.
try:
f_namespace, f_name, f_version = p.name.replace('.tar.gz', '').split('-')
except ValueError:
pass
else:
if not all(
(
not namespace or f_namespace == namespace,
not name or f_name == name,
not version or f_version == version
)
):
continue

info = p.stat()
if info.is_dir:
continue

if info.is_dir or not props.get('version'):
props = p.properties
if not props.get('version'): # TODO: change to collection_info
continue

manifest = load_manifest_from_artifactory(p)
if 'collection_info' in props:
collection_info = json.loads(props['collection_info'][0])
else:
# fallback for now just in case, we expect this never to be hit
# TODO: remove in the next version
collection_info = load_manifest_from_artifactory(p)['collection_info']

coldata = {
'collection_info': manifest['collection_info'],
'collection_info': collection_info,
'fqcn': props['fqcn'][0],
'created': info.ctime.isoformat(),
'modified': info.mtime.isoformat(),
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def properties(self):
ci = self._galactory_get_manifest('collection_info')

return {
'collection_info': json.dumps(ci),
'collection_info': [json.dumps(ci)],
'fqcn': [f"{ci['namespace']}.{ci['name']}"],
'namespace': [ci['namespace']],
'name': [ci['name']],
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/utilities/test_discover_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ def test_discover_collections_any(repository, manifest_loader, namespace, collec
]):
assert len(collections) == 0

assert len(contents) == manifest_loader.call_count

expected_calls = [mock.call(x) for x in contents]
manifest_loader.assert_has_calls(expected_calls, any_order=True)
# TODO: we're phasing out the manifest loader, remove later
# assert len(contents) == manifest_loader.call_count
# expected_calls = [mock.call(x) for x in contents]
# manifest_loader.assert_has_calls(expected_calls, any_order=True)

for c in collections:
assert 'collection_info' in c
Expand Down

0 comments on commit db35a31

Please sign in to comment.