Skip to content

Commit

Permalink
Add env var for disabling cache
Browse files Browse the repository at this point in the history
  • Loading branch information
sir-sigurd committed Mar 10, 2021
1 parent 1ecd1c5 commit 803a7c9
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 20 deletions.
57 changes: 38 additions & 19 deletions api/python/quilt3/packages.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import gc
import hashlib
import inspect
Expand All @@ -6,6 +7,7 @@
import os
import pathlib
import shutil
import tempfile
import textwrap
import time
import uuid
Expand All @@ -16,7 +18,7 @@
import jsonlines
from tqdm import tqdm

from . import workflows
from . import util, workflows
from .backends import get_package_registry
from .data_transfer import (
calculate_sha256,
Expand Down Expand Up @@ -218,7 +220,7 @@ def get_cached_path(self):
"""
Returns a locally cached physical key, if available.
"""
if not self.physical_key.is_local():
if util.IS_CACHE_ENABLED and not self.physical_key.is_local():
return ObjectPathCache.get(str(self.physical_key))
return None

Expand Down Expand Up @@ -499,10 +501,11 @@ def install(cls, name, registry=None, top_hash=None, dest=None, dest_registry=No
# Copy the datafiles in the package.
physical_key = entry.physical_key

# Try a local cache.
cached_file = ObjectPathCache.get(str(physical_key))
if cached_file is not None:
physical_key = PhysicalKey.from_path(cached_file)
if util.IS_CACHE_ENABLED:
# Try a local cache.
cached_file = ObjectPathCache.get(str(physical_key))
if cached_file is not None:
physical_key = PhysicalKey.from_path(cached_file)

new_physical_key = dest_parsed.join(logical_key)
if physical_key != new_physical_key:
Expand All @@ -512,7 +515,11 @@ def _maybe_add_to_cache(old: PhysicalKey, new: PhysicalKey, _):
if not old.is_local() and new.is_local():
ObjectPathCache.set(str(old), new.path)

copy_file_list(file_list, callback=_maybe_add_to_cache, message="Copying objects")
copy_file_list(
file_list,
callback=_maybe_add_to_cache if util.IS_CACHE_ENABLED else None,
message="Copying objects",
)

pkg._build(name, registry=dest_registry, message=message)
if top_hash is None:
Expand Down Expand Up @@ -583,18 +590,30 @@ def _browse(cls, name, registry=None, top_hash=None):
registry.resolve_top_hash(name, top_hash)
)
pkg_manifest = registry.manifest_pk(name, top_hash)
if pkg_manifest.is_local():
local_pkg_manifest = pkg_manifest.path
else:
local_pkg_manifest = CACHE_PATH / "manifest" / _filesystem_safe_encode(str(pkg_manifest))
if not local_pkg_manifest.exists():
# Copy to a temporary file first, to make sure we don't cache a truncated file
# if the download gets interrupted.
tmp_path = local_pkg_manifest.with_suffix('.tmp')
copy_file(pkg_manifest, PhysicalKey.from_path(tmp_path), message="Downloading manifest")
tmp_path.rename(local_pkg_manifest)

return cls._from_path(local_pkg_manifest)

def download_manifest(dst):
copy_file(pkg_manifest, PhysicalKey.from_path(dst), message="Downloading manifest")

with contextlib.ExitStack() as stack:
if pkg_manifest.is_local():
local_pkg_manifest = pkg_manifest.path
elif util.IS_CACHE_ENABLED:
local_pkg_manifest = CACHE_PATH / "manifest" / _filesystem_safe_encode(str(pkg_manifest))
if not local_pkg_manifest.exists():
# Copy to a temporary file first, to make sure we don't cache a truncated file
# if the download gets interrupted.
tmp_path = local_pkg_manifest.with_suffix('.tmp')
download_manifest(tmp_path)
tmp_path.rename(local_pkg_manifest)
else:
# This tmp file has to closed before downloading, because on Windows it can't be
# opened for concurrent access.
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
local_pkg_manifest = tmp_file.name
stack.callback(os.unlink, local_pkg_manifest)
download_manifest(local_pkg_manifest)

return cls._from_path(local_pkg_manifest)

@classmethod
def _from_path(cls, path):
Expand Down
9 changes: 8 additions & 1 deletion api/python/quilt3/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
import yaml
from appdirs import user_cache_dir, user_data_dir


def get_bool_from_env(var_name: str):
return os.getenv(var_name, '').lower() == 'true'


APP_NAME = "Quilt"
APP_AUTHOR = "QuiltData"
BASE_DIR = user_data_dir(APP_NAME, APP_AUTHOR)
Expand All @@ -30,8 +35,10 @@
OPEN_DATA_URL = "https://open.quiltdata.com"

PACKAGE_NAME_FORMAT = r"([\w-]+/[\w-]+)(?:/(.+))?$"
DISABLE_TQDM = os.getenv('QUILT_MINIMIZE_STDOUT', '').lower() == 'true'
DISABLE_TQDM = get_bool_from_env('QUILT_MINIMIZE_STDOUT')
PACKAGE_UPDATE_POLICY = {'incoming', 'existing'}
IS_CACHE_ENABLED = not get_bool_from_env('QUILT_DISABLE_CACHE')


# CONFIG_TEMPLATE
# Must contain every permitted config key, as well as their default values (which can be 'null'/None).
Expand Down
39 changes: 39 additions & 0 deletions api/python/tests/integration/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,45 @@ def test_install(self):
'test/foo/foo',
)))

@pytest.mark.usefixtures('isolate_packages_cache')
@patch('quilt3.util.IS_CACHE_ENABLED', False)
@patch('quilt3.packages.ObjectPathCache')
def test_install_disabled_cache(self, object_path_cache_mock):
registry = 's3://my-test-bucket'
pkg_registry = self.S3PackageRegistryDefault(PhysicalKey.from_url(registry))
pkg_name = 'Quilt/Foo'

# Install a package twice and make sure cache functions weren't called.
for x in range(2):
self.setup_s3_stubber_pkg_install(
pkg_registry, pkg_name, manifest=REMOTE_MANIFEST.read_bytes(),
entries=(
('s3://my_bucket/my_data_pkg/bar.csv', b'a,b,c'),
('s3://my_bucket/my_data_pkg/baz/bat', b'Hello World!'),
('s3://my_bucket/my_data_pkg/foo', '💩'.encode()),
),
)
with patch('quilt3.data_transfer.MAX_CONCURRENCY', 1):
Package.install(pkg_name, registry=registry, dest='package')
object_path_cache_mock.get.assert_not_called()
object_path_cache_mock.set.assert_not_called()

@pytest.mark.usefixtures('isolate_packages_cache')
@patch('quilt3.util.IS_CACHE_ENABLED', False)
@patch('quilt3.packages.ObjectPathCache')
def test_package_entry_disabled_cache(self, object_path_cache_mock):
registry = 's3://my-test-bucket'
pkg_registry = self.S3PackageRegistryDefault(PhysicalKey.from_url(registry))
pkg_name = 'Quilt/Foo'

self.setup_s3_stubber_pkg_install(
pkg_registry, pkg_name, manifest=REMOTE_MANIFEST.read_bytes(),
)
pkg = Package.browse(pkg_name, registry=registry)
for lk, entry in pkg.walk():
assert entry.get_cached_path() is None
object_path_cache_mock.get.assert_not_called()

def test_install_subpackage_deprecated_and_new(self):
pkg_name = 'Quilt/Foo'
bucket = 'my-test-bucket'
Expand Down
5 changes: 5 additions & 0 deletions docs/API Reference/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,11 @@ Turn off TQDM progress bars for log files. Defaults to `False`
```
$ export QUILT_MINIMIZE_STDOUT=true
```
### `QUILT_DISABLE_CACHE`
Turn off cache. Defaults to `False`.
```
$ export QUILT_DISABLE_CACHE=true
```

### `QUILT_TRANSFER_MAX_CONCURRENCY`
Number of threads for file transfers. Defaults to `10`.
Expand Down
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# unreleased - YYYY-MM-DD
## Python API
* [Added] `QUILT_TRANSFER_MAX_CONCURRENCY` environment variable ([#2092](https://github.com/quiltdata/quilt/pull/2092))
* [Added] `QUILT_DISABLE_CACHE` environment variable ([#2091](https://github.com/quiltdata/quilt/pull/2091))
* [Changed] Removed unused dependency on `packaging` ([#2090](https://github.com/quiltdata/quilt/pull/2090))

## CLI
Expand Down

0 comments on commit 803a7c9

Please sign in to comment.