diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index 4a83b55f74d..a6716e0988c 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -1,3 +1,4 @@ +import contextlib import gc import hashlib import inspect @@ -6,6 +7,7 @@ import os import pathlib import shutil +import tempfile import textwrap import time import uuid @@ -16,7 +18,7 @@ import jsonlines from tqdm import tqdm -from . import workflows +from . import util, workflows from .backends import get_package_registry from .data_transfer import ( calculate_sha256, @@ -218,7 +220,7 @@ def get_cached_path(self): """ Returns a locally cached physical key, if available. """ - if not self.physical_key.is_local(): + if util.IS_CACHE_ENABLED and not self.physical_key.is_local(): return ObjectPathCache.get(str(self.physical_key)) return None @@ -499,10 +501,11 @@ def install(cls, name, registry=None, top_hash=None, dest=None, dest_registry=No # Copy the datafiles in the package. physical_key = entry.physical_key - # Try a local cache. - cached_file = ObjectPathCache.get(str(physical_key)) - if cached_file is not None: - physical_key = PhysicalKey.from_path(cached_file) + if util.IS_CACHE_ENABLED: + # Try a local cache. 
+ cached_file = ObjectPathCache.get(str(physical_key)) + if cached_file is not None: + physical_key = PhysicalKey.from_path(cached_file) new_physical_key = dest_parsed.join(logical_key) if physical_key != new_physical_key: @@ -512,7 +515,11 @@ def _maybe_add_to_cache(old: PhysicalKey, new: PhysicalKey, _): if not old.is_local() and new.is_local(): ObjectPathCache.set(str(old), new.path) - copy_file_list(file_list, callback=_maybe_add_to_cache, message="Copying objects") + copy_file_list( + file_list, + callback=_maybe_add_to_cache if util.IS_CACHE_ENABLED else None, + message="Copying objects", + ) pkg._build(name, registry=dest_registry, message=message) if top_hash is None: @@ -583,18 +590,30 @@ def _browse(cls, name, registry=None, top_hash=None): registry.resolve_top_hash(name, top_hash) ) pkg_manifest = registry.manifest_pk(name, top_hash) - if pkg_manifest.is_local(): - local_pkg_manifest = pkg_manifest.path - else: - local_pkg_manifest = CACHE_PATH / "manifest" / _filesystem_safe_encode(str(pkg_manifest)) - if not local_pkg_manifest.exists(): - # Copy to a temporary file first, to make sure we don't cache a truncated file - # if the download gets interrupted. - tmp_path = local_pkg_manifest.with_suffix('.tmp') - copy_file(pkg_manifest, PhysicalKey.from_path(tmp_path), message="Downloading manifest") - tmp_path.rename(local_pkg_manifest) - - return cls._from_path(local_pkg_manifest) + + def download_manifest(dst): + copy_file(pkg_manifest, PhysicalKey.from_path(dst), message="Downloading manifest") + + with contextlib.ExitStack() as stack: + if pkg_manifest.is_local(): + local_pkg_manifest = pkg_manifest.path + elif util.IS_CACHE_ENABLED: + local_pkg_manifest = CACHE_PATH / "manifest" / _filesystem_safe_encode(str(pkg_manifest)) + if not local_pkg_manifest.exists(): + # Copy to a temporary file first, to make sure we don't cache a truncated file + # if the download gets interrupted. 
+ tmp_path = local_pkg_manifest.with_suffix('.tmp') + download_manifest(tmp_path) + tmp_path.rename(local_pkg_manifest) + else: + # This tmp file has to be closed before downloading, because on Windows it can't be + # opened for concurrent access. + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + local_pkg_manifest = tmp_file.name + stack.callback(os.unlink, local_pkg_manifest) + download_manifest(local_pkg_manifest) + + return cls._from_path(local_pkg_manifest) @classmethod def _from_path(cls, path): diff --git a/api/python/quilt3/util.py b/api/python/quilt3/util.py index cc8730d23e9..25282a3baa3 100644 --- a/api/python/quilt3/util.py +++ b/api/python/quilt3/util.py @@ -20,6 +20,11 @@ import yaml from appdirs import user_cache_dir, user_data_dir + +def get_bool_from_env(var_name: str): + return os.getenv(var_name, '').lower() == 'true' + + APP_NAME = "Quilt" APP_AUTHOR = "QuiltData" BASE_DIR = user_data_dir(APP_NAME, APP_AUTHOR) @@ -30,8 +35,10 @@ OPEN_DATA_URL = "https://open.quiltdata.com" PACKAGE_NAME_FORMAT = r"([\w-]+/[\w-]+)(?:/(.+))?$" -DISABLE_TQDM = os.getenv('QUILT_MINIMIZE_STDOUT', '').lower() == 'true' +DISABLE_TQDM = get_bool_from_env('QUILT_MINIMIZE_STDOUT') PACKAGE_UPDATE_POLICY = {'incoming', 'existing'} +IS_CACHE_ENABLED = not get_bool_from_env('QUILT_DISABLE_CACHE') + # CONFIG_TEMPLATE # Must contain every permitted config key, as well as their default values (which can be 'null'/None).
diff --git a/api/python/tests/integration/test_packages.py b/api/python/tests/integration/test_packages.py index 15fb0d9707c..db86d4ced9a 100644 --- a/api/python/tests/integration/test_packages.py +++ b/api/python/tests/integration/test_packages.py @@ -1387,6 +1387,45 @@ def test_install(self): 'test/foo/foo', ))) + @pytest.mark.usefixtures('isolate_packages_cache') + @patch('quilt3.util.IS_CACHE_ENABLED', False) + @patch('quilt3.packages.ObjectPathCache') + def test_install_disabled_cache(self, object_path_cache_mock): + registry = 's3://my-test-bucket' + pkg_registry = self.S3PackageRegistryDefault(PhysicalKey.from_url(registry)) + pkg_name = 'Quilt/Foo' + + # Install a package twice and make sure cache functions weren't called. + for x in range(2): + self.setup_s3_stubber_pkg_install( + pkg_registry, pkg_name, manifest=REMOTE_MANIFEST.read_bytes(), + entries=( + ('s3://my_bucket/my_data_pkg/bar.csv', b'a,b,c'), + ('s3://my_bucket/my_data_pkg/baz/bat', b'Hello World!'), + ('s3://my_bucket/my_data_pkg/foo', '💩'.encode()), + ), + ) + with patch('quilt3.data_transfer.MAX_CONCURRENCY', 1): + Package.install(pkg_name, registry=registry, dest='package') + object_path_cache_mock.get.assert_not_called() + object_path_cache_mock.set.assert_not_called() + + @pytest.mark.usefixtures('isolate_packages_cache') + @patch('quilt3.util.IS_CACHE_ENABLED', False) + @patch('quilt3.packages.ObjectPathCache') + def test_package_entry_disabled_cache(self, object_path_cache_mock): + registry = 's3://my-test-bucket' + pkg_registry = self.S3PackageRegistryDefault(PhysicalKey.from_url(registry)) + pkg_name = 'Quilt/Foo' + + self.setup_s3_stubber_pkg_install( + pkg_registry, pkg_name, manifest=REMOTE_MANIFEST.read_bytes(), + ) + pkg = Package.browse(pkg_name, registry=registry) + for lk, entry in pkg.walk(): + assert entry.get_cached_path() is None + object_path_cache_mock.get.assert_not_called() + def test_install_subpackage_deprecated_and_new(self): pkg_name = 'Quilt/Foo' bucket = 
'my-test-bucket' diff --git a/docs/API Reference/cli.md b/docs/API Reference/cli.md index 7541c2a8633..92011b73fe2 100644 --- a/docs/API Reference/cli.md +++ b/docs/API Reference/cli.md @@ -189,7 +189,11 @@ Turn off TQDM progress bars for log files. Defaults to `False` ``` $ export QUILT_MINIMIZE_STDOUT=true ``` - +### `QUILT_DISABLE_CACHE` +Turn off the local cache of package manifests and object paths. Defaults to `False`. +``` +$ export QUILT_DISABLE_CACHE=true +``` ### `QUILT_TRANSFER_MAX_CONCURRENCY` Number of threads for file transfers. Defaults to `10`. diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f6c4a779666..b444d6446b9 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,7 @@ # unreleased - YYYY-MM-DD ## Python API * [Added] `QUILT_TRANSFER_MAX_CONCURRENCY` environment variable ([#2092](https://github.com/quiltdata/quilt/pull/2092)) +* [Added] `QUILT_DISABLE_CACHE` environment variable ([#2091](https://github.com/quiltdata/quilt/pull/2091)) * [Changed] Removed unused dependency on `packaging` ([#2090](https://github.com/quiltdata/quilt/pull/2090)) * [Fixed] Possible downloading of truncated manifests ([#1977](https://github.com/quiltdata/quilt/pull/1977)) diff --git a/gendocs/env_constants.md b/gendocs/env_constants.md index a87196764cf..ac1f3754bf9 100644 --- a/gendocs/env_constants.md +++ b/gendocs/env_constants.md @@ -9,7 +9,11 @@ Turn off TQDM progress bars for log files. Defaults to `False` ``` $ export QUILT_MINIMIZE_STDOUT=true ``` - +### `QUILT_DISABLE_CACHE` +Turn off the local cache of package manifests and object paths. Defaults to `False`. +``` +$ export QUILT_DISABLE_CACHE=true +``` ### `QUILT_TRANSFER_MAX_CONCURRENCY` Number of threads for file transfers. Defaults to `10`.