Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add env var to disable cache #2091

Merged
merged 2 commits into from
Mar 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 38 additions & 19 deletions api/python/quilt3/packages.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import gc
import hashlib
import inspect
Expand All @@ -6,6 +7,7 @@
import os
import pathlib
import shutil
import tempfile
import textwrap
import time
import uuid
Expand All @@ -16,7 +18,7 @@
import jsonlines
from tqdm import tqdm

from . import workflows
from . import util, workflows
from .backends import get_package_registry
from .data_transfer import (
calculate_sha256,
Expand Down Expand Up @@ -218,7 +220,7 @@ def get_cached_path(self):
"""
Returns a locally cached physical key, if available.
"""
if not self.physical_key.is_local():
if util.IS_CACHE_ENABLED and not self.physical_key.is_local():
return ObjectPathCache.get(str(self.physical_key))
return None

Expand Down Expand Up @@ -499,10 +501,11 @@ def install(cls, name, registry=None, top_hash=None, dest=None, dest_registry=No
# Copy the datafiles in the package.
physical_key = entry.physical_key

# Try a local cache.
cached_file = ObjectPathCache.get(str(physical_key))
if cached_file is not None:
physical_key = PhysicalKey.from_path(cached_file)
if util.IS_CACHE_ENABLED:
# Try a local cache.
cached_file = ObjectPathCache.get(str(physical_key))
if cached_file is not None:
physical_key = PhysicalKey.from_path(cached_file)

new_physical_key = dest_parsed.join(logical_key)
if physical_key != new_physical_key:
Expand All @@ -512,7 +515,11 @@ def _maybe_add_to_cache(old: PhysicalKey, new: PhysicalKey, _):
if not old.is_local() and new.is_local():
ObjectPathCache.set(str(old), new.path)

copy_file_list(file_list, callback=_maybe_add_to_cache, message="Copying objects")
copy_file_list(
file_list,
callback=_maybe_add_to_cache if util.IS_CACHE_ENABLED else None,
message="Copying objects",
)

pkg._build(name, registry=dest_registry, message=message)
if top_hash is None:
Expand Down Expand Up @@ -583,18 +590,30 @@ def _browse(cls, name, registry=None, top_hash=None):
registry.resolve_top_hash(name, top_hash)
)
pkg_manifest = registry.manifest_pk(name, top_hash)
if pkg_manifest.is_local():
local_pkg_manifest = pkg_manifest.path
else:
local_pkg_manifest = CACHE_PATH / "manifest" / _filesystem_safe_encode(str(pkg_manifest))
if not local_pkg_manifest.exists():
# Copy to a temporary file first, to make sure we don't cache a truncated file
# if the download gets interrupted.
tmp_path = local_pkg_manifest.with_suffix('.tmp')
copy_file(pkg_manifest, PhysicalKey.from_path(tmp_path), message="Downloading manifest")
tmp_path.rename(local_pkg_manifest)

return cls._from_path(local_pkg_manifest)

def download_manifest(dst):
copy_file(pkg_manifest, PhysicalKey.from_path(dst), message="Downloading manifest")

with contextlib.ExitStack() as stack:
if pkg_manifest.is_local():
local_pkg_manifest = pkg_manifest.path
elif util.IS_CACHE_ENABLED:
local_pkg_manifest = CACHE_PATH / "manifest" / _filesystem_safe_encode(str(pkg_manifest))
if not local_pkg_manifest.exists():
# Copy to a temporary file first, to make sure we don't cache a truncated file
# if the download gets interrupted.
tmp_path = local_pkg_manifest.with_suffix('.tmp')
download_manifest(tmp_path)
tmp_path.rename(local_pkg_manifest)
else:
# This tmp file has to closed before downloading, because on Windows it can't be
# opened for concurrent access.
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
local_pkg_manifest = tmp_file.name
stack.callback(os.unlink, local_pkg_manifest)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems a little bit over-engineered... Ideally, we'd just download the manifest and immediately deserialize it, without even writing to a temporary file - or create a temporary file, but use its file descriptor instead of opening it again. But I guess that's not supported right now.

Fine for now, but would be nice to clean up some day.

download_manifest(local_pkg_manifest)

return cls._from_path(local_pkg_manifest)

@classmethod
def _from_path(cls, path):
Expand Down
9 changes: 8 additions & 1 deletion api/python/quilt3/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
import yaml
from appdirs import user_cache_dir, user_data_dir


def get_bool_from_env(var_name: str):
return os.getenv(var_name, '').lower() == 'true'


APP_NAME = "Quilt"
APP_AUTHOR = "QuiltData"
BASE_DIR = user_data_dir(APP_NAME, APP_AUTHOR)
Expand All @@ -30,8 +35,10 @@
OPEN_DATA_URL = "https://open.quiltdata.com"

PACKAGE_NAME_FORMAT = r"([\w-]+/[\w-]+)(?:/(.+))?$"
DISABLE_TQDM = os.getenv('QUILT_MINIMIZE_STDOUT', '').lower() == 'true'
DISABLE_TQDM = get_bool_from_env('QUILT_MINIMIZE_STDOUT')
PACKAGE_UPDATE_POLICY = {'incoming', 'existing'}
IS_CACHE_ENABLED = not get_bool_from_env('QUILT_DISABLE_CACHE')


# CONFIG_TEMPLATE
# Must contain every permitted config key, as well as their default values (which can be 'null'/None).
Expand Down
39 changes: 39 additions & 0 deletions api/python/tests/integration/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,45 @@ def test_install(self):
'test/foo/foo',
)))

@pytest.mark.usefixtures('isolate_packages_cache')
@patch('quilt3.util.IS_CACHE_ENABLED', False)
@patch('quilt3.packages.ObjectPathCache')
def test_install_disabled_cache(self, object_path_cache_mock):
registry = 's3://my-test-bucket'
pkg_registry = self.S3PackageRegistryDefault(PhysicalKey.from_url(registry))
pkg_name = 'Quilt/Foo'

# Install a package twice and make sure cache functions weren't called.
for x in range(2):
self.setup_s3_stubber_pkg_install(
pkg_registry, pkg_name, manifest=REMOTE_MANIFEST.read_bytes(),
entries=(
('s3://my_bucket/my_data_pkg/bar.csv', b'a,b,c'),
('s3://my_bucket/my_data_pkg/baz/bat', b'Hello World!'),
('s3://my_bucket/my_data_pkg/foo', '💩'.encode()),
),
)
with patch('quilt3.data_transfer.MAX_CONCURRENCY', 1):
Package.install(pkg_name, registry=registry, dest='package')
object_path_cache_mock.get.assert_not_called()
object_path_cache_mock.set.assert_not_called()

@pytest.mark.usefixtures('isolate_packages_cache')
@patch('quilt3.util.IS_CACHE_ENABLED', False)
@patch('quilt3.packages.ObjectPathCache')
def test_package_entry_disabled_cache(self, object_path_cache_mock):
registry = 's3://my-test-bucket'
pkg_registry = self.S3PackageRegistryDefault(PhysicalKey.from_url(registry))
pkg_name = 'Quilt/Foo'

self.setup_s3_stubber_pkg_install(
pkg_registry, pkg_name, manifest=REMOTE_MANIFEST.read_bytes(),
)
pkg = Package.browse(pkg_name, registry=registry)
for lk, entry in pkg.walk():
assert entry.get_cached_path() is None
object_path_cache_mock.get.assert_not_called()

def test_install_subpackage_deprecated_and_new(self):
pkg_name = 'Quilt/Foo'
bucket = 'my-test-bucket'
Expand Down
6 changes: 5 additions & 1 deletion docs/API Reference/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ Turn off TQDM progress bars for log files. Defaults to `False`
```
$ export QUILT_MINIMIZE_STDOUT=true
```

### `QUILT_DISABLE_CACHE`
Turn off cache. Defaults to `False`.
```
$ export QUILT_DISABLE_CACHE=true
```
### `QUILT_TRANSFER_MAX_CONCURRENCY`
Number of threads for file transfers. Defaults to `10`.

Expand Down
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# unreleased - YYYY-MM-DD
## Python API
* [Added] `QUILT_TRANSFER_MAX_CONCURRENCY` environment variable ([#2092](https://github.com/quiltdata/quilt/pull/2092))
* [Added] `QUILT_DISABLE_CACHE` environment variable ([#2091](https://github.com/quiltdata/quilt/pull/2091))
* [Changed] Removed unused dependency on `packaging` ([#2090](https://github.com/quiltdata/quilt/pull/2090))
* [Fixed] Possible downloading of truncated manifests ([#1977](https://github.com/quiltdata/quilt/pull/1977))

Expand Down
6 changes: 5 additions & 1 deletion gendocs/env_constants.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ Turn off TQDM progress bars for log files. Defaults to `False`
```
$ export QUILT_MINIMIZE_STDOUT=true
```

### `QUILT_DISABLE_CACHE`
Turn off cache. Defaults to `False`.
```
$ export QUILT_DISABLE_CACHE=true
```
### `QUILT_TRANSFER_MAX_CONCURRENCY`
Number of threads for file transfers. Defaults to `10`.

Expand Down