Skip to content

Commit

Permalink
Merge pull request #1950 from apache/juerg/cache-config
Browse files Browse the repository at this point in the history
Expand cache config options and improve defaults
  • Loading branch information
juergbi authored Aug 30, 2024
2 parents 5e88ea7 + 904dd8e commit 28e385f
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 5 deletions.
26 changes: 25 additions & 1 deletion doc/source/using_config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,15 @@ toplevel of your configuration file, like so:
#
cache:
# Allow using as much free space as possible
# Use as much space as possible
quota: infinity
# Keep 5% of disk space available
reserved-disk-space: 5%
# Retain 80% of the cache on cleanup
low-watermark: 80%
# Avoid pulling large amounts of data we don't need locally
pull-buildtrees: False
Expand Down Expand Up @@ -184,6 +190,24 @@ Attributes
Percentage values are taken to represent a percentage of the partition
size on the filesystem where the cache has been configured.

* ``reserved-disk-space``

This controls how much disk space should remain available. If the amount
of available disk space falls below the specified value, unused cache
objects will be pruned even if the configured quota has not been exceeded.

``reserved-disk-space`` can be specified in the same way as ``quota``, with
the exception of the special ``infinity`` value. The default is ``5%``.

* ``low-watermark``

This controls how much of the cache should be retained on cleanup.

``low-watermark`` is specified as a percentage of the effective cache quota
as configured by ``quota`` and/or ``reserved-disk-space``. The default is
``80%``, which means that when cleanup is triggered, CAS objects that haven't
been used recently are removed until the cache occupies no more than 80% of
the effective quota (i.e. roughly 20% of the quota is freed).

* ``pull-buildtrees``

Whether to pull *build trees* when downloading remote artifacts.
Expand Down
22 changes: 20 additions & 2 deletions src/buildstream/_cas/casdprocessmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,25 @@
# log_dir (str): The directory for the logs
# log_level (LogLevel): Log level to give to buildbox-casd for logging
# cache_quota (int): User configured cache quota
# reserved (int): User configured reserved disk space
# remote_cache_spec (RemoteSpec): Optional remote cache server
# protect_session_blobs (bool): Disable expiry for blobs used in the current session
# messenger (Messenger): The messenger to report warnings through the UI
#
class CASDProcessManager:
def __init__(self, path, log_dir, log_level, cache_quota, remote_cache_spec, protect_session_blobs, messenger):
def __init__(
self,
path,
log_dir,
log_level,
cache_quota,
remote_cache_spec,
protect_session_blobs,
messenger,
*,
reserved=None,
low_watermark=None
):
os.makedirs(path, exist_ok=True)

self._log_dir = log_dir
Expand All @@ -80,7 +93,12 @@ def __init__(self, path, log_dir, log_level, cache_quota, remote_cache_spec, pro

if cache_quota is not None:
casd_args.append("--quota-high={}".format(int(cache_quota)))
casd_args.append("--quota-low={}".format(int(cache_quota / 2)))

if low_watermark is not None:
casd_args.append("--quota-low={}%".format(int(low_watermark * 100)))

if reserved is not None:
casd_args.append("--reserved={}".format(int(reserved)))

if protect_session_blobs:
casd_args.append("--protect-session-blobs")
Expand Down
38 changes: 37 additions & 1 deletion src/buildstream/_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ def __init__(self, *, use_casd: bool = True) -> None:
# User specified cache quota, used for display messages
self.config_cache_quota_string: Optional[str] = None

# Reserved disk space for local cache in bytes
self.config_cache_reserved: Optional[int] = None

# Low watermark for local cache (ratio relative to effective quota)
self.config_cache_low_watermark: Optional[float] = None

# Remote cache server
self.remote_cache_spec: Optional[RemoteSpec] = None

Expand Down Expand Up @@ -362,7 +368,9 @@ def load(self, config: Optional[str] = None) -> None:
# We need to find the first existing directory in the path of our
# casdir - the casdir may not have been created yet.
cache = defaults.get_mapping("cache")
cache.validate_keys(["quota", "storage-service", "pull-buildtrees", "cache-buildtrees"])
cache.validate_keys(
["quota", "reserved-disk-space", "low-watermark", "storage-service", "pull-buildtrees", "cache-buildtrees"]
)

cas_volume = self.casdir
while not os.path.exists(cas_volume):
Expand All @@ -378,6 +386,32 @@ def load(self, config: Optional[str] = None) -> None:
LoadErrorReason.INVALID_DATA,
) from e

cache_reserved_string = cache.get_str("reserved-disk-space")
try:
self.config_cache_reserved = utils._parse_size(cache_reserved_string, cas_volume)
if self.config_cache_reserved is None:
provenance = cache.get_scalar("reserved-disk-space").get_provenance()
raise LoadError(
"{}: Please specify the value in bytes or as a % of full disk space.\n"
"\nValid values are, for example: 2G 5%\n".format(provenance),
LoadErrorReason.INVALID_DATA,
)
except utils.UtilError as e:
raise LoadError(
"{}\nPlease specify the value in bytes or as a % of full disk space.\n"
"\nValid values are, for example: 2G 5%\n".format(str(e)),
LoadErrorReason.INVALID_DATA,
) from e

low_watermark_string = cache.get_str("low-watermark")
try:
self.config_cache_low_watermark = utils._parse_percentage(low_watermark_string)
except utils.UtilError as e:
raise LoadError(
"{}\nPlease specify the value as a % of the cache quota.".format(str(e)),
LoadErrorReason.INVALID_DATA,
) from e

remote_cache = cache.get_mapping("storage-service", default=None)
if remote_cache:
self.remote_cache_spec = RemoteSpec.new_from_node(remote_cache)
Expand Down Expand Up @@ -701,6 +735,8 @@ def get_casd(self) -> CASDProcessManager:
self.remote_cache_spec,
protect_session_blobs=True,
messenger=self.messenger,
reserved=self.config_cache_reserved,
low_watermark=self.config_cache_low_watermark,
)
return self._casd

Expand Down
6 changes: 6 additions & 0 deletions src/buildstream/data/userconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ cache:
# Use as much space as possible
quota: infinity

# Keep 5% of disk space available
reserved-disk-space: 5%

# Retain 80% of the cache on cleanup
low-watermark: 80%

# Whether to pull build trees when downloading element artifacts
pull-buildtrees: False

Expand Down
29 changes: 29 additions & 0 deletions src/buildstream/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,35 @@ def _parse_size(size, volume):
return int(num) * 1024 ** units.index(unit)


# _parse_percentage():
#
# Convert a string representing a percentage between 0% and 100% to a float.
# E.g. "80%" -> 0.8.
#
# Arguments:
# percentage (str) The string to parse
#
# Returns:
# (float) The percentage as a float
#
# Raises:
# UtilError if the string is not a valid percentage.
#
def _parse_percentage(percentage):
if not percentage.endswith("%"):
raise UtilError("{} is not a valid percentage.".format(percentage))

try:
num = float(percentage[:-1])
except ValueError:
raise UtilError("{} is not a valid percentage.".format(percentage))

if num < 0 or num > 100:
raise UtilError("{} is not between 0% and 100%.".format(percentage))

return num / 100


# _pretty_size()
#
# Converts a number of bytes into a string representation in KiB, MiB, GiB, TiB
Expand Down
2 changes: 1 addition & 1 deletion tests/artifactcache/expiry.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_expiry_order(cli, datafiles):
element_path = "elements"
checkout = os.path.join(project, "workspace")

cli.configure({"cache": {"quota": 9000000}})
cli.configure({"cache": {"quota": 9000000, "low-watermark": "50%"}})

# Create an artifact
create_element_size("dep.bst", project, element_path, [], 2000000)
Expand Down

0 comments on commit 28e385f

Please sign in to comment.