diff --git a/Cargo.lock b/Cargo.lock index 0d0c034adf4..52aaab9e1ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.0.2" @@ -17,6 +28,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "anes" version = "0.1.6" @@ -112,8 +129,10 @@ name = "benchsuite" version = "0.0.0" dependencies = [ "cargo", + "cargo-util", "criterion", "flate2", + "rand", "tar", "url", ] @@ -270,6 +289,8 @@ dependencies = [ "pathdiff", "pulldown-cmark", "rand", + "regex", + "rusqlite", "rustfix", "same-file", "semver", @@ -389,6 +410,7 @@ dependencies = [ "time", "toml", "url", + "walkdir", "windows-sys", ] @@ -400,6 +422,7 @@ dependencies = [ "core-foundation", "filetime", "hex", + "ignore", "jobserver", "libc", "miow", @@ -885,6 +908,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "faster-hex" version = "0.8.1" @@ -1805,6 +1840,19 @@ name = "hashbrown" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] [[package]] name = "hermit-abi" @@ -2059,6 +2107,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libsqlite3-sys" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "libssh2-sys" version = "0.3.0" @@ -2614,7 +2673,7 @@ dependencies = [ "rand", "rand_chacha", "rand_xorshift", - "regex-syntax 0.7.2", + "regex-syntax 0.7.5", "rusty-fork", "tempfile", "unarray", @@ -2742,13 +2801,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.2", + "regex-automata 0.3.8", + "regex-syntax 0.7.5", ] [[package]] @@ -2765,6 +2825,11 @@ name = "regex-automata" version = 
"0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] [[package]] name = "regex-syntax" @@ -2774,9 +2839,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "resolver-tests" @@ -2798,6 +2863,20 @@ dependencies = [ "subtle", ] +[[package]] +name = "rusqlite" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +dependencies = [ + "bitflags 2.4.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustc-hash" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 3df275ff70d..60ffec21b64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,8 @@ pretty_assertions = "1.4.0" proptest = "1.3.1" pulldown-cmark = { version = "0.9.3", default-features = false } rand = "0.8.5" +regex = "1.9.3" +rusqlite = { version = "0.29.0", features = ["bundled"] } rustfix = "0.6.1" same-file = "1.0.6" security-framework = "2.9.2" @@ -162,6 +164,8 @@ pasetors.workspace = true pathdiff.workspace = true pulldown-cmark.workspace = true rand.workspace = true +regex.workspace = true +rusqlite.workspace = true rustfix.workspace = true semver.workspace = true serde = { workspace = true, features = ["derive"] } diff --git a/benches/README.md b/benches/README.md index b4b8b190a0a..900bf084ca3 100644 --- a/benches/README.md +++ b/benches/README.md @@ -9,7 +9,23 @@ cd benches/benchsuite cargo bench ``` -The tests involve downloading the index and benchmarking against some +However, running all benchmarks would take many minutes, so in most cases it +is recommended to just run the benchmarks relevant to whatever section of code +you are working on. + +## Benchmarks + +There are several different kinds of benchmarks in the `benchsuite/benches` directory: + +* `global_cache_tracker` — Benchmarks saving data to the global cache tracker + database using samples of real-world data. +* `resolve` — Benchmarks the resolver against simulations of real-world workspaces. +* `workspace_initialization` — Benchmarks initialization of a workspace + against simulations of real-world workspaces. + +### Resolve benchmarks + +The resolve benchmarks involve downloading the index and benchmarking against some real-world and artificial workspaces located in the [`workspaces`](workspaces) directory. @@ -21,7 +37,7 @@ faster. You can (and probably should) specify individual benchmarks to run to narrow it down to a more reasonable set, for example: ```sh -cargo bench -- resolve_ws/rust +cargo bench -p benchsuite --bench resolve -- resolve_ws/rust ``` This will only download what's necessary for the rust-lang/rust workspace @@ -29,7 +45,24 @@ This will only download what's necessary for the rust-lang/rust workspace about a minute). 
 To get a list of all the benchmarks, run:
 
 ```sh
-cargo bench -- --list
+cargo bench -p benchsuite --bench resolve -- --list
 ```
+
+### Global cache tracker
+
+The `global_cache_tracker` benchmark tests saving data to the global cache
+tracker database using samples of real-world data. This benchmark should run
+relatively quickly.
+
+The real-world data is based on a capture of my personal development
+environment, which has accumulated a large cache. So it is somewhat arbitrary,
+but hopefully representative of a challenging environment. The data is
+captured with the `capture-last-use` binary, which you can run if you need to
+rebuild the database. Run it on a system with a relatively full cache in your
+cargo home directory.
+
+```sh
+cargo bench -p benchsuite --bench global_cache_tracker
+```
 
 ## Viewing reports
diff --git a/benches/benchsuite/Cargo.toml b/benches/benchsuite/Cargo.toml
index 81413e761d8..2887290ed4d 100644
--- a/benches/benchsuite/Cargo.toml
+++ b/benches/benchsuite/Cargo.toml
@@ -11,8 +11,10 @@ publish = false
 
 [dependencies]
 cargo.workspace = true
+cargo-util.workspace = true
 criterion.workspace = true
 flate2.workspace = true
+rand.workspace = true
 tar.workspace = true
 url.workspace = true
@@ -26,3 +28,7 @@ harness = false
 [[bench]]
 name = "workspace_initialization"
 harness = false
+
+[[bench]]
+name = "global_cache_tracker"
+harness = false
diff --git a/benches/benchsuite/benches/global_cache_tracker.rs b/benches/benchsuite/benches/global_cache_tracker.rs
new file mode 100644
index 00000000000..71d5d526226
--- /dev/null
+++ b/benches/benchsuite/benches/global_cache_tracker.rs
@@ -0,0 +1,159 @@
+//! Benchmarks for the global cache tracker.
+
+use cargo::core::global_cache_tracker::{self, DeferredGlobalLastUse, GlobalCacheTracker};
+use cargo::util::cache_lock::CacheLockMode;
+use cargo::util::interning::InternedString;
+use cargo::util::Config;
+use criterion::{criterion_group, criterion_main, Criterion};
+use std::fs;
+use std::path::{Path, PathBuf};
+
+// Samples of real-world data.
+const GLOBAL_CACHE_SAMPLE: &str = "global-cache-tracker/global-cache-sample";
+const GLOBAL_CACHE_RANDOM: &str = "global-cache-tracker/random-sample";
+
+/// A scratch directory where the benchmark can place some files.
+fn root() -> PathBuf {
+    let mut p = PathBuf::from(env!("CARGO_TARGET_TMPDIR"));
+    p.push("bench_global_cache_tracker");
+    p
+}
+
+fn cargo_home() -> PathBuf {
+    let mut p = root();
+    p.push("chome");
+    p
+}
+
+fn initialize_config() -> Config {
+    // Set up config.
+    let shell = cargo::core::Shell::new();
+    let homedir = cargo_home();
+    if !homedir.exists() {
+        fs::create_dir_all(&homedir).unwrap();
+    }
+    let cwd = homedir.clone();
+    let mut config = Config::new(shell, cwd, homedir);
+    config.nightly_features_allowed = true;
+    config.set_search_stop_path(root());
+    config
+        .configure(
+            0,
+            false,
+            None,
+            false,
+            false,
+            false,
+            &None,
+            &["gc".to_string()],
+            &[],
+        )
+        .unwrap();
+    // Set up database sample.
+    let db_path = GlobalCacheTracker::db_path(&config).into_path_unlocked();
+    if db_path.exists() {
+        fs::remove_file(&db_path).unwrap();
+    }
+    let sample = Path::new(env!("CARGO_MANIFEST_DIR")).join(GLOBAL_CACHE_SAMPLE);
+    fs::copy(sample, &db_path).unwrap();
+    config
+}
+
+/// Benchmarks how long it takes to initialize `GlobalCacheTracker` with an already
+/// existing full database.
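+///
+/// (`initialize_config` above copies the `global-cache-sample` database into
+/// a scratch cargo home under `CARGO_TARGET_TMPDIR`, so every run starts from
+/// the same real-world data.)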
+fn global_tracker_init(c: &mut Criterion) {
+    let config = initialize_config();
+    let _lock = config
+        .acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
+        .unwrap();
+    c.bench_function("global_tracker_init", |b| {
+        b.iter(|| {
+            GlobalCacheTracker::new(&config).unwrap();
+        })
+    });
+}
+
+/// Benchmarks how long it takes to save a `GlobalCacheTracker` when there are zero
+/// updates.
+fn global_tracker_empty_save(c: &mut Criterion) {
+    let config = initialize_config();
+    let _lock = config
+        .acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
+        .unwrap();
+    let mut deferred = DeferredGlobalLastUse::new();
+    let mut tracker = GlobalCacheTracker::new(&config).unwrap();
+
+    c.bench_function("global_tracker_empty_save", |b| {
+        b.iter(|| {
+            deferred.save(&mut tracker).unwrap();
+        })
+    });
+}
+
+fn load_random_sample() -> Vec<(InternedString, InternedString, u64)> {
+    let path = Path::new(env!("CARGO_MANIFEST_DIR")).join(GLOBAL_CACHE_RANDOM);
+    fs::read_to_string(path)
+        .unwrap()
+        .lines()
+        .map(|s| {
+            let mut s = s.split(',');
+            (
+                s.next().unwrap().into(),
+                s.next().unwrap().into(),
+                s.next().unwrap().parse().unwrap(),
+            )
+        })
+        .collect()
+}
+
+/// Tests performance of updating the last-use timestamps in an already
+/// populated database.
+///
+/// This runs with different numbers of crates to update (selecting from the
+/// random sample stored on disk).
+fn global_tracker_update(c: &mut Criterion) {
+    let config = initialize_config();
+    let _lock = config
+        .acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
+        .unwrap();
+    let sample = Path::new(env!("CARGO_MANIFEST_DIR")).join(GLOBAL_CACHE_SAMPLE);
+    let db_path = GlobalCacheTracker::db_path(&config).into_path_unlocked();
+
+    let random_sample = load_random_sample();
+
+    let mut group = c.benchmark_group("global_tracker_update");
+    for size in [1, 10, 100, 500] {
+        if db_path.exists() {
+            fs::remove_file(&db_path).unwrap();
+        }
+
+        fs::copy(&sample, &db_path).unwrap();
+        let mut deferred = DeferredGlobalLastUse::new();
+        let mut tracker = GlobalCacheTracker::new(&config).unwrap();
+        group.bench_with_input(size.to_string(), &size, |b, &size| {
+            b.iter(|| {
+                for (encoded_registry_name, name, size) in &random_sample[..size] {
+                    deferred.mark_registry_crate_used(global_cache_tracker::RegistryCrate {
+                        encoded_registry_name: *encoded_registry_name,
+                        crate_filename: format!("{}.crate", name).into(),
+                        size: *size,
+                    });
+                    deferred.mark_registry_src_used(global_cache_tracker::RegistrySrc {
+                        encoded_registry_name: *encoded_registry_name,
+                        package_dir: *name,
+                        size: Some(*size),
+                    });
+                }
+                deferred.save(&mut tracker).unwrap();
+            })
+        });
+    }
+}
+
+criterion_group!(
+    benches,
+    global_tracker_init,
+    global_tracker_empty_save,
+    global_tracker_update
+);
+criterion_main!(benches);
diff --git a/benches/benchsuite/global-cache-tracker/global-cache-sample b/benches/benchsuite/global-cache-tracker/global-cache-sample
new file mode 100644
index 00000000000..dc134f53854
Binary files /dev/null and b/benches/benchsuite/global-cache-tracker/global-cache-sample differ
diff --git a/benches/benchsuite/global-cache-tracker/random-sample b/benches/benchsuite/global-cache-tracker/random-sample
new file mode 100644
index 00000000000..62b611cff30
--- /dev/null
+++ b/benches/benchsuite/global-cache-tracker/random-sample
@@ -0,0 +1,500 @@
+github.aaakk.us.kg-1ecc6299db9ec823,tungstenite-0.18.0,218740
+github.aaakk.us.kg-1ecc6299db9ec823,integer-encoding-1.1.5,30672
+github.aaakk.us.kg-1ecc6299db9ec823,tungstenite-0.14.0,315676 +github.aaakk.us.kg-1ecc6299db9ec823,oxcable-0.5.1,163196 +github.aaakk.us.kg-1ecc6299db9ec823,swc_ecma_transforms_typescript-0.32.0,245522 +github.aaakk.us.kg-1ecc6299db9ec823,hyper-0.12.35,601153 +github.aaakk.us.kg-1ecc6299db9ec823,resiter-0.4.0,59880 +github.aaakk.us.kg-1ecc6299db9ec823,net2-0.2.37,115813 +github.aaakk.us.kg-1ecc6299db9ec823,str_inflector-0.12.0,182460 +github.aaakk.us.kg-1ecc6299db9ec823,derive_builder_macro-0.10.2,16441 +github.aaakk.us.kg-1ecc6299db9ec823,smol_str-0.1.23,42436 +github.aaakk.us.kg-1ecc6299db9ec823,wasm-bindgen-multi-value-xform-0.2.83,35347 +github.aaakk.us.kg-1ecc6299db9ec823,time-macros-0.1.0,1620 +github.aaakk.us.kg-1ecc6299db9ec823,unicode-bidi-0.3.7,140153 +github.aaakk.us.kg-1ecc6299db9ec823,socket2-0.4.0,167295 +github.aaakk.us.kg-1ecc6299db9ec823,ppv-lite86-0.2.10,125234 +github.aaakk.us.kg-1ecc6299db9ec823,tracing-wasm-0.2.1,31449 +github.aaakk.us.kg-1ecc6299db9ec823,eframe-0.19.0,158130 +github.aaakk.us.kg-1ecc6299db9ec823,block-modes-0.7.0,42530 +github.aaakk.us.kg-1ecc6299db9ec823,rangemap-0.1.11,144157 +github.aaakk.us.kg-1ecc6299db9ec823,metal-0.23.1,1038699 +github.aaakk.us.kg-1ecc6299db9ec823,os_str_bytes-6.0.1,86390 +github.aaakk.us.kg-1ecc6299db9ec823,plotters-backend-0.3.4,53018 +github.aaakk.us.kg-1ecc6299db9ec823,spidev-0.4.0,45301 +github.aaakk.us.kg-1ecc6299db9ec823,axum-macros-0.2.3,102058 +github.aaakk.us.kg-1ecc6299db9ec823,embedded-time-0.12.1,246450 +github.aaakk.us.kg-1ecc6299db9ec823,envmnt-0.10.4,2328079 +github.aaakk.us.kg-1ecc6299db9ec823,camino-1.1.1,133976 +github.aaakk.us.kg-1ecc6299db9ec823,siphasher-0.3.5,46666 +github.aaakk.us.kg-1ecc6299db9ec823,lexical-write-integer-0.8.5,388374 +github.aaakk.us.kg-1ecc6299db9ec823,reqwest-0.11.14,686608 +github.aaakk.us.kg-1ecc6299db9ec823,enum-map-2.4.1,51184 +github.aaakk.us.kg-1ecc6299db9ec823,sentry-panic-0.29.0,18211 +github.aaakk.us.kg-1ecc6299db9ec823,msf-srtp-0.2.0,73164 +github.aaakk.us.kg-1ecc6299db9ec823,near-sandbox-utils-0.4.1,7543 +github.aaakk.us.kg-1ecc6299db9ec823,ablescript-0.5.2,129318 +github.aaakk.us.kg-1ecc6299db9ec823,apecs-derive-0.2.3,10620 +github.aaakk.us.kg-1ecc6299db9ec823,libc-0.2.133,3417382 +github.aaakk.us.kg-1ecc6299db9ec823,tracing-0.1.35,380627 +github.aaakk.us.kg-1ecc6299db9ec823,serde-wasm-bindgen-0.3.1,55371 +github.aaakk.us.kg-1ecc6299db9ec823,compiler_builtins-0.1.71,692853 +github.aaakk.us.kg-1ecc6299db9ec823,mockito-0.7.2,1179718 +github.aaakk.us.kg-1ecc6299db9ec823,tonic-0.5.2,420299 +github.aaakk.us.kg-1ecc6299db9ec823,tracing-core-0.1.30,240058 +github.aaakk.us.kg-1ecc6299db9ec823,tower-timeout-0.3.0-alpha.2,7486 +github.aaakk.us.kg-1ecc6299db9ec823,js-intern-0.3.1,7026 +github.aaakk.us.kg-1ecc6299db9ec823,json-ld-context-processing-0.12.1,78101 +github.aaakk.us.kg-1ecc6299db9ec823,generic-array-0.14.6,67349 +github.aaakk.us.kg-1ecc6299db9ec823,synstructure-0.12.3,93523 +github.aaakk.us.kg-1ecc6299db9ec823,version-compare-0.0.10,74950 +github.aaakk.us.kg-1ecc6299db9ec823,dirs-1.0.5,51075 +github.aaakk.us.kg-1ecc6299db9ec823,worker-kv-0.5.1,67351 +github.aaakk.us.kg-1ecc6299db9ec823,vsimd-0.8.0,170805 +github.aaakk.us.kg-1ecc6299db9ec823,mockall-0.9.1,187734 +github.aaakk.us.kg-1ecc6299db9ec823,nan-preserving-float-0.1.0,6341 +github.aaakk.us.kg-1ecc6299db9ec823,wasmer-types-2.3.0,192436 +github.aaakk.us.kg-1ecc6299db9ec823,sodiumoxide-0.2.7,5131115 +github.aaakk.us.kg-1ecc6299db9ec823,tracing-attributes-0.1.11,74857 
+github.aaakk.us.kg-1ecc6299db9ec823,treediff-4.0.2,72588 +github.aaakk.us.kg-1ecc6299db9ec823,wiggle-generate-5.0.0,103044 +github.aaakk.us.kg-1ecc6299db9ec823,lapin-1.6.6,497368 +github.aaakk.us.kg-1ecc6299db9ec823,cranelift-entity-0.93.1,114206 +github.aaakk.us.kg-1ecc6299db9ec823,pcap-parser-0.13.3,184131 +github.aaakk.us.kg-1ecc6299db9ec823,rustfft-5.1.1,1638221 +github.aaakk.us.kg-1ecc6299db9ec823,string_cache-0.7.5,75074 +github.aaakk.us.kg-1ecc6299db9ec823,maybe-uninit-2.0.0,38492 +github.aaakk.us.kg-1ecc6299db9ec823,diesel_full_text_search-2.0.0,10179 +github.aaakk.us.kg-1ecc6299db9ec823,quinn-proto-0.8.4,687565 +github.aaakk.us.kg-1ecc6299db9ec823,semver-0.5.1,73365 +github.aaakk.us.kg-1ecc6299db9ec823,rocket_http-0.5.0-rc.2,409939 +github.aaakk.us.kg-1ecc6299db9ec823,dialoguer-0.7.1,95159 +github.aaakk.us.kg-1ecc6299db9ec823,fallible_collections-0.4.5,244152 +github.aaakk.us.kg-1ecc6299db9ec823,parking_lot_core-0.9.0,138932 +github.aaakk.us.kg-1ecc6299db9ec823,relative-path-1.6.0,103315 +github.aaakk.us.kg-1ecc6299db9ec823,lua52-sys-0.1.2,584054 +github.aaakk.us.kg-1ecc6299db9ec823,actix-files-0.6.0,126121 +github.aaakk.us.kg-1ecc6299db9ec823,crates-io-0.35.1,29498 +github.aaakk.us.kg-1ecc6299db9ec823,sentry-backtrace-0.19.1,20268 +github.aaakk.us.kg-1ecc6299db9ec823,text_unit-0.1.10,26100 +github.aaakk.us.kg-1ecc6299db9ec823,ascii-1.0.0,143025 +github.aaakk.us.kg-1ecc6299db9ec823,crossbeam-utils-0.8.6,169542 +github.aaakk.us.kg-1ecc6299db9ec823,nelf-0.1.0,28868 +github.aaakk.us.kg-1ecc6299db9ec823,colorsys-0.6.5,86989 +github.aaakk.us.kg-1ecc6299db9ec823,enum-iterator-1.2.0,31042 +github.aaakk.us.kg-1ecc6299db9ec823,ansi-str-0.7.2,111689 +github.aaakk.us.kg-1ecc6299db9ec823,anyhow-1.0.68,209123 +github.aaakk.us.kg-1ecc6299db9ec823,gix-lock-5.0.1,65110 +github.aaakk.us.kg-1ecc6299db9ec823,nom-supreme-0.8.0,147530 +github.aaakk.us.kg-1ecc6299db9ec823,path-slash-0.1.4,28655 +github.aaakk.us.kg-1ecc6299db9ec823,crates-io-0.35.0,29406 +github.aaakk.us.kg-1ecc6299db9ec823,stb_truetype-0.2.8,22939 +github.aaakk.us.kg-1ecc6299db9ec823,proc-macro2-1.0.50,185288 +github.aaakk.us.kg-1ecc6299db9ec823,snapbox-0.4.1,169526 +github.aaakk.us.kg-1ecc6299db9ec823,hyper-0.14.9,764075 +github.aaakk.us.kg-1ecc6299db9ec823,ab_glyph-0.2.15,61722 +github.aaakk.us.kg-1ecc6299db9ec823,uuid-0.1.18,47889 +github.aaakk.us.kg-1ecc6299db9ec823,data-url-0.2.0,123480 +github.aaakk.us.kg-1ecc6299db9ec823,threadpool-1.7.1,59558 +github.aaakk.us.kg-1ecc6299db9ec823,thiserror-impl-1.0.29,65149 +github.aaakk.us.kg-1ecc6299db9ec823,sha1-0.6.0,31102 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-tls-0.2.1,51467 +github.aaakk.us.kg-1ecc6299db9ec823,locspan-derive-0.6.0,59360 +github.aaakk.us.kg-1ecc6299db9ec823,ureq-1.5.1,249335 +github.aaakk.us.kg-1ecc6299db9ec823,protoc-rust-2.24.1,13459 +github.aaakk.us.kg-1ecc6299db9ec823,serde-1.0.159,509060 +github.aaakk.us.kg-1ecc6299db9ec823,unescape-0.1.0,6047 +github.aaakk.us.kg-1ecc6299db9ec823,data-encoding-2.2.0,113191 +github.aaakk.us.kg-1ecc6299db9ec823,bytestring-1.1.0,23705 +github.aaakk.us.kg-1ecc6299db9ec823,ab_glyph_rasterizer-0.1.8,34773 +github.aaakk.us.kg-1ecc6299db9ec823,syn-0.12.15,912964 +github.aaakk.us.kg-1ecc6299db9ec823,reqwest-0.11.9,656209 +github.aaakk.us.kg-1ecc6299db9ec823,rustls-0.17.0,903717 +github.aaakk.us.kg-1ecc6299db9ec823,term_size-0.3.2,36226 +github.aaakk.us.kg-1ecc6299db9ec823,ordered-float-3.1.0,91357 +github.aaakk.us.kg-1ecc6299db9ec823,cookie-0.2.5,44912 +github.aaakk.us.kg-1ecc6299db9ec823,debugid-0.8.0,44521 
+github.aaakk.us.kg-1ecc6299db9ec823,conrod-0.51.1,2154016 +github.aaakk.us.kg-1ecc6299db9ec823,indexmap-1.6.1,247801 +github.aaakk.us.kg-1ecc6299db9ec823,target-spec-1.3.1,68315 +github.aaakk.us.kg-1ecc6299db9ec823,lexical-parse-integer-0.8.6,139671 +github.aaakk.us.kg-1ecc6299db9ec823,time-0.1.38,131629 +github.aaakk.us.kg-1ecc6299db9ec823,glib-macros-0.14.1,102959 +github.aaakk.us.kg-1ecc6299db9ec823,metrics-macros-0.6.0,37750 +github.aaakk.us.kg-1ecc6299db9ec823,structopt-0.3.12,224213 +github.aaakk.us.kg-1ecc6299db9ec823,criterion-0.3.2,439241 +github.aaakk.us.kg-1ecc6299db9ec823,lyon_path-0.17.7,186745 +github.aaakk.us.kg-1ecc6299db9ec823,miette-5.5.0,312945 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-codec-0.2.0-alpha.6,118193 +github.aaakk.us.kg-1ecc6299db9ec823,structopt-derive-0.4.14,84883 +github.aaakk.us.kg-1ecc6299db9ec823,objekt-0.1.2,24191 +github.aaakk.us.kg-1ecc6299db9ec823,sqlx-macros-0.5.7,110890 +github.aaakk.us.kg-1ecc6299db9ec823,systemstat-0.1.10,127295 +github.aaakk.us.kg-1ecc6299db9ec823,colorful-0.2.2,99698 +github.aaakk.us.kg-1ecc6299db9ec823,quick-xml-0.20.0,645935 +github.aaakk.us.kg-1ecc6299db9ec823,selinux-sys-0.6.2,27060 +github.aaakk.us.kg-1ecc6299db9ec823,vsmtp-mail-parser-1.4.0-rc.10,137699 +github.aaakk.us.kg-1ecc6299db9ec823,sec1-0.7.2,64870 +github.aaakk.us.kg-1ecc6299db9ec823,nix-0.22.1,1161830 +github.aaakk.us.kg-1ecc6299db9ec823,snow-0.9.0,2658286 +github.aaakk.us.kg-1ecc6299db9ec823,per_test_directory_macros-0.1.0,2962 +github.aaakk.us.kg-1ecc6299db9ec823,syn-helpers-0.4.3,58801 +github.aaakk.us.kg-1ecc6299db9ec823,terminal_size-0.2.2,29633 +github.aaakk.us.kg-1ecc6299db9ec823,bevy_hierarchy-0.7.0,41018 +github.aaakk.us.kg-1ecc6299db9ec823,dynamic_reload-0.4.0,74455 +github.aaakk.us.kg-1ecc6299db9ec823,http-signature-normalization-actix-0.5.0-beta.14,126857 +github.aaakk.us.kg-1ecc6299db9ec823,http-body-0.4.1,24138 +github.aaakk.us.kg-1ecc6299db9ec823,gix-index-0.13.0,207795 +github.aaakk.us.kg-1ecc6299db9ec823,darling_macro-0.13.1,4156 +github.aaakk.us.kg-1ecc6299db9ec823,serde_json-1.0.66,543072 +github.aaakk.us.kg-1ecc6299db9ec823,minreq-1.4.1,41355 +github.aaakk.us.kg-1ecc6299db9ec823,sct-0.6.1,60974 +github.aaakk.us.kg-1ecc6299db9ec823,openssl-0.10.50,1173941 +github.aaakk.us.kg-1ecc6299db9ec823,bevy_pbr-0.6.0,201163 +github.aaakk.us.kg-1ecc6299db9ec823,security-framework-2.3.1,290512 +github.aaakk.us.kg-1ecc6299db9ec823,pin-project-internal-0.4.30,128419 +github.aaakk.us.kg-1ecc6299db9ec823,serde_yaml-0.7.5,158524 +github.aaakk.us.kg-1ecc6299db9ec823,cid-0.3.2,17269 +github.aaakk.us.kg-1ecc6299db9ec823,plotters-backend-0.3.0,51995 +github.aaakk.us.kg-1ecc6299db9ec823,serde_yaml-0.8.12,179579 +github.aaakk.us.kg-1ecc6299db9ec823,cosmwasm-schema-derive-1.1.9,34956 +github.aaakk.us.kg-1ecc6299db9ec823,docopt-0.6.86,175553 +github.aaakk.us.kg-1ecc6299db9ec823,git-testament-0.2.4,27685 +github.aaakk.us.kg-1ecc6299db9ec823,htmlescape-0.3.1,143378 +github.aaakk.us.kg-1ecc6299db9ec823,is_proc_translated-0.1.1,16533 +github.aaakk.us.kg-1ecc6299db9ec823,futures-macro-0.3.4,33147 +github.aaakk.us.kg-1ecc6299db9ec823,futures-intrusive-0.4.2,520476 +github.aaakk.us.kg-1ecc6299db9ec823,rustix-0.35.13,1581355 +github.aaakk.us.kg-1ecc6299db9ec823,glsl-layout-0.3.2,75515 +github.aaakk.us.kg-1ecc6299db9ec823,darling-0.12.0,67446 +github.aaakk.us.kg-1ecc6299db9ec823,blake3-0.1.5,394136 +github.aaakk.us.kg-1ecc6299db9ec823,async-stripe-0.15.0,3157635 +github.aaakk.us.kg-1ecc6299db9ec823,hbs-common-sys-0.2.1,1034 
+github.aaakk.us.kg-1ecc6299db9ec823,base58-0.1.0,7019 +github.aaakk.us.kg-1ecc6299db9ec823,time-0.2.23,342720 +github.aaakk.us.kg-1ecc6299db9ec823,memoffset-0.5.6,27595 +github.aaakk.us.kg-1ecc6299db9ec823,colored-1.9.3,85161 +github.aaakk.us.kg-1ecc6299db9ec823,lrpar-0.13.1,153317 +github.aaakk.us.kg-1ecc6299db9ec823,clap-2.34.0,975823 +github.aaakk.us.kg-1ecc6299db9ec823,chalk-engine-0.55.0,203718 +github.aaakk.us.kg-1ecc6299db9ec823,cosmic-space-0.3.6,800331 +github.aaakk.us.kg-1ecc6299db9ec823,syn-1.0.93,1886902 +github.aaakk.us.kg-1ecc6299db9ec823,futures-core-0.3.5,43430 +github.aaakk.us.kg-1ecc6299db9ec823,prost-derive-0.11.6,99428 +github.aaakk.us.kg-1ecc6299db9ec823,toml_edit-0.15.0,491549 +github.aaakk.us.kg-1ecc6299db9ec823,pcb-llvm-0.2.0,17328 +github.aaakk.us.kg-1ecc6299db9ec823,rusticata-macros-2.1.0,35537 +github.aaakk.us.kg-1ecc6299db9ec823,rustyline-with-hint-fix-10.1.0,548833 +github.aaakk.us.kg-1ecc6299db9ec823,sharded-slab-0.1.1,239224 +github.aaakk.us.kg-1ecc6299db9ec823,literally-0.1.3,20415 +github.aaakk.us.kg-1ecc6299db9ec823,riff-1.0.1,20582 +github.aaakk.us.kg-1ecc6299db9ec823,futures-macro-0.3.23,38691 +github.aaakk.us.kg-1ecc6299db9ec823,criterion-0.3.1,431723 +github.aaakk.us.kg-1ecc6299db9ec823,atty-0.2.14,14567 +github.aaakk.us.kg-1ecc6299db9ec823,vergen-3.1.0,49089 +github.aaakk.us.kg-1ecc6299db9ec823,peeking_take_while-0.1.2,18604 +github.aaakk.us.kg-1ecc6299db9ec823,serde_derive-1.0.156,316173 +github.aaakk.us.kg-1ecc6299db9ec823,geo-0.23.1,1022596 +github.aaakk.us.kg-1ecc6299db9ec823,persy-1.4.3,778219 +github.aaakk.us.kg-1ecc6299db9ec823,futures-lite-1.13.0,214632 +github.aaakk.us.kg-1ecc6299db9ec823,ms_dtyp-0.0.3,44387 +github.aaakk.us.kg-1ecc6299db9ec823,thiserror-1.0.33,66618 +github.aaakk.us.kg-1ecc6299db9ec823,marksman_escape-0.1.2,587235 +github.aaakk.us.kg-1ecc6299db9ec823,serde_derive-1.0.101,289156 +github.aaakk.us.kg-1ecc6299db9ec823,gix-ref-0.29.0,214105 +github.aaakk.us.kg-1ecc6299db9ec823,der-0.7.5,384316 +github.aaakk.us.kg-1ecc6299db9ec823,promptly-0.3.0,35216 +github.aaakk.us.kg-1ecc6299db9ec823,libc-0.2.115,3166629 +github.aaakk.us.kg-1ecc6299db9ec823,ppv-lite86-0.1.2,33514 +github.aaakk.us.kg-1ecc6299db9ec823,gfx-hal-0.6.0,254453 +github.aaakk.us.kg-1ecc6299db9ec823,as-slice-0.1.3,20306 +github.aaakk.us.kg-1ecc6299db9ec823,gpu-alloc-0.3.0,78823 +github.aaakk.us.kg-1ecc6299db9ec823,arc-swap-0.4.8,167950 +github.aaakk.us.kg-1ecc6299db9ec823,libusb1-sys-0.5.0,1458763 +github.aaakk.us.kg-1ecc6299db9ec823,sysinfo-0.26.8,609932 +github.aaakk.us.kg-1ecc6299db9ec823,refinery-macros-0.8.7,6514 +github.aaakk.us.kg-1ecc6299db9ec823,assert_float_eq-1.1.3,38445 +github.aaakk.us.kg-1ecc6299db9ec823,tinyvec-1.1.0,363582 +github.aaakk.us.kg-1ecc6299db9ec823,predicates-1.0.7,1168580 +github.aaakk.us.kg-1ecc6299db9ec823,pulldown-cmark-0.9.3,595681 +github.aaakk.us.kg-1ecc6299db9ec823,aws-sigv4-0.46.0,97885 +github.aaakk.us.kg-1ecc6299db9ec823,fastrand-1.5.0,39175 +github.aaakk.us.kg-1ecc6299db9ec823,futures-channel-0.3.17,131816 +github.aaakk.us.kg-1ecc6299db9ec823,usbd_scsi-0.1.0,172205 +github.aaakk.us.kg-1ecc6299db9ec823,tinyvec-1.4.0,379505 +github.aaakk.us.kg-1ecc6299db9ec823,structsy-0.5.1,513822 +github.aaakk.us.kg-1ecc6299db9ec823,aws-sdk-ssm-0.21.0,9755619 +github.aaakk.us.kg-1ecc6299db9ec823,pin-project-lite-0.1.1,63942 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-rustls-0.13.0,78252 +github.aaakk.us.kg-1ecc6299db9ec823,tinyvec_macros-0.1.0,2912 +github.aaakk.us.kg-1ecc6299db9ec823,extended_matrix_float-1.0.0,6233 
+github.aaakk.us.kg-1ecc6299db9ec823,displaydoc-0.2.3,68676 +github.aaakk.us.kg-1ecc6299db9ec823,typed-arena-2.0.2,43549 +github.aaakk.us.kg-1ecc6299db9ec823,cranelift-0.86.1,16294 +github.aaakk.us.kg-1ecc6299db9ec823,modular-bitfield-impl-0.10.0,64389 +github.aaakk.us.kg-1ecc6299db9ec823,schemafy_core-0.5.2,7696 +github.aaakk.us.kg-1ecc6299db9ec823,sea-orm-macros-0.8.0,86930 +github.aaakk.us.kg-1ecc6299db9ec823,core-foundation-sys-0.4.6,61859 +github.aaakk.us.kg-1ecc6299db9ec823,move-symbol-pool-0.3.2,14473 +github.aaakk.us.kg-1ecc6299db9ec823,glutin-0.25.1,300518 +github.aaakk.us.kg-1ecc6299db9ec823,postcard-cobs-0.2.0,41524 +github.aaakk.us.kg-1ecc6299db9ec823,quote-0.6.11,69636 +github.aaakk.us.kg-1ecc6299db9ec823,encoding_rs-0.8.32,5022316 +github.aaakk.us.kg-1ecc6299db9ec823,clap-2.32.0,946148 +github.aaakk.us.kg-1ecc6299db9ec823,term-0.6.1,181220 +github.aaakk.us.kg-1ecc6299db9ec823,enumset-1.0.12,85911 +github.aaakk.us.kg-1ecc6299db9ec823,ctest2-0.4.1,100745 +github.aaakk.us.kg-1ecc6299db9ec823,serde-xml-any-0.0.3,70554 +github.aaakk.us.kg-1ecc6299db9ec823,proc-macro-hack-0.5.11,39025 +github.aaakk.us.kg-1ecc6299db9ec823,remove_dir_all-0.5.1,23418 +github.aaakk.us.kg-1ecc6299db9ec823,weezl-0.1.5,134218 +github.aaakk.us.kg-1ecc6299db9ec823,windows_x86_64_gnullvm-0.42.1,3254874 +github.aaakk.us.kg-1ecc6299db9ec823,rocket-0.5.0-rc.2,1225987 +github.aaakk.us.kg-1ecc6299db9ec823,pin-project-0.4.27,282004 +github.aaakk.us.kg-1ecc6299db9ec823,criterion-cycles-per-byte-0.1.3,18296 +github.aaakk.us.kg-1ecc6299db9ec823,coco-0.1.1,107143 +github.aaakk.us.kg-1ecc6299db9ec823,solana-bloom-1.15.1,22207 +github.aaakk.us.kg-1ecc6299db9ec823,qoqo_calculator-1.1.1,163666 +github.aaakk.us.kg-1ecc6299db9ec823,aes-gcm-0.9.4,381036 +github.aaakk.us.kg-1ecc6299db9ec823,blowfish-0.9.1,39658 +github.aaakk.us.kg-1ecc6299db9ec823,pango-0.14.3,258440 +github.aaakk.us.kg-1ecc6299db9ec823,clap_derive-3.0.0,129105 +github.aaakk.us.kg-1ecc6299db9ec823,content_inspector-0.2.4,27568 +github.aaakk.us.kg-1ecc6299db9ec823,jsona-0.2.0,104104 +github.aaakk.us.kg-1ecc6299db9ec823,gix-quote-0.4.3,32314 +github.aaakk.us.kg-1ecc6299db9ec823,bcs-0.1.3,93194 +github.aaakk.us.kg-1ecc6299db9ec823,statrs-0.14.0,681982 +github.aaakk.us.kg-1ecc6299db9ec823,cw-controllers-0.16.0,32195 +github.aaakk.us.kg-1ecc6299db9ec823,hyper-0.12.36,578470 +github.aaakk.us.kg-1ecc6299db9ec823,argon2-0.4.1,112707 +github.aaakk.us.kg-1ecc6299db9ec823,fraction-0.12.2,482976 +github.aaakk.us.kg-1ecc6299db9ec823,quickcheck-0.7.2,89884 +github.aaakk.us.kg-1ecc6299db9ec823,typetag-0.1.8,135149 +github.aaakk.us.kg-1ecc6299db9ec823,object-0.20.0,916661 +github.aaakk.us.kg-1ecc6299db9ec823,pest_derive-2.2.1,60318 +github.aaakk.us.kg-1ecc6299db9ec823,coremidi-sys-3.1.0,40849 +github.aaakk.us.kg-1ecc6299db9ec823,either-1.6.0,48881 +github.aaakk.us.kg-1ecc6299db9ec823,tarpc-0.29.0,244416 +github.aaakk.us.kg-1ecc6299db9ec823,num-integer-0.1.42,88403 +github.aaakk.us.kg-1ecc6299db9ec823,oid-registry-0.6.0,46996 +github.aaakk.us.kg-1ecc6299db9ec823,historian-3.0.11,23818 +github.aaakk.us.kg-1ecc6299db9ec823,ui-sys-0.1.3,1784250 +github.aaakk.us.kg-1ecc6299db9ec823,cranelift-frontend-0.92.0,166902 +github.aaakk.us.kg-1ecc6299db9ec823,pin-project-lite-0.1.12,77882 +github.aaakk.us.kg-1ecc6299db9ec823,piston2d-gfx_graphics-0.72.0,91826 +github.aaakk.us.kg-1ecc6299db9ec823,stylist-macros-0.9.2,78647 +github.aaakk.us.kg-1ecc6299db9ec823,valico-3.4.0,1394467 +github.aaakk.us.kg-1ecc6299db9ec823,inventory-0.3.3,40329 
+github.aaakk.us.kg-1ecc6299db9ec823,wrapping_arithmetic-0.1.0,8774 +github.aaakk.us.kg-1ecc6299db9ec823,serde-1.0.138,502921 +github.aaakk.us.kg-1ecc6299db9ec823,ra_common-0.1.3,16920 +github.aaakk.us.kg-1ecc6299db9ec823,markup5ever-0.10.0,213742 +github.aaakk.us.kg-1ecc6299db9ec823,libp2p-core-0.20.1,460422 +github.aaakk.us.kg-1ecc6299db9ec823,inout-0.1.2,40474 +github.aaakk.us.kg-1ecc6299db9ec823,flatbuffers-23.1.21,103944 +github.aaakk.us.kg-1ecc6299db9ec823,gdk-pixbuf-sys-0.10.0,42914 +github.aaakk.us.kg-1ecc6299db9ec823,miniz_oxide-0.5.1,223551 +github.aaakk.us.kg-1ecc6299db9ec823,merge-0.1.0,70214 +github.aaakk.us.kg-1ecc6299db9ec823,pagecache-0.6.0,260742 +github.aaakk.us.kg-1ecc6299db9ec823,ritelinked-0.3.2,142063 +github.aaakk.us.kg-1ecc6299db9ec823,ethers-contract-1.0.2,589452 +github.aaakk.us.kg-1ecc6299db9ec823,color_quant-1.1.0,21284 +github.aaakk.us.kg-1ecc6299db9ec823,libykpers-sys-0.3.1,14270 +github.aaakk.us.kg-1ecc6299db9ec823,cgmath-0.17.0,367702 +github.aaakk.us.kg-1ecc6299db9ec823,clap-4.0.18,1096299 +github.aaakk.us.kg-1ecc6299db9ec823,ears-0.5.1,165152 +github.aaakk.us.kg-1ecc6299db9ec823,h2-0.2.5,765073 +github.aaakk.us.kg-1ecc6299db9ec823,image-0.22.5,725576 +github.aaakk.us.kg-1ecc6299db9ec823,digest-0.10.1,83013 +github.aaakk.us.kg-1ecc6299db9ec823,js-sys-0.3.46,410849 +github.aaakk.us.kg-1ecc6299db9ec823,psl-types-2.0.11,25329 +github.aaakk.us.kg-1ecc6299db9ec823,apub-core-0.2.0,52434 +github.aaakk.us.kg-1ecc6299db9ec823,thiserror-1.0.22,59077 +github.aaakk.us.kg-1ecc6299db9ec823,num-complex-0.4.3,139539 +github.aaakk.us.kg-1ecc6299db9ec823,autocfg-1.0.1,41521 +github.aaakk.us.kg-1ecc6299db9ec823,amethyst_locale-0.15.3,4896 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-timer-0.2.11,167147 +github.aaakk.us.kg-1ecc6299db9ec823,pipe-trait-0.2.1,11031 +github.aaakk.us.kg-1ecc6299db9ec823,http-muncher-0.3.2,259101 +github.aaakk.us.kg-1ecc6299db9ec823,thin-dst-1.1.0,46297 +github.aaakk.us.kg-1ecc6299db9ec823,float-ord-0.2.0,21145 +github.aaakk.us.kg-1ecc6299db9ec823,trust-dns-proto-0.21.2,1312809 +github.aaakk.us.kg-1ecc6299db9ec823,ordered-multimap-0.4.3,178966 +github.aaakk.us.kg-1ecc6299db9ec823,bitflags-0.4.0,33932 +github.aaakk.us.kg-1ecc6299db9ec823,windows_x86_64_gnullvm-0.42.0,3240134 +github.aaakk.us.kg-1ecc6299db9ec823,cargo-util-0.1.2,72189 +github.aaakk.us.kg-1ecc6299db9ec823,serde_with_macros-1.5.2,72325 +github.aaakk.us.kg-1ecc6299db9ec823,wasmer-2.3.0,529984 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-codec-0.1.2,30428 +github.aaakk.us.kg-1ecc6299db9ec823,pico-args-0.5.0,54991 +github.aaakk.us.kg-1ecc6299db9ec823,migformatting-0.1.1,1680 +github.aaakk.us.kg-1ecc6299db9ec823,lexical-core-0.6.7,2382284 +github.aaakk.us.kg-1ecc6299db9ec823,katex-wasmbind-0.10.0,274096 +github.aaakk.us.kg-1ecc6299db9ec823,blender-armature-0.0.1,51371 +github.aaakk.us.kg-1ecc6299db9ec823,twoway-0.2.1,129719 +github.aaakk.us.kg-1ecc6299db9ec823,sha3-0.10.0,540582 +github.aaakk.us.kg-1ecc6299db9ec823,ringbuf-0.2.8,92733 +github.aaakk.us.kg-1ecc6299db9ec823,pest_meta-2.1.3,175833 +github.aaakk.us.kg-1ecc6299db9ec823,selectme-macros-0.7.1,79130 +github.aaakk.us.kg-1ecc6299db9ec823,secp256k1-sys-0.7.0,5303296 +github.aaakk.us.kg-1ecc6299db9ec823,panic-probe-0.3.0,18841 +github.aaakk.us.kg-1ecc6299db9ec823,ron-0.6.6,208755 +github.aaakk.us.kg-1ecc6299db9ec823,defmt-macros-0.3.3,78405 +github.aaakk.us.kg-1ecc6299db9ec823,winapi-x86_64-pc-windows-gnu-0.4.0,53158182 +github.aaakk.us.kg-1ecc6299db9ec823,aph-0.2.0,30088 +github.aaakk.us.kg-1ecc6299db9ec823,winnow-0.4.6,959730 
+github.aaakk.us.kg-1ecc6299db9ec823,syntex_syntax-0.54.0,1272567 +github.aaakk.us.kg-1ecc6299db9ec823,prost-derive-0.11.9,99428 +github.aaakk.us.kg-1ecc6299db9ec823,commoncrypto-sys-0.2.0,16095 +github.aaakk.us.kg-1ecc6299db9ec823,yew-router-macro-0.15.0,42667 +github.aaakk.us.kg-1ecc6299db9ec823,http-range-header-0.3.0,29647 +github.aaakk.us.kg-1ecc6299db9ec823,crossbeam-queue-0.2.3,60131 +github.aaakk.us.kg-1ecc6299db9ec823,slice-deque-0.3.0,271889 +github.aaakk.us.kg-1ecc6299db9ec823,libc-0.2.65,2334946 +github.aaakk.us.kg-1ecc6299db9ec823,minidom-0.14.0,102507 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-native-tls-0.3.0,60313 +github.aaakk.us.kg-1ecc6299db9ec823,glam-0.17.3,1191013 +github.aaakk.us.kg-1ecc6299db9ec823,semver-1.0.6,114819 +github.aaakk.us.kg-1ecc6299db9ec823,cortex-m-rtfm-macros-0.5.1,112048 +github.aaakk.us.kg-1ecc6299db9ec823,bitvec-1.0.0,1006982 +github.aaakk.us.kg-1ecc6299db9ec823,gfx-backend-metal-0.6.5,660301 +github.aaakk.us.kg-1ecc6299db9ec823,object-0.30.1,1467041 +github.aaakk.us.kg-1ecc6299db9ec823,proc-macro-error-attr-0.4.11,18220 +github.aaakk.us.kg-1ecc6299db9ec823,proteus-0.5.0,179567 +github.aaakk.us.kg-1ecc6299db9ec823,crunchy-0.1.6,6678 +github.aaakk.us.kg-1ecc6299db9ec823,once_cell-1.7.2,121632 +github.aaakk.us.kg-1ecc6299db9ec823,rel-0.2.0,14524 +github.aaakk.us.kg-1ecc6299db9ec823,lexical-core-0.7.5,2355166 +github.aaakk.us.kg-1ecc6299db9ec823,windows_x86_64_gnu-0.42.1,10581222 +github.aaakk.us.kg-1ecc6299db9ec823,thread_local-1.1.5,49409 +github.aaakk.us.kg-1ecc6299db9ec823,openssl-sys-0.9.63,285709 +github.aaakk.us.kg-1ecc6299db9ec823,simplelog-0.11.2,85170 +github.aaakk.us.kg-1ecc6299db9ec823,thiserror-impl-1.0.25,55249 +github.aaakk.us.kg-1ecc6299db9ec823,quanta-0.10.0,82241 +github.aaakk.us.kg-1ecc6299db9ec823,vsmtp-common-1.4.0-rc.10,122740 +github.aaakk.us.kg-1ecc6299db9ec823,tonic-0.1.0-alpha.6,302938 +github.aaakk.us.kg-1ecc6299db9ec823,ecdsa-0.16.1,121203 +github.aaakk.us.kg-1ecc6299db9ec823,deltae-0.3.0,2871017 +github.aaakk.us.kg-1ecc6299db9ec823,phf_shared-0.11.1,30454 +github.aaakk.us.kg-1ecc6299db9ec823,trustfall-rustdoc-adapter-22.5.2,5348192 +github.aaakk.us.kg-1ecc6299db9ec823,mockall_derive-0.11.0,227736 +github.aaakk.us.kg-1ecc6299db9ec823,wasm-bindgen-0.2.64,584320 +github.aaakk.us.kg-1ecc6299db9ec823,sg-std-0.12.0,27020 +github.aaakk.us.kg-1ecc6299db9ec823,chalk-ir-0.87.0,288472 +github.aaakk.us.kg-1ecc6299db9ec823,environment-0.1.1,9957 +github.aaakk.us.kg-1ecc6299db9ec823,crash-handler-0.3.3,125183 +github.aaakk.us.kg-1ecc6299db9ec823,bindgen-0.59.2,958852 +github.aaakk.us.kg-1ecc6299db9ec823,serde_path_to_error-0.1.7,101591 +github.aaakk.us.kg-1ecc6299db9ec823,tinyvec-0.3.3,77508 +github.aaakk.us.kg-1ecc6299db9ec823,precomputed-hash-0.1.1,2853 +github.aaakk.us.kg-1ecc6299db9ec823,rustc-rayon-core-0.4.1,264995 +github.aaakk.us.kg-1ecc6299db9ec823,gix-sec-0.6.2,57428 +github.aaakk.us.kg-1ecc6299db9ec823,pistoncore-input-0.19.0,83490 +github.aaakk.us.kg-1ecc6299db9ec823,gloo-utils-0.1.5,15602 +github.aaakk.us.kg-1ecc6299db9ec823,redox_intelflash-0.1.3,28056 +github.aaakk.us.kg-1ecc6299db9ec823,block2-0.2.0-alpha.6,39192 +github.aaakk.us.kg-1ecc6299db9ec823,fastly-shared-0.9.1,19292 +github.aaakk.us.kg-1ecc6299db9ec823,ibc-chain-registry-0.1.0,48243 +github.aaakk.us.kg-1ecc6299db9ec823,socket2-0.4.4,205035 +github.aaakk.us.kg-1ecc6299db9ec823,futures-channel-0.3.19,132274 +github.aaakk.us.kg-1ecc6299db9ec823,structopt-0.3.16,217443 +github.aaakk.us.kg-1ecc6299db9ec823,rusty-fork-0.2.2,64570 
+github.aaakk.us.kg-1ecc6299db9ec823,parking_lot_core-0.9.7,139601 +github.aaakk.us.kg-1ecc6299db9ec823,async-lock-2.6.0,99844 +github.aaakk.us.kg-1ecc6299db9ec823,bindgen-0.56.0,923373 +github.aaakk.us.kg-1ecc6299db9ec823,quad-rand-0.2.1,9108 +github.aaakk.us.kg-1ecc6299db9ec823,wasmflow-codec-0.10.0,12343 +github.aaakk.us.kg-1ecc6299db9ec823,gix-0.38.0,883190 +github.aaakk.us.kg-1ecc6299db9ec823,futures-macro-0.3.27,38519 +github.aaakk.us.kg-1ecc6299db9ec823,portable-atomic-0.3.13,549649 +github.aaakk.us.kg-1ecc6299db9ec823,portable-atomic-1.3.2,799707 +github.aaakk.us.kg-1ecc6299db9ec823,bevy-crevice-derive-0.6.0,16165 +github.aaakk.us.kg-1ecc6299db9ec823,gltf-json-0.15.2,118263 +github.aaakk.us.kg-1ecc6299db9ec823,struple-impl-0.1.0,4096 +github.aaakk.us.kg-1ecc6299db9ec823,annotate-snippets-0.9.1,153174 +github.aaakk.us.kg-1ecc6299db9ec823,futures-core-0.3.28,46207 +github.aaakk.us.kg-1ecc6299db9ec823,wezterm-bidi-0.2.2,361283 +github.aaakk.us.kg-1ecc6299db9ec823,mildew-0.1.2,3002 +github.aaakk.us.kg-1ecc6299db9ec823,bytecount-0.6.3,46567 +github.aaakk.us.kg-1ecc6299db9ec823,numext-fixed-hash-core-0.1.6,7403 +github.aaakk.us.kg-1ecc6299db9ec823,bytesize-1.1.0,34012 +github.aaakk.us.kg-1ecc6299db9ec823,oxsdatatypes-0.1.0,174662 +github.aaakk.us.kg-1ecc6299db9ec823,hostname-0.1.5,4811 +github.aaakk.us.kg-1ecc6299db9ec823,io-lifetimes-1.0.4,207652 +github.aaakk.us.kg-1ecc6299db9ec823,derive_builder_core-0.11.2,135502 +github.aaakk.us.kg-1ecc6299db9ec823,ttf-parser-0.15.2,711615 +github.aaakk.us.kg-1ecc6299db9ec823,tracing-opentelemetry-0.17.4,187675 +github.aaakk.us.kg-1ecc6299db9ec823,ab_glyph_rasterizer-0.1.7,34278 +github.aaakk.us.kg-1ecc6299db9ec823,bevy_diagnostic-0.6.0,14396 +github.aaakk.us.kg-1ecc6299db9ec823,toml_datetime-0.5.0,34801 +github.aaakk.us.kg-1ecc6299db9ec823,wasm-parser-0.1.7,39726 +github.aaakk.us.kg-1ecc6299db9ec823,ppv-null-0.1.2,26098 +github.aaakk.us.kg-1ecc6299db9ec823,ci_info-0.10.2,1197933 +github.aaakk.us.kg-1ecc6299db9ec823,jobserver-0.1.21,72720 +github.aaakk.us.kg-1ecc6299db9ec823,sentencepiece-sys-0.10.0,10055292 +github.aaakk.us.kg-1ecc6299db9ec823,zstd-sys-2.0.1+zstd.1.5.2,3387955 +github.aaakk.us.kg-1ecc6299db9ec823,byte-strings-proc_macros-0.2.2,7886 +github.aaakk.us.kg-1ecc6299db9ec823,snapbox-0.4.11,193312 +github.aaakk.us.kg-1ecc6299db9ec823,ron-0.6.4,198516 +github.aaakk.us.kg-1ecc6299db9ec823,gix-object-0.28.0,102536 +github.aaakk.us.kg-1ecc6299db9ec823,strum_macros-0.23.1,87403 +github.aaakk.us.kg-1ecc6299db9ec823,defmt-0.3.2,93568 +github.aaakk.us.kg-1ecc6299db9ec823,openssl-0.10.35,971227 +github.aaakk.us.kg-1ecc6299db9ec823,gtk-sys-0.14.0,1376726 +github.aaakk.us.kg-1ecc6299db9ec823,gpu-alloc-0.4.7,99476 +github.aaakk.us.kg-1ecc6299db9ec823,colored-2.0.0,91075 +github.aaakk.us.kg-1ecc6299db9ec823,fixedbitset-0.4.2,67872 +github.aaakk.us.kg-1ecc6299db9ec823,argparse-0.2.2,95032 +github.aaakk.us.kg-1ecc6299db9ec823,bevy_mod_raycast-0.6.2,456756 +github.aaakk.us.kg-1ecc6299db9ec823,byte-strings-0.2.2,35209 +github.aaakk.us.kg-1ecc6299db9ec823,mem_tools-0.1.0,937956 +github.aaakk.us.kg-1ecc6299db9ec823,deno_core-0.167.0,11067700 +github.aaakk.us.kg-1ecc6299db9ec823,rocksdb-0.19.0,628015 +github.aaakk.us.kg-1ecc6299db9ec823,num-traits-0.2.12,231414 +github.aaakk.us.kg-1ecc6299db9ec823,type-info-derive-0.2.0,56221 +github.aaakk.us.kg-1ecc6299db9ec823,structopt-derive-0.3.4,68017 +github.aaakk.us.kg-1ecc6299db9ec823,extendr-macros-0.3.1,49695 +github.aaakk.us.kg-1ecc6299db9ec823,secret-cosmwasm-std-1.0.0,632711 
+github.aaakk.us.kg-1ecc6299db9ec823,skim-0.7.0,380243 +github.aaakk.us.kg-1ecc6299db9ec823,serde-1.0.135,501463 +github.aaakk.us.kg-1ecc6299db9ec823,lock_api-0.1.5,109183 +github.aaakk.us.kg-1ecc6299db9ec823,cw-multi-test-0.16.2,445599 +github.aaakk.us.kg-1ecc6299db9ec823,quote-1.0.10,120640 +github.aaakk.us.kg-1ecc6299db9ec823,safemem-0.3.2,17382 +github.aaakk.us.kg-1ecc6299db9ec823,gloo-dialogs-0.1.1,4653 +github.aaakk.us.kg-1ecc6299db9ec823,dashmap-4.0.2,105438 +github.aaakk.us.kg-1ecc6299db9ec823,oorandom-11.1.0,31893 +github.aaakk.us.kg-1ecc6299db9ec823,polars-core-0.21.1,1678691 +github.aaakk.us.kg-1ecc6299db9ec823,claxon-0.4.2,259276 +github.aaakk.us.kg-1ecc6299db9ec823,cc-1.0.35,179169 +github.aaakk.us.kg-1ecc6299db9ec823,cocoa-0.19.1,296083 +github.aaakk.us.kg-1ecc6299db9ec823,tokio-1.9.0,2490393 +github.aaakk.us.kg-1ecc6299db9ec823,gix-refspec-0.10.1,105495 +github.aaakk.us.kg-1ecc6299db9ec823,futures-task-0.3.12,39561 +github.aaakk.us.kg-1ecc6299db9ec823,sqlx-core-0.4.2,1064795 +github.aaakk.us.kg-1ecc6299db9ec823,futures-task-0.3.14,39566 +github.aaakk.us.kg-1ecc6299db9ec823,datastore_grpc-0.4.0,18233399 +github.aaakk.us.kg-1ecc6299db9ec823,directories-4.0.1,74013 +github.aaakk.us.kg-1ecc6299db9ec823,wgpu-hal-0.15.1,1201034 +github.aaakk.us.kg-1ecc6299db9ec823,discard-1.0.4,14342 +github.aaakk.us.kg-1ecc6299db9ec823,tinytga-0.1.0,102322 +github.aaakk.us.kg-1ecc6299db9ec823,prost-types-0.10.1,126121 +github.aaakk.us.kg-1ecc6299db9ec823,assert2-0.3.6,36145 +github.aaakk.us.kg-1ecc6299db9ec823,syn-inline-mod-0.5.0,35740 +github.aaakk.us.kg-1ecc6299db9ec823,bat-0.22.1,5407476 +github.aaakk.us.kg-1ecc6299db9ec823,minidumper-child-0.1.0,32329 +github.aaakk.us.kg-1ecc6299db9ec823,libp2p-kad-0.21.0,416675 +github.aaakk.us.kg-1ecc6299db9ec823,asn1_der-0.6.3,1102166 +github.aaakk.us.kg-1ecc6299db9ec823,h2-0.2.4,764682 +github.aaakk.us.kg-1ecc6299db9ec823,ena-0.14.2,90713 +github.aaakk.us.kg-1ecc6299db9ec823,prost-build-0.8.0,31248726 +github.aaakk.us.kg-1ecc6299db9ec823,wasmer-compiler-cranelift-3.1.1,300456 +github.aaakk.us.kg-1ecc6299db9ec823,gfx-hal-0.7.0,238750 +github.aaakk.us.kg-1ecc6299db9ec823,nom-4.2.3,644514 +github.aaakk.us.kg-1ecc6299db9ec823,os_str_bytes-2.4.0,52159 +github.aaakk.us.kg-1ecc6299db9ec823,sourcemap-6.2.1,135303 +github.aaakk.us.kg-1ecc6299db9ec823,actix-router-0.5.1,150753 +github.aaakk.us.kg-1ecc6299db9ec823,markup5ever-0.9.0,229731 +github.aaakk.us.kg-1ecc6299db9ec823,gloo-worker-0.2.1,31624 +github.aaakk.us.kg-1ecc6299db9ec823,object-0.25.3,1313095 +github.aaakk.us.kg-1ecc6299db9ec823,rustversion-1.0.0,41602 diff --git a/benches/benchsuite/src/bin/capture-last-use.rs b/benches/benchsuite/src/bin/capture-last-use.rs new file mode 100644 index 00000000000..3034d49ac97 --- /dev/null +++ b/benches/benchsuite/src/bin/capture-last-use.rs @@ -0,0 +1,148 @@ +//! Utility for capturing a global cache last-use database based on the files +//! on a real-world system. +//! +//! This will look in the CARGO_HOME of the current system and record last-use +//! data for all files in the cache. This is intended to provide a real-world +//! example for a benchmark that should be close to what a real set of data +//! should look like. +//! +//! See `benches/global_cache_tracker.rs` for the benchmark that uses this +//! data. +//! +//! The database is kept in git. It usually shouldn't need to be re-generated +//! unless there is a change in the schema or the benchmark. 
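+//!
+//! Running it leaves two files in `benches/benchsuite/global-cache-tracker/`:
+//! `global-cache-sample` (the captured last-use database) and `random-sample`,
+//! 500 randomly chosen `registry,package_dir,size` lines that the update
+//! benchmark replays.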
+
+use cargo::core::global_cache_tracker::{self, DeferredGlobalLastUse, GlobalCacheTracker};
+use cargo::util::cache_lock::CacheLockMode;
+use cargo::util::interning::InternedString;
+use cargo::Config;
+use rand::prelude::SliceRandom;
+use std::collections::HashMap;
+use std::fs;
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+
+fn main() {
+    // Set up config.
+    let shell = cargo::core::Shell::new();
+    let homedir = Path::new(env!("CARGO_MANIFEST_DIR")).join("global-cache-tracker");
+    let cwd = homedir.clone();
+    let mut config = Config::new(shell, cwd, homedir.clone());
+    config
+        .configure(
+            0,
+            false,
+            None,
+            false,
+            false,
+            false,
+            &None,
+            &["gc".to_string()],
+            &[],
+        )
+        .unwrap();
+    let db_path = GlobalCacheTracker::db_path(&config).into_path_unlocked();
+    if db_path.exists() {
+        fs::remove_file(&db_path).unwrap();
+    }
+
+    let _lock = config
+        .acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
+        .unwrap();
+    let mut deferred = DeferredGlobalLastUse::new();
+    let mut tracker = GlobalCacheTracker::new(&config).unwrap();
+
+    let real_home = cargo::util::homedir(&std::env::current_dir().unwrap()).unwrap();
+
+    let cache_dir = real_home.join("registry/cache");
+    for dir_ent in fs::read_dir(cache_dir).unwrap() {
+        let registry = dir_ent.unwrap();
+        let encoded_registry_name = InternedString::new(&registry.file_name().to_string_lossy());
+        for krate in fs::read_dir(registry.path()).unwrap() {
+            let krate = krate.unwrap();
+            let meta = krate.metadata().unwrap();
+            deferred.mark_registry_crate_used_stamp(
+                global_cache_tracker::RegistryCrate {
+                    encoded_registry_name,
+                    crate_filename: krate.file_name().to_string_lossy().as_ref().into(),
+                    size: meta.len(),
+                },
+                Some(&meta.modified().unwrap()),
+            );
+        }
+    }
+
+    let mut src_entries = Vec::new();
+
+    let cache_dir = real_home.join("registry/src");
+    for dir_ent in fs::read_dir(cache_dir).unwrap() {
+        let registry = dir_ent.unwrap();
+        let encoded_registry_name = InternedString::new(&registry.file_name().to_string_lossy());
+        for krate in fs::read_dir(registry.path()).unwrap() {
+            let krate = krate.unwrap();
+            let meta = krate.metadata().unwrap();
+            let src = global_cache_tracker::RegistrySrc {
+                encoded_registry_name,
+                package_dir: krate.file_name().to_string_lossy().as_ref().into(),
+                size: Some(cargo_util::du(&krate.path(), &[]).unwrap()),
+            };
+            src_entries.push(src.clone());
+            let timestamp = meta.modified().unwrap();
+            deferred.mark_registry_src_used_stamp(src, Some(&timestamp));
+        }
+    }
+
+    let git_co_dir = real_home.join("git/checkouts");
+    for dir_ent in fs::read_dir(git_co_dir).unwrap() {
+        let git_source = dir_ent.unwrap();
+        let encoded_git_name = InternedString::new(&git_source.file_name().to_string_lossy());
+        for co in fs::read_dir(git_source.path()).unwrap() {
+            let co = co.unwrap();
+            let meta = co.metadata().unwrap();
+            deferred.mark_git_checkout_used_stamp(
+                global_cache_tracker::GitCheckout {
+                    encoded_git_name,
+                    short_name: co.file_name().to_string_lossy().as_ref().into(),
+                    size: Some(cargo_util::du(&co.path(), &[]).unwrap()),
+                },
+                Some(&meta.modified().unwrap()),
+            );
+        }
+    }
+
+    deferred.save(&mut tracker).unwrap();
+    drop(deferred);
+    drop(tracker);
+    fs::rename(&db_path, homedir.join("global-cache-sample")).unwrap();
+    // Clean up the lock file created above.
+    fs::remove_file(homedir.join(".package-cache")).unwrap();
+
+    // Save a random sample of crates that the benchmark should update.
+    // Pick whichever registry has the most entries. This is to be somewhat
+    // realistic for the common case that all dependencies come from one
+    // registry (crates.io).
+    let mut counts = HashMap::new();
+    for src in &src_entries {
+        let c: &mut u32 = counts.entry(src.encoded_registry_name).or_default();
+        *c += 1;
+    }
+    let mut counts: Vec<_> = counts.into_iter().map(|(k, v)| (v, k)).collect();
+    counts.sort();
+    let biggest = counts.last().unwrap().1;
+
+    src_entries.retain(|src| src.encoded_registry_name == biggest);
+    let mut rng = &mut rand::thread_rng();
+    let sample: Vec<_> = src_entries.choose_multiple(&mut rng, 500).collect();
+    let mut f = File::create(homedir.join("random-sample")).unwrap();
+    for src in sample {
+        writeln!(
+            f,
+            "{},{},{}",
+            src.encoded_registry_name,
+            src.package_dir,
+            src.size.unwrap()
+        )
+        .unwrap();
+    }
+}
diff --git a/crates/cargo-test-support/Cargo.toml b/crates/cargo-test-support/Cargo.toml
index fc32e1c9cba..42f8c2af915 100644
--- a/crates/cargo-test-support/Cargo.toml
+++ b/crates/cargo-test-support/Cargo.toml
@@ -29,6 +29,7 @@ tar.workspace = true
 time.workspace = true
 toml.workspace = true
 url.workspace = true
+walkdir.workspace = true
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { workspace = true, features = ["Win32_Storage_FileSystem"] }
diff --git a/crates/cargo-test-support/src/paths.rs b/crates/cargo-test-support/src/paths.rs
index 50040e1d4cd..a07491bccec 100644
--- a/crates/cargo-test-support/src/paths.rs
+++ b/crates/cargo-test-support/src/paths.rs
@@ -114,6 +114,10 @@ pub trait CargoPathExt {
     fn rm_rf(&self);
     fn mkdir_p(&self);
 
+    /// Returns a list of all files and directories underneath the given
+    /// directory, recursively, including the starting path.
+    fn ls_r(&self) -> Vec<PathBuf>;
+
     fn move_into_the_past(&self) {
         self.move_in_time(|sec, nsec| (sec - 3600, nsec))
     }
@@ -155,6 +159,14 @@ impl CargoPathExt for Path {
             .unwrap_or_else(|e| panic!("failed to mkdir_p {}: {}", self.display(), e))
     }
 
+    fn ls_r(&self) -> Vec<PathBuf> {
+        walkdir::WalkDir::new(self)
+            .sort_by_file_name()
+            .into_iter()
+            .filter_map(|e| e.map(|e| e.path().to_owned()).ok())
+            .collect()
+    }
+
     fn move_in_time<F>(&self, travel_amount: F)
     where
         F: Fn(i64, u32) -> (i64, u32),
diff --git a/crates/cargo-util/Cargo.toml b/crates/cargo-util/Cargo.toml
index 616a79c5ed1..d4376d05bd0 100644
--- a/crates/cargo-util/Cargo.toml
+++ b/crates/cargo-util/Cargo.toml
@@ -12,6 +12,7 @@ description = "Miscellaneous support code used by Cargo."
 anyhow.workspace = true
 filetime.workspace = true
 hex.workspace = true
+ignore.workspace = true
 jobserver.workspace = true
 libc.workspace = true
 same-file.workspace = true
diff --git a/crates/cargo-util/src/du.rs b/crates/cargo-util/src/du.rs
new file mode 100644
index 00000000000..a4f2cbe8c99
--- /dev/null
+++ b/crates/cargo-util/src/du.rs
@@ -0,0 +1,77 @@
+//! A simple disk usage estimator.
+
+use anyhow::{Context, Result};
+use ignore::overrides::OverrideBuilder;
+use ignore::{WalkBuilder, WalkState};
+use std::path::Path;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+
+/// Determines the disk usage of all files in the given directory.
+///
+/// The given patterns are gitignore style patterns relative to the given
+/// path. If there are patterns, it will only count things matching that
+/// pattern. `!` can be used to exclude things. See [`OverrideBuilder::add`]
+/// for more info.
+///
+/// This is a primitive implementation that doesn't handle hard links, and
+/// isn't particularly fast (for example, not using `getattrlistbulk` on
+/// macOS). It also only uses actual byte sizes instead of block counts (and
+/// thus vastly undercounts directories with lots of small files). It would be
+/// nice to improve this or replace it with something better.
+pub fn du(path: &Path, patterns: &[&str]) -> Result<u64> {
+    du_inner(path, patterns).with_context(|| format!("failed to walk `{}`", path.display()))
+}
+
+fn du_inner(path: &Path, patterns: &[&str]) -> Result<u64> {
+    let mut builder = OverrideBuilder::new(path);
+    for pattern in patterns {
+        builder.add(pattern)?;
+    }
+    let overrides = builder.build()?;
+
+    let mut builder = WalkBuilder::new(path);
+    builder
+        .overrides(overrides)
+        .hidden(false)
+        .parents(false)
+        .ignore(false)
+        .git_global(false)
+        .git_ignore(false)
+        .git_exclude(false);
+    let walker = builder.build_parallel();
+    let total = Arc::new(AtomicU64::new(0));
+    // A slot used to indicate there was an error while walking.
+    //
+    // It is possible that more than one error happens (such as in different
+    // threads). The error returned is arbitrary in that case.
+    let err = Arc::new(Mutex::new(None));
+    walker.run(|| {
+        Box::new(|entry| {
+            match entry {
+                Ok(entry) => match entry.metadata() {
+                    Ok(meta) => {
+                        if meta.is_file() {
+                            total.fetch_add(meta.len(), Ordering::SeqCst);
+                        }
+                    }
+                    Err(e) => {
+                        *err.lock().unwrap() = Some(e.into());
+                        return WalkState::Quit;
+                    }
+                },
+                Err(e) => {
+                    *err.lock().unwrap() = Some(e.into());
+                    return WalkState::Quit;
+                }
+            }
+            WalkState::Continue
+        })
+    });
+
+    if let Some(e) = err.lock().unwrap().take() {
+        return Err(e);
+    }
+
+    Ok(total.load(Ordering::SeqCst))
+}
diff --git a/crates/cargo-util/src/lib.rs b/crates/cargo-util/src/lib.rs
index 0cbc920ecf3..599d7d861c1 100644
--- a/crates/cargo-util/src/lib.rs
+++ b/crates/cargo-util/src/lib.rs
@@ -1,10 +1,12 @@
 //! Miscellaneous support code used by Cargo.
 
 pub use self::read2::read2;
+pub use du::du;
 pub use process_builder::ProcessBuilder;
 pub use process_error::{exit_status_to_string, is_simple_exit_code, ProcessError};
 pub use sha256::Sha256;
 
+mod du;
 pub mod paths;
 mod process_builder;
 mod process_error;
diff --git a/src/bin/cargo/commands/clean.rs b/src/bin/cargo/commands/clean.rs
index 8596561c90c..c44a2968114 100644
--- a/src/bin/cargo/commands/clean.rs
+++ b/src/bin/cargo/commands/clean.rs
@@ -1,7 +1,12 @@
 use crate::command_prelude::*;
-
+use crate::util::cache_lock::CacheLockMode;
+use cargo::core::gc::Gc;
+use cargo::core::gc::{parse_human_size, parse_time_span, GcOpts};
+use cargo::core::global_cache_tracker::GlobalCacheTracker;
+use cargo::ops::CleanContext;
 use cargo::ops::{self, CleanOptions};
 use cargo::util::print_available_packages;
+use std::time::Duration;
 
 pub fn cli() -> Command {
     subcommand("clean")
@@ -15,12 +20,123 @@ pub fn cli() -> Command {
         .arg_target_dir()
         .arg_manifest_path()
         .arg_dry_run("Display what would be deleted without deleting anything")
+        .args_conflicts_with_subcommands(true)
+        .subcommand(
+            subcommand("gc")
+                .about("Clean global caches")
+                .hide(true)
+                // FIXME: arg_quiet doesn't work because `config_configure`
+                // doesn't know about subcommands.
+                .arg_dry_run("Display what would be deleted without deleting anything")
+                // NOTE: Not all of these options may get stabilized. Some of them are
+                // very low-level details, and may not be something typical users need.
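+                // The DURATION and SIZE strings are parsed with
+                // `parse_time_span` and `parse_human_size`; the defaults in
+                // `core::gc` suggest the expected duration form (e.g.
+                // "1 day", "3 months").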
+                .arg(
+                    opt(
+                        "max-src-age",
+                        "Deletes source cache files that have not been used \
+                        since the given age (unstable)",
+                    )
+                    .value_name("DURATION")
+                    .value_parser(parse_time_span),
+                )
+                .arg(
+                    opt(
+                        "max-crate-age",
+                        "Deletes crate cache files that have not been used \
+                        since the given age (unstable)",
+                    )
+                    .value_name("DURATION")
+                    .value_parser(parse_time_span),
+                )
+                .arg(
+                    opt(
+                        "max-index-age",
+                        "Deletes registry indexes that have not been used \
+                        since the given age (unstable)",
+                    )
+                    .value_name("DURATION")
+                    .value_parser(parse_time_span),
+                )
+                .arg(
+                    opt(
+                        "max-git-co-age",
+                        "Deletes git dependency checkouts that have not been used \
+                        since the given age (unstable)",
+                    )
+                    .value_name("DURATION")
+                    .value_parser(parse_time_span),
+                )
+                .arg(
+                    opt(
+                        "max-git-db-age",
+                        "Deletes git dependency clones that have not been used \
+                        since the given age (unstable)",
+                    )
+                    .value_name("DURATION")
+                    .value_parser(parse_time_span),
+                )
+                .arg(
+                    opt(
+                        "max-download-age",
+                        "Deletes any downloaded cache data that has not been used \
+                        since the given age (unstable)",
+                    )
+                    .value_name("DURATION")
+                    .value_parser(parse_time_span),
+                )
+                .arg(
+                    opt(
+                        "max-src-size",
+                        "Deletes source cache files until the cache is under the \
+                        given size (unstable)",
+                    )
+                    .value_name("SIZE")
+                    .value_parser(parse_human_size),
+                )
+                .arg(
+                    opt(
+                        "max-crate-size",
+                        "Deletes crate cache files until the cache is under the \
+                        given size (unstable)",
+                    )
+                    .value_name("SIZE")
+                    .value_parser(parse_human_size),
+                )
+                .arg(
+                    opt(
+                        "max-git-size",
+                        "Deletes git dependency caches until the cache is under \
+                        the given size (unstable)",
+                    )
+                    .value_name("SIZE")
+                    .value_parser(parse_human_size),
+                )
+                .arg(
+                    opt(
+                        "max-download-size",
+                        "Deletes downloaded cache data until the cache is under \
+                        the given size (unstable)",
+                    )
+                    .value_name("SIZE")
+                    .value_parser(parse_human_size),
+                ),
+        )
         .after_help(color_print::cstr!(
             "Run `cargo help clean` for more detailed information.\n"
         ))
 }
 
 pub fn exec(config: &mut Config, args: &ArgMatches) -> CliResult {
+    match args.subcommand() {
+        Some(("gc", args)) => {
+            return gc(config, args);
+        }
+        Some((cmd, _)) => {
+            unreachable!("unexpected command {}", cmd)
+        }
+        None => {}
+    }
+
     let ws = args.workspace(config)?;
 
     if args.is_present_with_zero_values("package") {
@@ -39,3 +155,44 @@ pub fn exec(config: &mut Config, args: &ArgMatches) -> CliResult {
     ops::clean(&ws, &opts)?;
     Ok(())
 }
+
+fn gc(config: &Config, args: &ArgMatches) -> CliResult {
+    config.cli_unstable().fail_if_stable_command(
+        config,
+        "clean gc",
+        12633,
+        "gc",
+        config.cli_unstable().gc,
+    )?;
+
+    let size_opt = |opt| -> Option<u64> { args.get_one::<u64>(opt).copied() };
+    let duration_opt = |opt| -> Option<Duration> { args.get_one::<Duration>(opt).copied() };
+    let mut gc_opts = GcOpts {
+        max_src_age: duration_opt("max-src-age"),
+        max_crate_age: duration_opt("max-crate-age"),
+        max_index_age: duration_opt("max-index-age"),
+        max_git_co_age: duration_opt("max-git-co-age"),
+        max_git_db_age: duration_opt("max-git-db-age"),
+        max_src_size: size_opt("max-src-size"),
+        max_crate_size: size_opt("max-crate-size"),
+        max_git_size: size_opt("max-git-size"),
+        max_download_size: size_opt("max-download-size"),
+    };
+    if let Some(age) = duration_opt("max-download-age") {
+        gc_opts.set_max_download_age(age);
+    }
+    // If the user sets any options, then only perform the options requested.
+    // If no options are set, do the default behavior.
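+    // (`update_for_auto_gc` applies the defaults that automatic gc would
+    // use; see `core::gc`.)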
+ if !gc_opts.is_download_cache_opt_set() { + gc_opts.update_for_auto_gc(config)?; + } + + let _lock = config.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?; + let mut cache_track = GlobalCacheTracker::new(&config)?; + let mut gc = Gc::new(config, &mut cache_track)?; + let mut clean_ctx = CleanContext::new(config); + clean_ctx.dry_run = args.dry_run(); + gc.gc(&mut clean_ctx, &gc_opts)?; + clean_ctx.display_summary()?; + Ok(()) +} diff --git a/src/bin/cargo/commands/config.rs b/src/bin/cargo/commands/config.rs index 84c5e9209b8..feea9ed2876 100644 --- a/src/bin/cargo/commands/config.rs +++ b/src/bin/cargo/commands/config.rs @@ -31,9 +31,13 @@ pub fn cli() -> Command { } pub fn exec(config: &mut Config, args: &ArgMatches) -> CliResult { - config - .cli_unstable() - .fail_if_stable_command(config, "config", 9301)?; + config.cli_unstable().fail_if_stable_command( + config, + "config", + 9301, + "unstable-options", + config.cli_unstable().unstable_options, + )?; match args.subcommand() { Some(("get", args)) => { let opts = cargo_config::GetOptions { diff --git a/src/cargo/core/features.rs b/src/cargo/core/features.rs index 72a267f0402..2ce4a57c072 100644 --- a/src/cargo/core/features.rs +++ b/src/cargo/core/features.rs @@ -741,6 +741,7 @@ unstable_cli_options!( doctest_xcompile: bool = ("Compile and run doctests for non-host target using runner config"), dual_proc_macros: bool = ("Build proc-macros for both the host and the target"), features: Option> = (HIDDEN), + gc: bool = ("Track cache usage and \"garbage collect\" unused files"), gitoxide: Option = ("Use gitoxide for the given git interactions, or all of them if no argument is given"), host_config: bool = ("Enable the [host] section in the .cargo/config.toml file"), lints: bool = ("Pass `[lints]` to the linting tools"), @@ -1077,6 +1078,7 @@ impl CliUnstable { "direct-minimal-versions" => self.direct_minimal_versions = parse_empty(k, v)?, "doctest-xcompile" => self.doctest_xcompile = parse_empty(k, v)?, "dual-proc-macros" => self.dual_proc_macros = parse_empty(k, v)?, + "gc" => self.gc = parse_empty(k, v)?, "gitoxide" => { self.gitoxide = v.map_or_else( || Ok(Some(GitoxideFeatures::all())), @@ -1114,7 +1116,17 @@ impl CliUnstable { /// Generates an error if `-Z unstable-options` was not used for a new, /// unstable command-line flag. pub fn fail_if_stable_opt(&self, flag: &str, issue: u32) -> CargoResult<()> { - if !self.unstable_options { + self.fail_if_stable_opt_custom_z(flag, issue, "unstable-options", self.unstable_options) + } + + pub fn fail_if_stable_opt_custom_z( + &self, + flag: &str, + issue: u32, + z_name: &str, + enabled: bool, + ) -> CargoResult<()> { + if !enabled { let see = format!( "See https://github.com/rust-lang/cargo/issues/{issue} for more \ information about the `{flag}` flag." 
@@ -1123,7 +1135,7 @@ impl CliUnstable {
         let channel = channel();
         if channel == "nightly" || channel == "dev" {
             bail!(
-                "the `{flag}` flag is unstable, pass `-Z unstable-options` to enable it\n\
+                "the `{flag}` flag is unstable, pass `-Z {z_name}` to enable it\n\
                 {see}"
             );
         } else {
@@ -1145,8 +1157,10 @@ impl CliUnstable {
         config: &Config,
         command: &str,
         issue: u32,
+        z_name: &str,
+        enabled: bool,
     ) -> CargoResult<()> {
-        if self.unstable_options {
+        if enabled {
             return Ok(());
         }
         let see = format!(
@@ -1156,10 +1170,9 @@ impl CliUnstable {
         );
         if config.nightly_features_allowed {
             bail!(
-                "the `cargo {}` command is unstable, pass `-Z unstable-options` to enable it\n\
-                 {}",
-                command,
-                see
+                "the `cargo {command}` command is unstable, pass `-Z {z_name}` \
+                 to enable it\n\
+                 {see}",
             );
         } else {
             bail!(
diff --git a/src/cargo/core/gc.rs b/src/cargo/core/gc.rs
new file mode 100644
index 00000000000..565078ff006
--- /dev/null
+++ b/src/cargo/core/gc.rs
@@ -0,0 +1,509 @@
+//! Support for garbage collecting unused files, such as downloaded files or
+//! artifacts from the target directory.
+//!
+//! The [`Gc`] type provides the high-level interface for the
+//! garbage-collection system.
+//!
+//! Garbage collection can be done "automatically" by cargo, which it does by
+//! default once a day when running any command that does a lot of work (like
+//! `cargo build`). The entry point for this is the [`auto_gc`] function,
+//! which handles some basic setup, creating the [`Gc`], and calling
+//! [`Gc::auto`].
+//!
+//! Garbage collection can also be done manually via the `cargo clean` command
+//! by passing any option that requests deleting unused files. That is
+//! implemented by calling the [`Gc::gc`] method.
+//!
+//! Garbage collection for the global cache is guided by the last-use tracking
+//! implemented in the [`crate::core::global_cache_tracker`] module. See that
+//! module documentation for an in-depth explanation of how global cache
+//! tracking works.
+
+use crate::core::global_cache_tracker::{self, GlobalCacheTracker};
+use crate::ops::CleanContext;
+use crate::util::cache_lock::{CacheLock, CacheLockMode};
+use crate::{CargoResult, Config};
+use anyhow::{format_err, Context};
+use serde::Deserialize;
+use std::time::Duration;
+
+/// Default max age to auto-clean extracted sources, which can be recovered
+/// without downloading anything.
+const DEFAULT_MAX_AGE_EXTRACTED: &str = "1 month";
+/// Default max age to auto-clean cache data, which must be downloaded to
+/// recover.
+const DEFAULT_MAX_AGE_DOWNLOADED: &str = "3 months";
+/// How often auto-gc will run by default unless overridden in the config.
+const DEFAULT_AUTO_FREQUENCY: &str = "1 day";
+
+/// Performs automatic garbage collection.
+///
+/// This is called in various places in Cargo where garbage collection should
+/// be performed automatically based on the config settings. The default
+/// behavior is to only clean once a day.
+///
+/// This should only be called in code paths for commands that are already
+/// doing a lot of work. It should only be called *after* crates are
+/// downloaded so that the last-use data is updated first.
+///
+/// It should be cheap to call this multiple times (subsequent calls are
+/// ignored), but try not to abuse that.
+pub fn auto_gc(config: &Config) {
+    if !config.cli_unstable().gc {
+        return;
+    }
+    if !config.network_allowed() {
+        // As a conservative choice, auto-gc is disabled when offline.
+        // If the user is indefinitely offline, we don't want to delete
+        // things they may later depend on.
+        tracing::trace!(target: "gc", "running offline, auto gc disabled");
+        return;
+    }
+
+    if let Err(e) = auto_gc_inner(config) {
+        if global_cache_tracker::is_silent_error(&e) && !config.extra_verbose() {
+            tracing::warn!(target: "gc", "failed to auto-clean cache data: {e:?}");
+        } else {
+            crate::display_warning_with_error(
+                "failed to auto-clean cache data",
+                &e,
+                &mut config.shell(),
+            );
+        }
+    }
+}
+
+fn auto_gc_inner(config: &Config) -> CargoResult<()> {
+    let _lock = match config.try_acquire_package_cache_lock(CacheLockMode::MutateExclusive)? {
+        Some(lock) => lock,
+        None => {
+            tracing::debug!(target: "gc", "unable to acquire mutate lock, auto gc disabled");
+            return Ok(());
+        }
+    };
+    // This should not be called when there are pending deferred entries, so check that.
+    let deferred = config.deferred_global_last_use()?;
+    debug_assert!(deferred.is_empty());
+    let mut global_cache_tracker = config.global_cache_tracker()?;
+    let mut gc = Gc::new(config, &mut global_cache_tracker)?;
+    let mut clean_ctx = CleanContext::new(config);
+    gc.auto(&mut clean_ctx)?;
+    Ok(())
+}
+
+/// Automatic garbage collection settings from the `gc.auto` config table.
+///
+/// NOTE: Not all of these options may get stabilized. Some of them are very
+/// low-level details, and may not be something typical users need.
+///
+/// If any of these options are `None`, the built-in default is used.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "kebab-case")]
+struct AutoConfig {
+    /// The maximum frequency that automatic garbage collection happens.
+    frequency: Option<String>,
+    /// Anything older than this duration will be deleted from the source cache.
+    max_src_age: Option<String>,
+    /// Anything older than this duration will be deleted from the compressed crate cache.
+    max_crate_age: Option<String>,
+    /// Any index older than this duration will be deleted from the index cache.
+    max_index_age: Option<String>,
+    /// Any git checkout older than this duration will be deleted from the checkout cache.
+    max_git_co_age: Option<String>,
+    /// Any git clone older than this duration will be deleted from the git cache.
+    max_git_db_age: Option<String>,
+}
+
+/// Options to use for garbage collection.
+#[derive(Clone, Debug, Default)]
+pub struct GcOpts {
+    /// The `--max-src-age` CLI option.
+    pub max_src_age: Option<Duration>,
+    /// The `--max-crate-age` CLI option.
+    pub max_crate_age: Option<Duration>,
+    /// The `--max-index-age` CLI option.
+    pub max_index_age: Option<Duration>,
+    /// The `--max-git-co-age` CLI option.
+    pub max_git_co_age: Option<Duration>,
+    /// The `--max-git-db-age` CLI option.
+    pub max_git_db_age: Option<Duration>,
+    /// The `--max-src-size` CLI option.
+    pub max_src_size: Option<u64>,
+    /// The `--max-crate-size` CLI option.
+    pub max_crate_size: Option<u64>,
+    /// The `--max-git-size` CLI option.
+    pub max_git_size: Option<u64>,
+    /// The `--max-download-size` CLI option.
+    pub max_download_size: Option<u64>,
+}
+
+impl GcOpts {
+    /// Returns whether any download cache cleaning options are set.
+    pub fn is_download_cache_opt_set(&self) -> bool {
+        self.max_src_age.is_some()
+            || self.max_crate_age.is_some()
+            || self.max_index_age.is_some()
+            || self.max_git_co_age.is_some()
+            || self.max_git_db_age.is_some()
+            || self.max_src_size.is_some()
+            || self.max_crate_size.is_some()
+            || self.max_git_size.is_some()
+            || self.max_download_size.is_some()
+    }
+
+    /// Returns whether any download cache cleaning options based on size are set.
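+    ///
+    /// Size-based options are distinguished because they trigger the more
+    /// expensive disk-usage scan during the database sync in
+    /// [`crate::core::global_cache_tracker::GlobalCacheTracker::clean`]
+    /// (see `sync_db_with_files`).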
+ pub fn is_download_cache_size_set(&self) -> bool { + self.max_src_size.is_some() + || self.max_crate_size.is_some() + || self.max_git_size.is_some() + || self.max_download_size.is_some() + } + + /// Updates the `GcOpts` to incorporate the specified max download age. + /// + /// "Download" means any cached data that can be re-downloaded. + pub fn set_max_download_age(&mut self, max_download_age: Duration) { + self.max_src_age = Some(maybe_newer_span(max_download_age, self.max_src_age)); + self.max_crate_age = Some(maybe_newer_span(max_download_age, self.max_crate_age)); + self.max_index_age = Some(maybe_newer_span(max_download_age, self.max_index_age)); + self.max_git_co_age = Some(maybe_newer_span(max_download_age, self.max_git_co_age)); + self.max_git_db_age = Some(maybe_newer_span(max_download_age, self.max_git_db_age)); + } + + /// Updates the configuration of this [`GcOpts`] to incorporate the + /// settings from config. + pub fn update_for_auto_gc(&mut self, config: &Config) -> CargoResult<()> { + let auto_config = config + .get::>("gc.auto")? + .unwrap_or_default(); + self.update_for_auto_gc_config(&auto_config) + } + + fn update_for_auto_gc_config(&mut self, auto_config: &AutoConfig) -> CargoResult<()> { + self.max_src_age = newer_time_span_for_config( + self.max_src_age, + "gc.auto.max-src-age", + auto_config + .max_src_age + .as_deref() + .unwrap_or(DEFAULT_MAX_AGE_EXTRACTED), + )?; + self.max_crate_age = newer_time_span_for_config( + self.max_crate_age, + "gc.auto.max-crate-age", + auto_config + .max_crate_age + .as_deref() + .unwrap_or(DEFAULT_MAX_AGE_DOWNLOADED), + )?; + self.max_index_age = newer_time_span_for_config( + self.max_index_age, + "gc.auto.max-index-age", + auto_config + .max_index_age + .as_deref() + .unwrap_or(DEFAULT_MAX_AGE_DOWNLOADED), + )?; + self.max_git_co_age = newer_time_span_for_config( + self.max_git_co_age, + "gc.auto.max-git-co-age", + auto_config + .max_git_co_age + .as_deref() + .unwrap_or(DEFAULT_MAX_AGE_EXTRACTED), + )?; + self.max_git_db_age = newer_time_span_for_config( + self.max_git_db_age, + "gc.auto.max-git-db-age", + auto_config + .max_git_db_age + .as_deref() + .unwrap_or(DEFAULT_MAX_AGE_DOWNLOADED), + )?; + Ok(()) + } +} + +/// Garbage collector. +/// +/// See the module docs at [`crate::core::gc`] for more information on GC. +pub struct Gc<'a, 'config> { + config: &'config Config, + global_cache_tracker: &'a mut GlobalCacheTracker, + /// A lock on the package cache. + /// + /// This is important to be held, since we don't want multiple cargos to + /// be allowed to write to the cache at the same time, or for others to + /// read while we are modifying the cache. + #[allow(dead_code)] // Held for drop. + lock: CacheLock<'config>, +} + +impl<'a, 'config> Gc<'a, 'config> { + pub fn new( + config: &'config Config, + global_cache_tracker: &'a mut GlobalCacheTracker, + ) -> CargoResult> { + let lock = config.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?; + Ok(Gc { + config, + global_cache_tracker, + lock, + }) + } + + /// Performs automatic garbage cleaning. + /// + /// This returns immediately without doing work if garbage collection has + /// been performed recently (since `gc.auto.frequency`). + fn auto(&mut self, clean_ctx: &mut CleanContext<'config>) -> CargoResult<()> { + if !self.config.cli_unstable().gc { + return Ok(()); + } + let auto_config = self + .config + .get::>("gc.auto")? 
+            .unwrap_or_default();
+        let Some(freq) = parse_frequency(
+            auto_config
+                .frequency
+                .as_deref()
+                .unwrap_or(DEFAULT_AUTO_FREQUENCY),
+        )?
+        else {
+            tracing::trace!(target: "gc", "auto gc disabled");
+            return Ok(());
+        };
+        if !self.global_cache_tracker.should_run_auto_gc(freq)? {
+            return Ok(());
+        }
+        let mut gc_opts = GcOpts::default();
+        gc_opts.update_for_auto_gc_config(&auto_config)?;
+        self.gc(clean_ctx, &gc_opts)?;
+        if !clean_ctx.dry_run {
+            self.global_cache_tracker.set_last_auto_gc()?;
+        }
+        Ok(())
+    }
+
+    /// Performs garbage collection based on the given options.
+    pub fn gc(
+        &mut self,
+        clean_ctx: &mut CleanContext<'config>,
+        gc_opts: &GcOpts,
+    ) -> CargoResult<()> {
+        self.global_cache_tracker.clean(clean_ctx, gc_opts)?;
+        // In the future, other gc operations go here, such as target cleaning.
+        Ok(())
+    }
+}
+
+/// Returns the shorter duration from `cur_span` versus `config_span`.
+///
+/// This is used because the user may specify multiple options which overlap,
+/// and this will pick whichever one is shorter.
+///
+/// * `cur_span` is the span we are comparing against (the value from the CLI
+///   option). If None, just returns the config duration.
+/// * `config_name` is the name of the config option the span is loaded from.
+/// * `config_span` is the span value loaded from config.
+fn newer_time_span_for_config(
+    cur_span: Option<Duration>,
+    config_name: &str,
+    config_span: &str,
+) -> CargoResult<Option<Duration>> {
+    let config_span = parse_time_span_for_config(config_name, config_span)?;
+    Ok(Some(maybe_newer_span(config_span, cur_span)))
+}
+
+/// Returns whichever [`Duration`] is shorter.
+fn maybe_newer_span(a: Duration, b: Option<Duration>) -> Duration {
+    match b {
+        Some(b) => {
+            if b < a {
+                b
+            } else {
+                a
+            }
+        }
+        None => a,
+    }
+}
+
+/// Parses a frequency string.
+///
+/// Returns `Ok(None)` if the frequency is "never".
+fn parse_frequency(frequency: &str) -> CargoResult<Option<Duration>> {
+    if frequency == "always" {
+        return Ok(Some(Duration::new(0, 0)));
+    } else if frequency == "never" {
+        return Ok(None);
+    }
+    let duration = maybe_parse_time_span(frequency).ok_or_else(|| {
+        format_err!(
+            "config option `gc.auto.frequency` expected a value of \"always\", \"never\", \
+             or \"N seconds/minutes/days/weeks/months\", got: {frequency:?}"
+        )
+    })?;
+    Ok(Some(duration))
+}
+
+/// Parses a time span value fetched from config.
+///
+/// This is here to provide better error messages specific to reading from
+/// config.
+fn parse_time_span_for_config(config_name: &str, span: &str) -> CargoResult<Duration> {
+    maybe_parse_time_span(span).ok_or_else(|| {
+        format_err!(
+            "config option `{config_name}` expected a value of the form \
+             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
+        )
+    })
+}
+
+/// Parses a time span string.
+///
+/// Returns None if the value is not valid. See [`parse_time_span`] if you
+/// need a variant that generates an error message.
+fn maybe_parse_time_span(span: &str) -> Option<Duration> {
+    let Some(right_i) = span.find(|c: char| !c.is_ascii_digit()) else {
+        return None;
+    };
+    let (left, mut right) = span.split_at(right_i);
+    if right.starts_with(' ') {
+        right = &right[1..];
+    }
+    let count: u64 = left.parse().ok()?;
+    let factor = match right {
+        "second" | "seconds" => 1,
+        "minute" | "minutes" => 60,
+        "hour" | "hours" => 60 * 60,
+        "day" | "days" => 24 * 60 * 60,
+        "week" | "weeks" => 7 * 24 * 60 * 60,
+        "month" | "months" => 2_629_746, // average is 30.436875 days
+        _ => return None,
+    };
+    Some(Duration::from_secs(factor * count))
+}
+
+/// Parses a time span string.
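+///
+/// A sketch of the expected behavior (mirroring the unit tests below):
+///
+/// ```ignore
+/// use std::time::Duration;
+/// assert_eq!(parse_time_span("2 weeks").unwrap(), Duration::from_secs(2 * 7 * 24 * 60 * 60));
+/// assert!(parse_time_span("2 fortnights").is_err());
+/// ```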
+pub fn parse_time_span(span: &str) -> CargoResult<Duration> {
+    maybe_parse_time_span(span).ok_or_else(|| {
+        format_err!(
+            "expected a value of the form \
+             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
+        )
+    })
+}
+
+/// Parses a file size using metric or IEC units.
+pub fn parse_human_size(input: &str) -> CargoResult<u64> {
+    let re = regex::Regex::new(r"(?i)^([0-9]+(\.[0-9])?) ?(b|kb|mb|gb|kib|mib|gib)?$").unwrap();
+    let cap = re.captures(input).ok_or_else(|| {
+        format_err!(
+            "invalid size `{input}`, \
+             expected a number with an optional B, kB, MB, GB, kiB, MiB, or GiB suffix"
+        )
+    })?;
+    let factor = match cap.get(3) {
+        Some(suffix) => match suffix.as_str().to_lowercase().as_str() {
+            "b" => 1.0,
+            "kb" => 1_000.0,
+            "mb" => 1_000_000.0,
+            "gb" => 1_000_000_000.0,
+            "kib" => 1024.0,
+            "mib" => 1024.0 * 1024.0,
+            "gib" => 1024.0 * 1024.0 * 1024.0,
+            s => unreachable!("suffix `{s}` out of sync with regex"),
+        },
+        None => {
+            return cap[1]
+                .parse()
+                .with_context(|| format!("expected an integer size, got `{}`", &cap[1]))
+        }
+    };
+    let num = cap[1]
+        .parse::<f64>()
+        .with_context(|| format!("expected an integer or float, found `{}`", &cap[1]))?;
+    Ok((num * factor) as u64)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn time_spans() {
+        let d = |x| Some(Duration::from_secs(x));
+        assert_eq!(maybe_parse_time_span("0 seconds"), d(0));
+        assert_eq!(maybe_parse_time_span("1second"), d(1));
+        assert_eq!(maybe_parse_time_span("23 seconds"), d(23));
+        assert_eq!(maybe_parse_time_span("5 minutes"), d(60 * 5));
+        assert_eq!(maybe_parse_time_span("2 hours"), d(60 * 60 * 2));
+        assert_eq!(maybe_parse_time_span("1 day"), d(60 * 60 * 24));
+        assert_eq!(maybe_parse_time_span("2 weeks"), d(60 * 60 * 24 * 14));
+        assert_eq!(maybe_parse_time_span("6 months"), d(2_629_746 * 6));
+
+        assert_eq!(parse_frequency("5 seconds").unwrap(), d(5));
+        assert_eq!(parse_frequency("always").unwrap(), d(0));
+        assert_eq!(parse_frequency("never").unwrap(), None);
+    }
+
+    #[test]
+    fn time_span_errors() {
+        assert_eq!(maybe_parse_time_span(""), None);
+        assert_eq!(maybe_parse_time_span("1"), None);
+        assert_eq!(maybe_parse_time_span("second"), None);
+        assert_eq!(maybe_parse_time_span("+2 seconds"), None);
+        assert_eq!(maybe_parse_time_span("day"), None);
+        assert_eq!(maybe_parse_time_span("-1 days"), None);
+        assert_eq!(maybe_parse_time_span("1.5 days"), None);
+        assert_eq!(maybe_parse_time_span("1 dayz"), None);
+        assert_eq!(maybe_parse_time_span("always"), None);
+        assert_eq!(maybe_parse_time_span("never"), None);
+        assert_eq!(maybe_parse_time_span("1 day "), None);
+        assert_eq!(maybe_parse_time_span(" 1 day"), None);
+        // Only a single space is allowed between the number and the unit.
+        assert_eq!(maybe_parse_time_span("1  second"), None);
+
+        let e = parse_time_span_for_config("gc.auto.max-src-age", "-1 days").unwrap_err();
+        assert_eq!(
+            e.to_string(),
+            "config option `gc.auto.max-src-age` \
+             expected a value of the form \"N seconds/minutes/days/weeks/months\", \
+             got: \"-1 days\""
+        );
+        let e = parse_frequency("abc").unwrap_err();
+        assert_eq!(
+            e.to_string(),
+            "config option `gc.auto.frequency` \
+             expected a value of \"always\", \"never\", or \"N seconds/minutes/days/weeks/months\", \
+             got: \"abc\""
+        );
+    }
+
+    #[test]
+    fn human_sizes() {
+        assert_eq!(parse_human_size("0").unwrap(), 0);
+        assert_eq!(parse_human_size("123").unwrap(), 123);
+        assert_eq!(parse_human_size("123b").unwrap(), 123);
+        assert_eq!(parse_human_size("123B").unwrap(), 123);
+        assert_eq!(parse_human_size("123 b").unwrap(), 123);
+        assert_eq!(parse_human_size("123 B").unwrap(), 123);
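+        // Fractional byte results are truncated toward zero by the `as u64`
+        // cast in `parse_human_size` (see the "1.7b" case below).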
+        assert_eq!(parse_human_size("1kb").unwrap(), 1_000);
+        assert_eq!(parse_human_size("5kb").unwrap(), 5_000);
+        assert_eq!(parse_human_size("1mb").unwrap(), 1_000_000);
+        assert_eq!(parse_human_size("1gb").unwrap(), 1_000_000_000);
+        assert_eq!(parse_human_size("1kib").unwrap(), 1_024);
+        assert_eq!(parse_human_size("1mib").unwrap(), 1_048_576);
+        assert_eq!(parse_human_size("1gib").unwrap(), 1_073_741_824);
+        assert_eq!(parse_human_size("1.5kb").unwrap(), 1_500);
+        assert_eq!(parse_human_size("1.7b").unwrap(), 1);
+
+        assert!(parse_human_size("").is_err());
+        assert!(parse_human_size("x").is_err());
+        assert!(parse_human_size("1x").is_err());
+        assert!(parse_human_size("1 2").is_err());
+        assert!(parse_human_size("1.5").is_err());
+        assert!(parse_human_size("+1").is_err());
+        // Only a single space is allowed between the number and the suffix.
+        assert!(parse_human_size("123  b").is_err());
+    }
+}
diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs
new file mode 100644
index 00000000000..5d5dd993de0
--- /dev/null
+++ b/src/cargo/core/global_cache_tracker.rs
@@ -0,0 +1,1810 @@
+//! Support for tracking the last time files were used, to assist with
+//! cleaning up those files if they haven't been used in a while.
+//!
+//! Cache file tracking is stored in a sqlite database, which records a
+//! timestamp of the last time each file was used, as well as its size.
+//!
+//! While cargo is running, when it detects a use of a cache file, it adds a
+//! timestamp to [`DeferredGlobalLastUse`]. This batches up a set of changes
+//! that are then flushed to the database all at once (via
+//! [`DeferredGlobalLastUse::save`]). Ideally saving would only be done once
+//! for performance reasons, but that is not really possible due to the way
+//! cargo works; `cargo generate-lockfile`, `cargo fetch`, and `cargo build`
+//! all exercise this code in very different ways.
+//!
+//! All of the database interaction is done through the [`GlobalCacheTracker`]
+//! type.
+//!
+//! There is a single global [`GlobalCacheTracker`] and
+//! [`DeferredGlobalLastUse`] stored in [`Config`].
+//!
+//! The high-level interface for performing garbage collection is defined in
+//! the [`crate::core::gc`] module. The functions there are responsible for
+//! interacting with the [`GlobalCacheTracker`] to handle cleaning of global
+//! cache data.
+//!
+//! ## Automatic gc
+//!
+//! Some commands (primarily the build commands) will trigger an automatic
+//! deletion of files that haven't been used in a while. The high-level
+//! interface for this is the [`crate::core::gc::auto_gc`] function.
+//!
+//! The [`GlobalCacheTracker`] database tracks the last time an automatic gc
+//! was performed so that it is only done once per day for performance
+//! reasons.
+//!
+//! ## Manual gc
+//!
+//! The user can perform a manual garbage collection with the `cargo clean`
+//! command. That command has a variety of options to specify what to delete.
+//! Manual gc supports deleting based on age or size or both. From a
+//! high-level, this is done by the [`crate::core::gc::Gc::gc`] method, which
+//! calls into [`GlobalCacheTracker`] to handle all the cleaning.
+//!
+//! ## Locking
+//!
+//! Usage of the database requires that the package cache is locked to prevent
+//! concurrent access. Although sqlite has built-in locking support, we want
+//! to use cargo's locking so that the "Blocking" message gets displayed, and
+//! so that locks can block indefinitely for long-running build commands.
+//! [`rusqlite`] has a default timeout of 5 seconds, though that is
+//! configurable.
+//!
+//! When garbage collection is being performed, the package cache lock must be
+//! in [`CacheLockMode::MutateExclusive`] to ensure no other cargo process is
+//! running. See [`crate::util::cache_lock`] for more detail on locking.
+//!
+//! When performing automatic gc, [`crate::core::gc::auto_gc`] will skip the
+//! GC if the package cache lock is already held by anything else. Automatic
+//! GC is intended to be opportunistic, and should impose as little disruption
+//! to the user as possible.
+//!
+//! ## Compatibility
+//!
+//! The database must retain both forwards and backwards compatibility between
+//! different versions of cargo. For the most part, this shouldn't be too
+//! difficult to maintain. Generally sqlite doesn't change on-disk formats
+//! between versions (the introduction of WAL is one of the few examples where
+//! version 3 had a format change, but we wouldn't use it anyway since it has
+//! shared-memory requirements cargo can't depend on due to things like
+//! network mounts).
+//!
+//! Schema changes must be managed through [`migrations`] by adding new
+//! entries that make a change to the database. Changes must not break older
+//! versions of cargo. Generally, adding columns should be fine (either with a
+//! default value, or NULL). Adding tables should also be fine. Just don't do
+//! destructive things like removing a column, or changing the semantics of an
+//! existing column.
+//!
+//! Since users may run older versions of cargo that do not do cache tracking,
+//! the [`GlobalCacheTracker::sync_db_with_files`] method helps keep the
+//! database in sync when older versions of cargo touch the cache directories.
+//!
+//! ## Performance
+//!
+//! Much of the design of this system focuses on minimizing its performance
+//! impact. Every build command needs to save updates, and we try to keep
+//! those saves from having a noticeable impact on build times. Systems like
+//! Windows, particularly with a magnetic hard disk, can experience a fairly
+//! large impact from cargo's overhead. Cargo's benchsuite has some benchmarks
+//! to help compare different environments, or changes to the code here.
+//! Please try to keep performance in mind if making any major changes.
+//!
+//! Performance of `cargo clean` is not quite as important since it is not
+//! expected to be run often. However, it is still courteous to the user to
+//! try not to impact it too much. One part that has a performance concern is
+//! that the clean command will synchronize the database with whatever is on
+//! disk if needed (in case files were added by older versions of cargo that
+//! don't do cache tracking, or if the user manually deleted some files). This
+//! can potentially be very slow, especially if the two are very out of sync.
+//!
+//! ## Filesystems
+//!
+//! Everything here is sensitive to the kind of filesystem it is running on.
+//! People tend to run cargo in all sorts of strange environments that have
+//! limited capabilities, or on things like read-only mounts. The code here
+//! needs to gracefully handle as many situations as possible.
+//!
+//! See also the information in the [Performance](#performance) and
+//! [Locking](#locking) sections when considering different filesystems and
+//! their impact on performance and locking.
+//!
+//! There are checks for read-only filesystems, and write failures on them
+//! are generally ignored.
+
+use crate::core::gc::GcOpts;
+use crate::core::Verbosity;
+use crate::ops::CleanContext;
+use crate::util::cache_lock::CacheLockMode;
+use crate::util::interning::InternedString;
+use crate::util::sqlite::{self, basic_migration, Migration};
+use crate::util::{Filesystem, Progress, ProgressStyle};
+use crate::{CargoResult, Config};
+use anyhow::{bail, Context};
+use cargo_util::paths;
+use rusqlite::{params, Connection, ErrorCode};
+use std::collections::{hash_map, HashMap};
+use std::path::{Path, PathBuf};
+use std::time::{Duration, SystemTime};
+use tracing::{debug, trace};
+
+/// The filename of the database.
+const GLOBAL_CACHE_FILENAME: &str = ".global-cache";
+
+const REGISTRY_INDEX_TABLE: &str = "registry_index";
+const REGISTRY_CRATE_TABLE: &str = "registry_crate";
+const REGISTRY_SRC_TABLE: &str = "registry_src";
+const GIT_DB_TABLE: &str = "git_db";
+const GIT_CO_TABLE: &str = "git_checkout";
+
+/// How often timestamps will be updated.
+///
+/// As an optimization, timestamps are not updated unless they are older than
+/// the given number of seconds. This helps reduce the amount of disk I/O when
+/// running cargo multiple times within a short window.
+const UPDATE_RESOLUTION: u64 = 60 * 5;
+
+/// Type for timestamps as stored in the database.
+///
+/// These are seconds since the Unix epoch.
+type Timestamp = u64;
+
+/// The key for a registry index entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct RegistryIndex {
+    pub encoded_registry_name: InternedString,
+}
+
+/// The key for a registry `.crate` entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct RegistryCrate {
+    pub encoded_registry_name: InternedString,
+    pub crate_filename: InternedString,
+    pub size: u64,
+}
+
+/// The key for a registry src directory entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct RegistrySrc {
+    pub encoded_registry_name: InternedString,
+    pub package_dir: InternedString,
+    /// Total size of the src directory in bytes.
+    ///
+    /// This can be None when the size is unknown. For example, when the src
+    /// directory already exists on disk, and we just want to update the
+    /// last-use timestamp. We don't want to take the expense of computing disk
+    /// usage unless necessary. `populate_untracked_src` will handle any actual
+    /// NULL values in the database, which can happen when the src directory is
+    /// created by an older version of cargo that did not track sizes.
+    pub size: Option<u64>,
+}
+
+/// The key for a git db entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct GitDb {
+    pub encoded_git_name: InternedString,
+}
+
+/// The key for a git checkout entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct GitCheckout {
+    pub encoded_git_name: InternedString,
+    pub short_name: InternedString,
+    /// Total size of the checkout directory.
+    ///
+    /// This can be None when the size is unknown. See [`RegistrySrc::size`]
+    /// for an explanation.
+    pub size: Option<u64>,
+}
+
+/// Filesystem paths in the global cache.
+///
+/// Accessing these assumes a lock has already been acquired.
+struct BasePaths {
+    /// Root path to the index caches.
+    index: PathBuf,
+    /// Root path to the git DBs.
+    git_db: PathBuf,
+    /// Root path to the git checkouts.
+    git_co: PathBuf,
+    /// Root path to the `.crate` files.
+    crate_dir: PathBuf,
+    /// Root path to the `src` directories.
+ src: PathBuf, +} + +/// Migrations which initialize the database, and can be used to evolve it over time. +/// +/// See [`Migration`] for more detail. +/// +/// **Be sure to not change the order or entries here!** +fn migrations() -> Vec { + vec![ + // registry_index tracks the overall usage of an index cache, and tracks a + // numeric ID to refer to that index that is used in other tables. + basic_migration( + "CREATE TABLE registry_index ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE NOT NULL, + timestamp INTEGER NOT NULL + )", + ), + // .crate files + basic_migration( + "CREATE TABLE registry_crate ( + registry_id INTEGER NOT NULL, + name TEXT NOT NULL, + size INTEGER NOT NULL, + timestamp INTEGER NOT NULL, + PRIMARY KEY (registry_id, name), + FOREIGN KEY (registry_id) REFERENCES registry_index (id) ON DELETE CASCADE + )", + ), + // Extracted src directories + // + // Note that `size` can be NULL. This will happen when marking a src + // directory as used that was created by an older version of cargo + // that didn't do size tracking. + basic_migration( + "CREATE TABLE registry_src ( + registry_id INTEGER NOT NULL, + name TEXT NOT NULL, + size INTEGER, + timestamp INTEGER NOT NULL, + PRIMARY KEY (registry_id, name), + FOREIGN KEY (registry_id) REFERENCES registry_index (id) ON DELETE CASCADE + )", + ), + // Git db directories + basic_migration( + "CREATE TABLE git_db ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE NOT NULL, + timestamp INTEGER NOT NULL + )", + ), + // Git checkout directories + basic_migration( + "CREATE TABLE git_checkout ( + git_id INTEGER NOT NULL, + name TEXT UNIQUE NOT NULL, + size INTEGER, + timestamp INTEGER NOT NULL, + PRIMARY KEY (git_id, name), + FOREIGN KEY (git_id) REFERENCES git_db (id) ON DELETE CASCADE + )", + ), + // This is a general-purpose single-row table that can store arbitrary + // data. Feel free to add columns (with ALTER TABLE) if necessary. + basic_migration( + "CREATE TABLE global_data ( + last_auto_gc INTEGER NOT NULL + )", + ), + // last_auto_gc tracks the last time auto-gc was run (so that it only + // runs roughly once a day for performance reasons). Prime it with the + // current time to establish a baseline. + Box::new(|conn| { + conn.execute( + "INSERT INTO global_data (last_auto_gc) VALUES (?1)", + [now()], + )?; + Ok(()) + }), + ] +} + +/// Type for SQL columns that refer to the primary key of their parent table. +/// +/// For example, `registry_crate.registry_id` refers to its parent `registry_index.id`. +#[derive(Copy, Clone, Debug, PartialEq)] +struct ParentId(i64); + +impl rusqlite::types::FromSql for ParentId { + fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { + let i = i64::column_result(value)?; + Ok(ParentId(i)) + } +} + +impl rusqlite::types::ToSql for ParentId { + fn to_sql(&self) -> rusqlite::Result> { + Ok(rusqlite::types::ToSqlOutput::from(self.0)) + } +} + +/// Tracking for the global shared cache (registry files, etc.). +/// +/// This is the interface to the global cache database, used for tracking and +/// cleaning. See the [`crate::core::global_cache_tracker`] module docs for +/// details. +#[derive(Debug)] +pub struct GlobalCacheTracker { + /// Connection to the SQLite database. + conn: Connection, + /// This is an optimization used to make sure cargo only checks if gc + /// needs to run once per session. This starts as `false`, and then the + /// first time it checks if automatic gc needs to run, it will be set to + /// `true`. 
+ auto_gc_checked_this_session: bool, +} + +impl GlobalCacheTracker { + /// Creates a new [`GlobalCacheTracker`]. + /// + /// The caller is responsible for locking the package cache with + /// [`CacheLockMode::DownloadExclusive`] before calling this. + pub fn new(config: &Config) -> CargoResult { + let db_path = Self::db_path(config); + // A package cache lock is required to ensure only one cargo is + // accessing at the same time. If there is concurrent access, we + // want to rely on cargo's own "Blocking" system (which can + // provide user feedback) rather than blocking inside sqlite + // (which by default has a short timeout). + let db_path = + config.assert_package_cache_locked(CacheLockMode::DownloadExclusive, &db_path); + let mut conn = if config.cli_unstable().gc { + Connection::open(db_path)? + } else { + // To simplify things (so there aren't checks everywhere for being + // enabled), just process everything in memory. + Connection::open_in_memory()? + }; + conn.pragma_update(None, "foreign_keys", true)?; + sqlite::migrate(&mut conn, &migrations())?; + Ok(GlobalCacheTracker { + conn, + auto_gc_checked_this_session: false, + }) + } + + /// The path to the database. + pub fn db_path(config: &Config) -> Filesystem { + config.home().join(GLOBAL_CACHE_FILENAME) + } + + /// Given an encoded registry name, returns its ID. + /// + /// Returns None if the given name isn't in the database. + fn id_from_name( + conn: &Connection, + table_name: &str, + encoded_name: &str, + ) -> CargoResult> { + let mut stmt = + conn.prepare_cached(&format!("SELECT id FROM {table_name} WHERE name = ?"))?; + match stmt.query_row([encoded_name], |row| row.get(0)) { + Ok(id) => Ok(Some(id)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(e.into()), + } + } + + /// Returns a map of ID to path for the given ids in the given table. + /// + /// For example, given `registry_index` IDs, it returns filenames of the + /// form "index.crates.io-6f17d22bba15001f". + fn get_id_map( + conn: &Connection, + table_name: &str, + ids: &[i64], + ) -> CargoResult> { + let mut stmt = + conn.prepare_cached(&format!("SELECT name FROM {table_name} WHERE id = ?1"))?; + ids.iter() + .map(|id| { + let name = stmt.query_row(params![id], |row| { + Ok(PathBuf::from(row.get::<_, String>(0)?)) + })?; + Ok((*id, name)) + }) + .collect() + } + + /// Returns all index cache timestamps. + pub fn registry_index_all(&self) -> CargoResult> { + let mut stmt = self + .conn + .prepare_cached("SELECT name, timestamp FROM registry_index")?; + let rows = stmt + .query_map([], |row| { + let encoded_registry_name = row.get_unwrap(0); + let timestamp = row.get_unwrap(1); + let kind = RegistryIndex { + encoded_registry_name, + }; + Ok((kind, timestamp)) + })? + .collect::, _>>()?; + Ok(rows) + } + + /// Returns all registry crate cache timestamps. + pub fn registry_crate_all(&self) -> CargoResult> { + let mut stmt = self.conn.prepare_cached( + "SELECT registry_index.name, registry_crate.name, registry_crate.size, registry_crate.timestamp + FROM registry_index, registry_crate + WHERE registry_crate.registry_id = registry_index.id", + )?; + let rows = stmt + .query_map([], |row| { + let encoded_registry_name = row.get_unwrap(0); + let crate_filename = row.get_unwrap(1); + let size = row.get_unwrap(2); + let timestamp = row.get_unwrap(3); + let kind = RegistryCrate { + encoded_registry_name, + crate_filename, + size, + }; + Ok((kind, timestamp)) + })? 
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all registry source cache timestamps.
+    pub fn registry_src_all(&self) -> CargoResult<Vec<(RegistrySrc, Timestamp)>> {
+        let mut stmt = self.conn.prepare_cached(
+            "SELECT registry_index.name, registry_src.name, registry_src.size, registry_src.timestamp
+             FROM registry_index, registry_src
+             WHERE registry_src.registry_id = registry_index.id",
+        )?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_registry_name = row.get_unwrap(0);
+                let package_dir = row.get_unwrap(1);
+                let size = row.get_unwrap(2);
+                let timestamp = row.get_unwrap(3);
+                let kind = RegistrySrc {
+                    encoded_registry_name,
+                    package_dir,
+                    size,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all git db timestamps.
+    pub fn git_db_all(&self) -> CargoResult<Vec<(GitDb, Timestamp)>> {
+        let mut stmt = self
+            .conn
+            .prepare_cached("SELECT name, timestamp FROM git_db")?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_git_name = row.get_unwrap(0);
+                let timestamp = row.get_unwrap(1);
+                let kind = GitDb { encoded_git_name };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all git checkout timestamps.
+    pub fn git_checkout_all(&self) -> CargoResult<Vec<(GitCheckout, Timestamp)>> {
+        let mut stmt = self.conn.prepare_cached(
+            "SELECT git_db.name, git_checkout.name, git_checkout.size, git_checkout.timestamp
+             FROM git_db, git_checkout
+             WHERE git_checkout.git_id = git_db.id",
+        )?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_git_name = row.get_unwrap(0);
+                let short_name = row.get_unwrap(1);
+                let size = row.get_unwrap(2);
+                let timestamp = row.get_unwrap(3);
+                let kind = GitCheckout {
+                    encoded_git_name,
+                    short_name,
+                    size,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns whether or not an auto GC should be performed, based on the
+    /// last time it was recorded in the database.
+    pub fn should_run_auto_gc(&mut self, frequency: Duration) -> CargoResult<bool> {
+        trace!(target: "gc", "should_run_auto_gc");
+        if self.auto_gc_checked_this_session {
+            return Ok(false);
+        }
+        let last_auto_gc: Timestamp =
+            self.conn
+                .query_row("SELECT last_auto_gc FROM global_data", [], |row| row.get(0))?;
+        let should_run = last_auto_gc + frequency.as_secs() < now();
+        trace!(target: "gc",
+            "last auto gc was {}, {}",
+            last_auto_gc,
+            if should_run { "running" } else { "skipping" }
+        );
+        self.auto_gc_checked_this_session = true;
+        Ok(should_run)
+    }
+
+    /// Writes to the database to indicate that an automatic GC has just been
+    /// completed.
+    pub fn set_last_auto_gc(&self) -> CargoResult<()> {
+        self.conn
+            .execute("UPDATE global_data SET last_auto_gc = ?1", [now()])?;
+        Ok(())
+    }
+
+    /// Deletes files from the global cache based on the given options.
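+    ///
+    /// A minimal calling sketch (hypothetical setup; callers normally reach
+    /// this through [`crate::core::gc::Gc::gc`]):
+    ///
+    /// ```ignore
+    /// use std::time::Duration;
+    /// let mut clean_ctx = CleanContext::new(config);
+    /// let mut gc_opts = GcOpts::default();
+    /// gc_opts.max_src_age = Some(Duration::from_secs(30 * 24 * 60 * 60));
+    /// tracker.clean(&mut clean_ctx, &gc_opts)?;
+    /// ```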
+ pub fn clean(&mut self, clean_ctx: &mut CleanContext<'_>, gc_opts: &GcOpts) -> CargoResult<()> { + self.clean_inner(clean_ctx, gc_opts) + .with_context(|| "failed to clean entries from the global cache") + } + + fn clean_inner( + &mut self, + clean_ctx: &mut CleanContext<'_>, + gc_opts: &GcOpts, + ) -> CargoResult<()> { + let _p = crate::util::profile::start("cleaning global cache files"); + let config = clean_ctx.config; + let base_git_path = config.git_path().into_path_unlocked(); + let base = BasePaths { + index: config.registry_index_path().into_path_unlocked(), + git_db: base_git_path.join("db"), + git_co: base_git_path.join("checkouts"), + crate_dir: config.registry_cache_path().into_path_unlocked(), + src: config.registry_source_path().into_path_unlocked(), + }; + let now = now(); + trace!(target: "gc", "cleaning {gc_opts:?}"); + let tx = self.conn.transaction()?; + let mut delete_paths = Vec::new(); + // This can be an expensive operation, so only perform it if necessary. + if gc_opts.is_download_cache_opt_set() { + // TODO: Investigate how slow this might be. + Self::sync_db_with_files( + &tx, + config, + &base, + gc_opts.is_download_cache_size_set(), + &mut delete_paths, + ) + .with_context(|| "failed to sync tracking database")? + } + if let Some(max_age) = gc_opts.max_index_age { + let max_age = now - max_age.as_secs(); + Self::get_registry_index_to_clean(&tx, max_age, &base, &mut delete_paths)?; + } + if let Some(max_age) = gc_opts.max_src_age { + let max_age = now - max_age.as_secs(); + Self::get_registry_items_to_clean_age( + &tx, + max_age, + REGISTRY_SRC_TABLE, + &base.src, + &mut delete_paths, + )?; + } + if let Some(max_age) = gc_opts.max_crate_age { + let max_age = now - max_age.as_secs(); + Self::get_registry_items_to_clean_age( + &tx, + max_age, + REGISTRY_CRATE_TABLE, + &base.crate_dir, + &mut delete_paths, + )?; + } + if let Some(max_age) = gc_opts.max_git_db_age { + let max_age = now - max_age.as_secs(); + Self::get_git_db_items_to_clean(&tx, max_age, &base, &mut delete_paths)?; + } + if let Some(max_age) = gc_opts.max_git_co_age { + let max_age = now - max_age.as_secs(); + Self::get_git_co_items_to_clean(&tx, max_age, &base.git_co, &mut delete_paths)?; + } + // Size collection must happen after date collection so that dates + // have precedence, since size constraints are a more blunt + // instrument. + // + // These are also complicated by the `--max-download-size` option + // overlapping with `--max-crate-size` and `--max-src-size`, which + // requires some coordination between those options which isn't + // necessary with the age-based options. An item's age is either older + // or it isn't, but contrast that with size which is based on the sum + // of all tracked items. Also, `--max-download-size` is summed against + // both the crate and src tracking, which requires combining them to + // compute the size, and then separating them to calculate the correct + // paths. 
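+        // Illustrative example: `--max-crate-size`/`--max-src-size` below
+        // each trim a single table, oldest entries first, while the
+        // `--max-download-size` pass at the end considers the crate and src
+        // tables together against one combined budget.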
+ if let Some(max_size) = gc_opts.max_crate_size { + Self::get_registry_items_to_clean_size( + &tx, + max_size, + REGISTRY_CRATE_TABLE, + &base.crate_dir, + &mut delete_paths, + )?; + } + if let Some(max_size) = gc_opts.max_src_size { + Self::get_registry_items_to_clean_size( + &tx, + max_size, + REGISTRY_SRC_TABLE, + &base.src, + &mut delete_paths, + )?; + } + if let Some(max_size) = gc_opts.max_git_size { + Self::get_git_items_to_clean_size(&tx, max_size, &base, &mut delete_paths)?; + } + if let Some(max_size) = gc_opts.max_download_size { + Self::get_registry_items_to_clean_size_both(&tx, max_size, &base, &mut delete_paths)?; + } + + clean_ctx.remove_paths(&delete_paths)?; + + if clean_ctx.dry_run { + tx.rollback()?; + } else { + tx.commit()?; + } + Ok(()) + } + + /// Returns a list of directory entries in the given path. + fn names_from(path: &Path) -> CargoResult> { + let entries = match path.read_dir() { + Ok(e) => e, + Err(e) => { + if e.kind() == std::io::ErrorKind::NotFound { + return Ok(Vec::new()); + } else { + return Err( + anyhow::Error::new(e).context(format!("failed to read path `{path:?}`")) + ); + } + } + }; + let names = entries + .filter_map(|entry| entry.ok()?.file_name().into_string().ok()) + .collect(); + Ok(names) + } + + /// Synchronizes the database to match the files on disk. + /// + /// This performs the following cleanups: + /// + /// 1. Remove entries from the database that are missing on disk. + /// 2. Adds missing entries to the database that are on disk (such as when + /// files are added by older versions of cargo). + /// 3. Fills in the `size` column where it is NULL (such as when something + /// is added to disk by an older version of cargo, and one of the mark + /// functions marked it without knowing the size). + /// + /// Size computations are only done if `sync_size` is set since it can + /// be a very expensive operation. This should only be set if the user + /// requested to clean based on the cache size. + /// 4. Checks for orphaned files. For example, if there are `.crate` files + /// associated with an index that does not exist. + /// + /// These orphaned files will be added to `delete_paths` so that the + /// caller can delete them. + fn sync_db_with_files( + conn: &Connection, + config: &Config, + base: &BasePaths, + sync_size: bool, + delete_paths: &mut Vec, + ) -> CargoResult<()> { + let _p = crate::util::profile::start("global cache db sync"); + debug!(target: "gc", "starting db sync"); + // For registry_index and git_db, add anything that is missing in the db. + Self::update_parent_for_missing_from_db(conn, REGISTRY_INDEX_TABLE, &base.index)?; + Self::update_parent_for_missing_from_db(conn, GIT_DB_TABLE, &base.git_db)?; + + // For registry_crate, registry_src, and git_checkout, remove anything + // from the db that isn't on disk. + Self::update_db_for_removed( + conn, + REGISTRY_INDEX_TABLE, + "registry_id", + REGISTRY_CRATE_TABLE, + &base.crate_dir, + )?; + Self::update_db_for_removed( + conn, + REGISTRY_INDEX_TABLE, + "registry_id", + REGISTRY_SRC_TABLE, + &base.src, + )?; + Self::update_db_for_removed(conn, GIT_DB_TABLE, "git_id", GIT_CO_TABLE, &base.git_co)?; + + // For registry_index and git_db, remove anything from the db that + // isn't on disk. + // + // This also collects paths for any child files that don't have their + // respective parent on disk. 
+ Self::update_db_parent_for_removed_from_disk( + conn, + REGISTRY_INDEX_TABLE, + &base.index, + &[&base.crate_dir, &base.src], + delete_paths, + )?; + Self::update_db_parent_for_removed_from_disk( + conn, + GIT_DB_TABLE, + &base.git_db, + &[&base.git_co], + delete_paths, + )?; + + // For registry_crate, registry_src, and git_checkout, add anything + // that is missing in the db. + Self::populate_untracked_crate(conn, &base.crate_dir)?; + Self::populate_untracked( + conn, + config, + REGISTRY_INDEX_TABLE, + "registry_id", + REGISTRY_SRC_TABLE, + &base.src, + sync_size, + )?; + Self::populate_untracked( + conn, + config, + GIT_DB_TABLE, + "git_id", + GIT_CO_TABLE, + &base.git_co, + sync_size, + )?; + + // Update any NULL sizes if needed. + if sync_size { + Self::update_null_sizes( + conn, + config, + REGISTRY_INDEX_TABLE, + "registry_id", + REGISTRY_SRC_TABLE, + &base.src, + )?; + Self::update_null_sizes( + conn, + config, + GIT_DB_TABLE, + "git_id", + GIT_CO_TABLE, + &base.git_co, + )?; + } + Ok(()) + } + + /// For parent tables, add any entries that are on disk but aren't tracked in the db. + fn update_parent_for_missing_from_db( + conn: &Connection, + parent_table_name: &str, + base_path: &Path, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!( + "update parent db for missing from db {parent_table_name}" + )); + trace!(target: "gc", "checking for untracked parent to add to {parent_table_name}"); + let names = Self::names_from(base_path)?; + + let mut stmt = conn.prepare_cached(&format!( + "INSERT INTO {parent_table_name} (name, timestamp) + VALUES (?1, ?2) + ON CONFLICT DO NOTHING", + ))?; + let now = now(); + for name in names { + stmt.execute(params![name, now])?; + } + Ok(()) + } + + /// Removes database entries for any files that are not on disk for the child tables. + /// + /// This could happen for example if the user manually deleted the file or + /// any such scenario where the filesystem and db are out of sync. + fn update_db_for_removed( + conn: &Connection, + parent_table_name: &str, + id_column_name: &str, + table_name: &str, + base_path: &Path, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!("update db for removed {table_name}")); + trace!(target: "gc", "checking for db entries to remove from {table_name}"); + let mut select_stmt = conn.prepare_cached(&format!( + "SELECT {table_name}.rowid, {parent_table_name}.name, {table_name}.name + FROM {parent_table_name}, {table_name} + WHERE {table_name}.{id_column_name} = {parent_table_name}.id", + ))?; + let mut delete_stmt = + conn.prepare_cached(&format!("DELETE FROM {table_name} WHERE rowid = ?1"))?; + let mut rows = select_stmt.query([])?; + while let Some(row) = rows.next()? { + let rowid: i64 = row.get_unwrap(0); + let id_name: String = row.get_unwrap(1); + let name: String = row.get_unwrap(2); + if !base_path.join(id_name).join(name).exists() { + delete_stmt.execute([rowid])?; + } + } + Ok(()) + } + + /// Removes database entries for any files that are not on disk for the parent tables. 
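+    ///
+    /// For example, if a registry's index directory was deleted by hand, its
+    /// row is removed here, and any now-orphaned `.crate` or src directories
+    /// under `child_base_paths` are queued into `delete_paths`.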
+ fn update_db_parent_for_removed_from_disk( + conn: &Connection, + parent_table_name: &str, + base_path: &Path, + child_base_paths: &[&Path], + delete_paths: &mut Vec, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!( + "update db parent for removed from disk {parent_table_name}" + )); + trace!(target: "gc", "checking for db entries to remove from {parent_table_name}"); + let mut select_stmt = + conn.prepare_cached(&format!("SELECT rowid, name FROM {parent_table_name}"))?; + let mut delete_stmt = + conn.prepare_cached(&format!("DELETE FROM {parent_table_name} WHERE rowid = ?1"))?; + let mut rows = select_stmt.query([])?; + while let Some(row) = rows.next()? { + let rowid: i64 = row.get_unwrap(0); + let id_name: String = row.get_unwrap(1); + if !base_path.join(&id_name).exists() { + delete_stmt.execute([rowid])?; + // Make sure any child data is also cleaned up. + for child_base in child_base_paths { + let child_path = child_base.join(&id_name); + if child_path.exists() { + debug!(target: "gc", "removing orphaned path {child_path:?}"); + delete_paths.push(child_path); + } + } + } + } + Ok(()) + } + + /// Updates the database to add any `.crate` files that are currently + /// not tracked (such as when they are downloaded by an older version of + /// cargo). + fn populate_untracked_crate(conn: &Connection, base_path: &Path) -> CargoResult<()> { + let _p = crate::util::profile::start("populate untracked crate"); + trace!(target: "gc", "populating untracked crate files"); + let mut insert_stmt = conn.prepare_cached( + "INSERT INTO registry_crate (registry_id, name, size, timestamp) + VALUES (?1, ?2, ?3, ?4) + ON CONFLICT DO NOTHING", + )?; + let now = now(); + let index_names = Self::names_from(&base_path)?; + for index_name in index_names { + let Some(id) = Self::id_from_name(conn, REGISTRY_INDEX_TABLE, &index_name)? else { + // The id is missing from the database. This should be resolved + // via update_db_parent_for_removed_from_disk. + continue; + }; + let index_path = base_path.join(index_name); + for crate_name in Self::names_from(&index_path)? { + if crate_name.ends_with(".crate") { + // Missing files should have already been taken care of by + // update_db_for_removed. + let size = paths::metadata(index_path.join(&crate_name))?.len(); + insert_stmt.execute(params![id, crate_name, size, now])?; + } + } + } + Ok(()) + } + + /// Updates the database to add any files that are currently not tracked + /// (such as when they are downloaded by an older version of cargo). + fn populate_untracked( + conn: &Connection, + config: &Config, + id_table_name: &str, + id_column_name: &str, + table_name: &str, + base_path: &Path, + populate_size: bool, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!("populate untracked {table_name}")); + trace!(target: "gc", "populating untracked files for {table_name}"); + // Gather names (and make sure they are in the database). + let id_names = Self::names_from(&base_path)?; + + // This SELECT is used to determine if the directory is already + // tracked. We don't want to do the expensive size computation unless + // necessary. 
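+        // For example, for the registry_src table this prepares
+        // (illustrative):
+        //     SELECT 1 FROM registry_src WHERE registry_id = ?1 AND name = ?2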
+ let mut select_stmt = conn.prepare_cached(&format!( + "SELECT 1 FROM {table_name} + WHERE {id_column_name} = ?1 AND name = ?2", + ))?; + let mut insert_stmt = conn.prepare_cached(&format!( + "INSERT INTO {table_name} ({id_column_name}, name, size, timestamp) + VALUES (?1, ?2, ?3, ?4) + ON CONFLICT DO NOTHING", + ))?; + let mut progress = Progress::with_style("Scanning", ProgressStyle::Ratio, config); + let now = now(); + // Compute the size of any directory not in the database. + for id_name in id_names { + let Some(id) = Self::id_from_name(conn, id_table_name, &id_name)? else { + // The id is missing from the database. This should be resolved + // via update_db_parent_for_removed_from_disk. + continue; + }; + let index_path = base_path.join(id_name); + let names = Self::names_from(&index_path)?; + let max = names.len(); + for (i, name) in names.iter().enumerate() { + if select_stmt.exists(params![id, name])? { + continue; + } + let dir_path = index_path.join(name); + if !dir_path.is_dir() { + continue; + } + progress.tick(i, max, "")?; + let size = if populate_size { + Some(du(&dir_path, table_name)?) + } else { + None + }; + insert_stmt.execute(params![id, name, size, now])?; + } + } + Ok(()) + } + + /// Fills in the `size` column where it is NULL. + /// + /// This can happen when something is added to disk by an older version of + /// cargo, and one of the mark functions marked it without knowing the + /// size. + /// + /// `update_db_for_removed` should be called before this is called. + fn update_null_sizes( + conn: &Connection, + config: &Config, + parent_table_name: &str, + id_column_name: &str, + table_name: &str, + base_path: &Path, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!("update NULL sizes {table_name}")); + trace!(target: "gc", "updating NULL size information in {table_name}"); + let mut null_stmt = conn.prepare_cached(&format!( + "SELECT {table_name}.rowid, {table_name}.name, {parent_table_name}.name + FROM {table_name}, {parent_table_name} + WHERE {table_name}.size IS NULL AND {table_name}.{id_column_name} = {parent_table_name}.id", + ))?; + let mut update_stmt = conn.prepare_cached(&format!( + "UPDATE {table_name} SET size = ?1 WHERE rowid = ?2" + ))?; + let mut progress = Progress::with_style("Scanning", ProgressStyle::Ratio, config); + let rows: Vec<_> = null_stmt + .query_map([], |row| { + Ok((row.get_unwrap(0), row.get_unwrap(1), row.get_unwrap(2))) + })? + .collect(); + let max = rows.len(); + for (i, row) in rows.into_iter().enumerate() { + let (rowid, name, id_name): (i64, String, String) = row?; + let path = base_path.join(id_name).join(name); + progress.tick(i, max, "")?; + // Missing files should have already been taken care of by + // update_db_for_removed. + let size = du(&path, table_name)?; + update_stmt.execute(params![size, rowid])?; + } + Ok(()) + } + + /// Adds paths to delete from either registry_crate or registry_src whose + /// last use is older than the given timestamp. + fn get_registry_items_to_clean_age( + conn: &Connection, + max_age: Timestamp, + table_name: &str, + base_path: &Path, + delete_paths: &mut Vec, + ) -> CargoResult<()> { + debug!(target: "gc", "cleaning {table_name} since {max_age:?}"); + let mut stmt = conn.prepare_cached(&format!( + "DELETE FROM {table_name} WHERE timestamp < ?1 + RETURNING registry_id, name" + ))?; + let rows = stmt + .query_map(params![max_age], |row| { + let registry_id = row.get_unwrap(0); + let name: String = row.get_unwrap(1); + Ok((registry_id, name)) + })? 
+ .collect::, _>>()?; + let ids: Vec<_> = rows.iter().map(|r| r.0).collect(); + let id_map = Self::get_id_map(conn, REGISTRY_INDEX_TABLE, &ids)?; + for (id, name) in rows { + let encoded_registry_name = &id_map[&id]; + delete_paths.push(base_path.join(encoded_registry_name).join(name)); + } + Ok(()) + } + + /// Adds paths to delete from either `registry_crate` or `registry_src` in + /// order to keep the total size under the given max size. + fn get_registry_items_to_clean_size( + conn: &Connection, + max_size: u64, + table_name: &str, + base_path: &Path, + delete_paths: &mut Vec, + ) -> CargoResult<()> { + debug!(target: "gc", "cleaning {table_name} till under {max_size:?}"); + let total_size: u64 = conn.query_row( + &format!("SELECT coalesce(SUM(size), 0) FROM {table_name}"), + [], + |row| row.get(0), + )?; + if total_size <= max_size { + return Ok(()); + } + // This SQL statement selects all of the rows ordered by timestamp, + // and then uses a window function to keep a running total of the + // size. It selects all rows until the running total exceeds the + // threshold of the total number of bytes that we want to delete. + // + // The window function essentially computes an aggregate over all + // previous rows as it goes along. As long as the running size is + // below the total amount that we need to delete, it keeps picking + // more rows. + // + // The ORDER BY includes `name` mainly for test purposes so that + // entries with the same timestamp have deterministic behavior. + // + // The coalesce helps convert NULL to 0. + let mut stmt = conn.prepare(&format!( + "DELETE FROM {table_name} WHERE rowid IN \ + (SELECT x.rowid FROM \ + (SELECT rowid, size, SUM(size) OVER \ + (ORDER BY timestamp, name ROWS UNBOUNDED PRECEDING) AS running_amount \ + FROM {table_name}) x \ + WHERE coalesce(x.running_amount, 0) - x.size < ?1) \ + RETURNING registry_id, name;" + ))?; + let rows = stmt + .query_map(params![total_size - max_size], |row| { + let id = row.get_unwrap(0); + let name: String = row.get_unwrap(1); + Ok((id, name)) + })? + .collect::, _>>()?; + // Convert registry_id to the encoded registry name, and join those. + let ids: Vec<_> = rows.iter().map(|r| r.0).collect(); + let id_map = Self::get_id_map(conn, REGISTRY_INDEX_TABLE, &ids)?; + for (id, name) in rows { + let encoded_name = &id_map[&id]; + delete_paths.push(base_path.join(encoded_name).join(name)); + } + Ok(()) + } + + /// Adds paths to delete from both `registry_crate` and `registry_src` in + /// order to keep the total size under the given max size. + fn get_registry_items_to_clean_size_both( + conn: &Connection, + max_size: u64, + base: &BasePaths, + delete_paths: &mut Vec, + ) -> CargoResult<()> { + debug!(target: "gc", "cleaning download till under {max_size:?}"); + + // This SQL statement selects from both registry_src and + // registry_crate so that sorting of timestamps incorporates both of + // them at the same time. It uses a const value of 1 or 2 as the first + // column so that the code below can determine which table the value + // came from. 
+ let mut stmt = conn.prepare_cached(
+ "SELECT 1, registry_src.rowid, registry_src.name AS name, registry_index.name,
+ registry_src.size, registry_src.timestamp AS timestamp
+ FROM registry_src, registry_index
+ WHERE registry_src.registry_id = registry_index.id AND registry_src.size NOT NULL
+
+ UNION
+
+ SELECT 2, registry_crate.rowid, registry_crate.name AS name, registry_index.name,
+ registry_crate.size, registry_crate.timestamp AS timestamp
+ FROM registry_crate, registry_index
+ WHERE registry_crate.registry_id = registry_index.id
+
+ ORDER BY timestamp, name",
+ )?;
+ let mut delete_src_stmt =
+ conn.prepare_cached("DELETE FROM registry_src WHERE rowid = ?1")?;
+ let mut delete_crate_stmt =
+ conn.prepare_cached("DELETE FROM registry_crate WHERE rowid = ?1")?;
+ let rows = stmt
+ .query_map([], |row| {
+ Ok((
+ row.get_unwrap(0),
+ row.get_unwrap(1),
+ row.get_unwrap(2),
+ row.get_unwrap(3),
+ row.get_unwrap(4),
+ ))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ let mut total_size: u64 = rows.iter().map(|r| r.4).sum();
+ debug!(target: "gc", "total download cache size appears to be {total_size}");
+ for (table, rowid, name, index_name, size) in rows {
+ if total_size <= max_size {
+ break;
+ }
+ if table == 1 {
+ delete_paths.push(base.src.join(index_name).join(name));
+ delete_src_stmt.execute([rowid])?;
+ } else {
+ delete_paths.push(base.crate_dir.join(index_name).join(name));
+ delete_crate_stmt.execute([rowid])?;
+ }
+ // TODO: If delete crate, ensure src is also deleted.
+ total_size -= size;
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from the git cache, keeping the total size under
+ /// the given value.
+ ///
+ /// Paths are relative to the `git` directory in the cache directory.
+ fn get_git_items_to_clean_size(
+ conn: &Connection,
+ max_size: u64,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!(target: "gc", "cleaning git till under {max_size:?}");
+
+ // Collect all the sizes from git_db and git_checkouts, and then sort them by timestamp.
+ let mut stmt = conn.prepare_cached("SELECT rowid, name, timestamp FROM git_db")?;
+ let mut git_info = stmt
+ .query_map([], |row| {
+ let rowid: i64 = row.get_unwrap(0);
+ let name: String = row.get_unwrap(1);
+ let timestamp: Timestamp = row.get_unwrap(2);
+ // Size is added below so that the error doesn't need to be
+ // converted to a rusqlite error.
+ Ok((timestamp, rowid, None, name, 0))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ for info in &mut git_info {
+ let size = cargo_util::du(&base.git_db.join(&info.3), &[])?;
+ info.4 = size;
+ }
+
+ let mut stmt = conn.prepare_cached(
+ "SELECT git_checkout.rowid, git_db.name, git_checkout.name,
+ git_checkout.size, git_checkout.timestamp
+ FROM git_checkout, git_db
+ WHERE git_checkout.git_id = git_db.id AND git_checkout.size NOT NULL",
+ )?;
+ let git_co_rows = stmt
+ .query_map([], |row| {
+ let rowid = row.get_unwrap(0);
+ let db_name: String = row.get_unwrap(1);
+ let name = row.get_unwrap(2);
+ let size = row.get_unwrap(3);
+ let timestamp = row.get_unwrap(4);
+ Ok((timestamp, rowid, Some(db_name), name, size))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ git_info.extend(git_co_rows);
+
+ // Sort by timestamp, and name. The name is included mostly for test
+ // purposes so that entries with the same timestamp have deterministic
+ // behavior.
+ git_info.sort_by(|a, b| (b.0, &b.3).cmp(&(a.0, &a.3)));
+
+ // Collect paths to delete.
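+ // A note on ordering: the sort above is descending, so `pop()` in the
+ // loop below always yields the entry with the *smallest* timestamp,
+ // i.e. eviction proceeds oldest-first. As a hedged illustration with
+ // hypothetical entries, given timestamps 1000, 2000, and 3000, deletion
+ // is attempted in the order 1000, then 2000, then 3000, stopping as
+ // soon as the total size drops to the limit.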
+ let mut delete_db_stmt = conn.prepare_cached("DELETE FROM git_db WHERE rowid = ?1")?;
+ let mut delete_co_stmt =
+ conn.prepare_cached("DELETE FROM git_checkout WHERE rowid = ?1")?;
+ let mut total_size: u64 = git_info.iter().map(|r| r.4).sum();
+ debug!(target: "gc", "total git cache size appears to be {total_size}");
+ while let Some((_timestamp, rowid, db_name, name, size)) = git_info.pop() {
+ if total_size <= max_size {
+ break;
+ }
+ if let Some(db_name) = db_name {
+ delete_paths.push(base.git_co.join(db_name).join(name));
+ delete_co_stmt.execute([rowid])?;
+ total_size -= size;
+ } else {
+ total_size -= size;
+ delete_paths.push(base.git_db.join(&name));
+ delete_db_stmt.execute([rowid])?;
+ // If the db is deleted, then all the checkouts must be deleted.
+ let mut i = 0;
+ while i < git_info.len() {
+ if git_info[i].2.as_deref() == Some(name.as_ref()) {
+ let (_, rowid, db_name, name, size) = git_info.remove(i);
+ delete_paths.push(base.git_co.join(db_name.unwrap()).join(name));
+ delete_co_stmt.execute([rowid])?;
+ total_size -= size;
+ } else {
+ i += 1;
+ }
+ }
+ }
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from `registry_index` whose last use is older
+ /// than the given timestamp.
+ fn get_registry_index_to_clean(
+ conn: &Connection,
+ max_age: Timestamp,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!(target: "gc", "cleaning index since {max_age:?}");
+ let mut stmt = conn.prepare_cached(
+ "DELETE FROM registry_index WHERE timestamp < ?1
+ RETURNING name",
+ )?;
+ let mut rows = stmt.query([max_age])?;
+ while let Some(row) = rows.next()? {
+ let name: String = row.get_unwrap(0);
+ delete_paths.push(base.index.join(&name));
+ // Also delete .crate and src directories, since by definition
+ // they cannot be used without their index.
+ delete_paths.push(base.src.join(&name));
+ delete_paths.push(base.crate_dir.join(&name));
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from `git_checkout` whose last use is
+ /// older than the given timestamp.
+ fn get_git_co_items_to_clean(
+ conn: &Connection,
+ max_age: Timestamp,
+ base_path: &Path,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!(target: "gc", "cleaning git co since {max_age:?}");
+ let mut stmt = conn.prepare_cached(
+ "DELETE FROM git_checkout WHERE timestamp < ?1
+ RETURNING git_id, name",
+ )?;
+ let rows = stmt
+ .query_map(params![max_age], |row| {
+ let git_id = row.get_unwrap(0);
+ let name: String = row.get_unwrap(1);
+ Ok((git_id, name))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ let ids: Vec<_> = rows.iter().map(|r| r.0).collect();
+ let id_map = Self::get_id_map(conn, GIT_DB_TABLE, &ids)?;
+ for (id, name) in rows {
+ let encoded_git_name = &id_map[&id];
+ delete_paths.push(base_path.join(encoded_git_name).join(name));
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from `git_db` whose last use is older than the
+ /// given timestamp.
+ fn get_git_db_items_to_clean(
+ conn: &Connection,
+ max_age: Timestamp,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!(target: "gc", "cleaning git db since {max_age:?}");
+ let mut stmt = conn.prepare_cached(
+ "DELETE FROM git_db WHERE timestamp < ?1
+ RETURNING name",
+ )?;
+ let mut rows = stmt.query([max_age])?;
+ while let Some(row) = rows.next()? {
+ let name: String = row.get_unwrap(0);
+ delete_paths.push(base.git_db.join(&name));
+ // Also delete checkout directories, since by definition they
+ // cannot be used without their db.
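+ // The checkouts of a db live under a directory with the same encoded
+ // name (for example `$CARGO_HOME/git/checkouts/<name>/<short-id>`,
+ // where `<short-id>` is a placeholder for a checkout's directory),
+ // so pushing the single `git_co/<name>` path removes them all at once.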
+ delete_paths.push(base.git_co.join(&name));
+ }
+ Ok(())
+ }
+}
+
+/// Helper to generate the upsert for the parent tables.
+///
+/// This handles checking if the row already exists, and only updates the
+/// timestamp if it hasn't been updated recently. This also handles keeping
+/// a cached map of the `id` value.
+///
+/// Unfortunately it is a bit tricky to share this code without a macro.
+macro_rules! insert_or_update_parent {
+ ($self:expr, $conn:expr, $table_name:expr, $timestamps_field:ident, $keys_field:ident, $encoded_name:ident) => {
+ let mut select_stmt = $conn.prepare_cached(concat!(
+ "SELECT id, timestamp FROM ",
+ $table_name,
+ " WHERE name = ?1"
+ ))?;
+ let mut insert_stmt = $conn.prepare_cached(concat!(
+ "INSERT INTO ",
+ $table_name,
+ " (name, timestamp)
+ VALUES (?1, ?2)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ RETURNING id",
+ ))?;
+ let mut update_stmt = $conn.prepare_cached(concat!(
+ "UPDATE ",
+ $table_name,
+ " SET timestamp = ?1 WHERE id = ?2"
+ ))?;
+ for (parent, new_timestamp) in std::mem::take(&mut $self.$timestamps_field) {
+ trace!(target: "gc",
+ concat!("insert ", $table_name, " {:?} {}"),
+ parent,
+ new_timestamp
+ );
+ let mut rows = select_stmt.query([parent.$encoded_name])?;
+ let id = if let Some(row) = rows.next()? {
+ let id: ParentId = row.get_unwrap(0);
+ let timestamp: Timestamp = row.get_unwrap(1);
+ if timestamp < new_timestamp - UPDATE_RESOLUTION {
+ update_stmt.execute(params![new_timestamp, id])?;
+ }
+ id
+ } else {
+ insert_stmt.query_row(params![parent.$encoded_name, new_timestamp], |row| {
+ row.get(0)
+ })?
+ };
+ match $self.$keys_field.entry(parent.$encoded_name) {
+ hash_map::Entry::Occupied(o) => {
+ assert_eq!(*o.get(), id);
+ }
+ hash_map::Entry::Vacant(v) => {
+ v.insert(id);
+ }
+ }
+ }
+ return Ok(());
+ };
+}
+
+/// This is a cache of modifications that will be saved to disk all at once
+/// via the [`DeferredGlobalLastUse::save`] method.
+///
+/// This is here to improve performance.
+#[derive(Debug)]
+pub struct DeferredGlobalLastUse {
+ /// Cache of registry keys, used for faster fetching.
+ ///
+ /// The key is the registry name (which is its directory name) and the
+ /// value is the `id` in the `registry_index` table.
+ registry_keys: HashMap<InternedString, ParentId>,
+ /// Cache of git keys, used for faster fetching.
+ ///
+ /// The key is the git db name (which is its directory name) and the value
+ /// is the `id` in the `git_db` table.
+ git_keys: HashMap<InternedString, ParentId>,
+
+ /// New registry index entries to insert.
+ registry_index_timestamps: HashMap<RegistryIndex, Timestamp>,
+ /// New registry `.crate` entries to insert.
+ registry_crate_timestamps: HashMap<RegistryCrate, Timestamp>,
+ /// New registry src directory entries to insert.
+ registry_src_timestamps: HashMap<RegistrySrc, Timestamp>,
+ /// New git db entries to insert.
+ git_db_timestamps: HashMap<GitDb, Timestamp>,
+ /// New git checkout entries to insert.
+ git_checkout_timestamps: HashMap<GitCheckout, Timestamp>,
+ /// This is used so that a warning about failing to update the database is
+ /// only displayed once.
+ save_err_has_warned: bool,
+ /// The current time, used to improve performance by avoiding access to
+ /// the clock hundreds of times.
+ now: Timestamp, +} + +impl DeferredGlobalLastUse { + pub fn new() -> DeferredGlobalLastUse { + DeferredGlobalLastUse { + registry_keys: HashMap::new(), + git_keys: HashMap::new(), + registry_index_timestamps: HashMap::new(), + registry_crate_timestamps: HashMap::new(), + registry_src_timestamps: HashMap::new(), + git_db_timestamps: HashMap::new(), + git_checkout_timestamps: HashMap::new(), + save_err_has_warned: false, + now: now(), + } + } + + pub fn is_empty(&self) -> bool { + self.registry_index_timestamps.is_empty() + && self.registry_crate_timestamps.is_empty() + && self.registry_src_timestamps.is_empty() + && self.git_db_timestamps.is_empty() + && self.git_checkout_timestamps.is_empty() + } + + fn clear(&mut self) { + self.registry_index_timestamps.clear(); + self.registry_crate_timestamps.clear(); + self.registry_src_timestamps.clear(); + self.git_db_timestamps.clear(); + self.git_checkout_timestamps.clear(); + } + + /// Indicates the given [`RegistryIndex`] has been used right now. + pub fn mark_registry_index_used(&mut self, registry_index: RegistryIndex) { + self.mark_registry_index_used_stamp(registry_index, None); + } + + /// Indicates the given [`RegistryCrate`] has been used right now. + /// + /// Also implicitly marks the index used, too. + pub fn mark_registry_crate_used(&mut self, registry_crate: RegistryCrate) { + self.mark_registry_crate_used_stamp(registry_crate, None); + } + + /// Indicates the given [`RegistrySrc`] has been used right now. + /// + /// Also implicitly marks the index used, too. + pub fn mark_registry_src_used(&mut self, registry_src: RegistrySrc) { + self.mark_registry_src_used_stamp(registry_src, None); + } + + /// Indicates the given [`GitCheckout`] has been used right now. + /// + /// Also implicitly marks the git db used, too. + pub fn mark_git_checkout_used(&mut self, git_checkout: GitCheckout) { + self.mark_git_checkout_used_stamp(git_checkout, None); + } + + /// Indicates the given [`RegistryIndex`] has been used with the given + /// time (or "now" if `None`). + pub fn mark_registry_index_used_stamp( + &mut self, + registry_index: RegistryIndex, + timestamp: Option<&SystemTime>, + ) { + let timestamp = timestamp.map_or(self.now, to_timestamp); + self.registry_index_timestamps + .insert(registry_index, timestamp); + } + + /// Indicates the given [`RegistryCrate`] has been used with the given + /// time (or "now" if `None`). + /// + /// Also implicitly marks the index used, too. + pub fn mark_registry_crate_used_stamp( + &mut self, + registry_crate: RegistryCrate, + timestamp: Option<&SystemTime>, + ) { + let timestamp = timestamp.map_or(self.now, to_timestamp); + let index = RegistryIndex { + encoded_registry_name: registry_crate.encoded_registry_name, + }; + self.registry_index_timestamps.insert(index, timestamp); + self.registry_crate_timestamps + .insert(registry_crate, timestamp); + } + + /// Indicates the given [`RegistrySrc`] has been used with the given + /// time (or "now" if `None`). + /// + /// Also implicitly marks the index used, too. 
+ pub fn mark_registry_src_used_stamp(
+ &mut self,
+ registry_src: RegistrySrc,
+ timestamp: Option<&SystemTime>,
+ ) {
+ let timestamp = timestamp.map_or(self.now, to_timestamp);
+ let index = RegistryIndex {
+ encoded_registry_name: registry_src.encoded_registry_name,
+ };
+ self.registry_index_timestamps.insert(index, timestamp);
+ self.registry_src_timestamps.insert(registry_src, timestamp);
+ }
+
+ /// Indicates the given [`GitCheckout`] has been used with the given
+ /// time (or "now" if `None`).
+ ///
+ /// Also implicitly marks the git db used, too.
+ pub fn mark_git_checkout_used_stamp(
+ &mut self,
+ git_checkout: GitCheckout,
+ timestamp: Option<&SystemTime>,
+ ) {
+ let timestamp = timestamp.map_or(self.now, to_timestamp);
+ let db = GitDb {
+ encoded_git_name: git_checkout.encoded_git_name,
+ };
+ self.git_db_timestamps.insert(db, timestamp);
+ self.git_checkout_timestamps.insert(git_checkout, timestamp);
+ }
+
+ /// Saves all of the deferred information to the database.
+ ///
+ /// This will also clear the state of `self`.
+ pub fn save(&mut self, tracker: &mut GlobalCacheTracker) -> CargoResult<()> {
+ let _p = crate::util::profile::start("saving last-use data");
+ trace!(target: "gc", "saving last-use data");
+ if self.is_empty() {
+ return Ok(());
+ }
+ let tx = tracker.conn.transaction()?;
+ // These must run before the ones that refer to their IDs.
+ self.insert_registry_index_from_cache(&tx)?;
+ self.insert_git_db_from_cache(&tx)?;
+ self.insert_registry_crate_from_cache(&tx)?;
+ self.insert_registry_src_from_cache(&tx)?;
+ self.insert_git_checkout_from_cache(&tx)?;
+ tx.commit()?;
+ trace!(target: "gc", "last-use save complete");
+ Ok(())
+ }
+
+ /// Variant of [`DeferredGlobalLastUse::save`] that does not return an
+ /// error.
+ ///
+ /// This will log or display a warning to the user.
+ pub fn save_no_error(&mut self, config: &Config) {
+ if let Err(e) = self.save_with_config(config) {
+ // Because there is an assertion in auto-gc that checks if this is
+ // empty, be sure to clear it so that assertion doesn't fail.
+ self.clear();
+ if !self.save_err_has_warned {
+ if is_silent_error(&e) && config.shell().verbosity() != Verbosity::Verbose {
+ tracing::warn!("failed to save last-use data: {e:?}");
+ } else {
+ crate::display_warning_with_error(
+ "failed to save last-use data\n\
+ This may prevent cargo from accurately tracking what is being \
+ used in its global cache. This information is used for \
+ automatically removing unused data in the cache.",
+ &e,
+ &mut config.shell(),
+ );
+ self.save_err_has_warned = true;
+ }
+ }
+ }
+ }
+
+ fn save_with_config(&mut self, config: &Config) -> CargoResult<()> {
+ let mut tracker = config.global_cache_tracker()?;
+ self.save(&mut tracker)
+ }
+
+ /// Flushes all of the `registry_index_timestamps` to the database,
+ /// clearing `registry_index_timestamps`.
+ fn insert_registry_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ insert_or_update_parent!(
+ self,
+ conn,
+ "registry_index",
+ registry_index_timestamps,
+ registry_keys,
+ encoded_registry_name
+ );
+ }
+
+ /// Flushes all of the `git_db_timestamps` to the database,
+ /// clearing `git_db_timestamps`.
+ fn insert_git_db_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ insert_or_update_parent!(
+ self,
+ conn,
+ "git_db",
+ git_db_timestamps,
+ git_keys,
+ encoded_git_name
+ );
+ }
+
+ /// Flushes all of the `registry_crate_timestamps` to the database,
+ /// clearing `registry_crate_timestamps`.
+ fn insert_registry_crate_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ let registry_crate_timestamps = std::mem::take(&mut self.registry_crate_timestamps);
+ for (registry_crate, timestamp) in registry_crate_timestamps {
+ trace!(target: "gc", "insert registry crate {registry_crate:?} {timestamp}");
+ let registry_id = self.registry_id(conn, registry_crate.encoded_registry_name)?;
+ let mut stmt = conn.prepare_cached(
+ "INSERT INTO registry_crate (registry_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ WHERE timestamp < ?5
+ ",
+ )?;
+ stmt.execute(params![
+ registry_id,
+ registry_crate.crate_filename,
+ registry_crate.size,
+ timestamp,
+ timestamp - UPDATE_RESOLUTION
+ ])?;
+ }
+ Ok(())
+ }
+
+ /// Flushes all of the `registry_src_timestamps` to the database,
+ /// clearing `registry_src_timestamps`.
+ fn insert_registry_src_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ let registry_src_timestamps = std::mem::take(&mut self.registry_src_timestamps);
+ for (registry_src, timestamp) in registry_src_timestamps {
+ trace!(target: "gc", "insert registry src {registry_src:?} {timestamp}");
+ let registry_id = self.registry_id(conn, registry_src.encoded_registry_name)?;
+ let mut stmt = conn.prepare_cached(
+ "INSERT INTO registry_src (registry_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ WHERE timestamp < ?5
+ ",
+ )?;
+ stmt.execute(params![
+ registry_id,
+ registry_src.package_dir,
+ registry_src.size,
+ timestamp,
+ timestamp - UPDATE_RESOLUTION
+ ])?;
+ }
+
+ Ok(())
+ }
+
+ /// Flushes all of the `git_checkout_timestamps` to the database,
+ /// clearing `git_checkout_timestamps`.
+ fn insert_git_checkout_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ let git_checkout_timestamps = std::mem::take(&mut self.git_checkout_timestamps);
+ for (git_checkout, timestamp) in git_checkout_timestamps {
+ let git_id = self.git_id(conn, git_checkout.encoded_git_name)?;
+ let mut stmt = conn.prepare_cached(
+ "INSERT INTO git_checkout (git_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ WHERE timestamp < ?5",
+ )?;
+ stmt.execute(params![
+ git_id,
+ git_checkout.short_name,
+ git_checkout.size,
+ timestamp,
+ timestamp - UPDATE_RESOLUTION
+ ])?;
+ }
+
+ Ok(())
+ }
+
+ /// Returns the numeric ID of the registry, either fetching from the local
+ /// cache, or getting it from the database.
+ ///
+ /// It is an error if the registry does not exist.
+ fn registry_id(
+ &mut self,
+ conn: &Connection,
+ encoded_registry_name: InternedString,
+ ) -> CargoResult<ParentId> {
+ match self.registry_keys.get(&encoded_registry_name) {
+ Some(i) => Ok(*i),
+ None => {
+ let Some(id) = GlobalCacheTracker::id_from_name(
+ conn,
+ REGISTRY_INDEX_TABLE,
+ &encoded_registry_name,
+ )?
+ else {
+ bail!("expected registry_index {encoded_registry_name} to exist, but wasn't found");
+ };
+ self.registry_keys.insert(encoded_registry_name, id);
+ Ok(id)
+ }
+ }
+ }
+
+ /// Returns the numeric ID of the git db, either fetching from the local
+ /// cache, or getting it from the database.
+ ///
+ /// It is an error if the git db does not exist.
+ fn git_id(
+ &mut self,
+ conn: &Connection,
+ encoded_git_name: InternedString,
+ ) -> CargoResult<ParentId> {
+ match self.git_keys.get(&encoded_git_name) {
+ Some(i) => Ok(*i),
+ None => {
+ let Some(id) =
+ GlobalCacheTracker::id_from_name(conn, GIT_DB_TABLE, &encoded_git_name)?
+ else {
+ bail!("expected git_db {encoded_git_name} to exist, but wasn't found")
+ };
+ self.git_keys.insert(encoded_git_name, id);
+ Ok(id)
+ }
+ }
+ }
+}
+
+/// Converts a [`SystemTime`] to a [`Timestamp`] which can be stored in the database.
+fn to_timestamp(t: &SystemTime) -> Timestamp {
+ t.duration_since(SystemTime::UNIX_EPOCH)
+ .expect("invalid clock")
+ .as_secs()
+}
+
+/// Returns the current time.
+///
+/// For testing, this supports pretending that the current time is different
+/// via an environment variable.
+///
+/// If possible, try to avoid calling this too often since accessing clocks
+/// can be a little slow on some systems.
+#[allow(clippy::disallowed_methods)]
+fn now() -> Timestamp {
+ match std::env::var("__CARGO_TEST_LAST_USE_NOW") {
+ Ok(now) => now.parse().unwrap(),
+ Err(_) => to_timestamp(&SystemTime::now()),
+ }
+}
+
+/// Returns whether or not the given error should cause a warning to be
+/// displayed to the user.
+///
+/// In some situations, like a read-only global cache, we don't want to spam
+/// the user with a warning. I think once cargo has controllable lints, we
+/// should consider changing this to always warn, but give the user an option
+/// to silence the warning.
+pub fn is_silent_error(e: &anyhow::Error) -> bool {
+ if let Some(e) = e.downcast_ref::<rusqlite::Error>() {
+ if matches!(
+ e.sqlite_error_code(),
+ Some(ErrorCode::CannotOpen | ErrorCode::ReadOnly)
+ ) {
+ return true;
+ }
+ }
+ false
+}
+
+/// Returns the disk usage for a git checkout directory.
+pub fn du_git_checkout(path: &Path) -> CargoResult<u64> {
+ // !.git is used because clones typically use hardlinks for the git
+ // contents. TODO: Verify behavior on Windows.
+ // TODO: Or even better, switch to worktrees, and remove this.
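+ // The leading `!` marks `.git` as an exclusion in the gitignore-style
+ // patterns that `cargo_util::du` accepts, so the (often hardlinked)
+ // repository contents under `.git` are not counted against the
+ // checkout's size.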
+ cargo_util::du(&path, &["!.git"])
+}
+
+fn du(path: &Path, table_name: &str) -> CargoResult<u64> {
+ if table_name == GIT_CO_TABLE {
+ du_git_checkout(path)
+ } else {
+ cargo_util::du(&path, &[])
+ }
+}
diff --git a/src/cargo/core/mod.rs b/src/cargo/core/mod.rs
index 2add52d5c1c..80809106139 100644
--- a/src/cargo/core/mod.rs
+++ b/src/cargo/core/mod.rs
@@ -19,6 +19,8 @@ pub use crate::util::toml::schema::InheritableFields;
pub mod compiler;
pub mod dependency;
pub mod features;
+pub mod gc;
+pub mod global_cache_tracker;
pub mod manifest;
pub mod package;
pub mod package_id;
diff --git a/src/cargo/core/package.rs b/src/cargo/core/package.rs
index 274798474ac..d87f81036b1 100644
--- a/src/cargo/core/package.rs
+++ b/src/cargo/core/package.rs
@@ -491,6 +491,10 @@ impl<'cfg> PackageSet<'cfg> {
pkgs.push(downloads.wait()?);
}
downloads.success = true;
+ drop(downloads);
+
+ let mut deferred = self.config.deferred_global_last_use()?;
+ deferred.save_no_error(self.config);
Ok(pkgs)
}
diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs
index 6f58b8bdc83..923b2decdc4 100644
--- a/src/cargo/ops/cargo_clean.rs
+++ b/src/cargo/ops/cargo_clean.rs
@@ -389,7 +389,7 @@ impl<'cfg> CleanContext<'cfg> {
Ok(())
}
- fn display_summary(&self) -> CargoResult<()> {
+ pub fn display_summary(&self) -> CargoResult<()> {
let status = if self.dry_run { "Summary" } else { "Removed" };
let byte_count = if self.total_bytes_removed == 0 {
String::new()
diff --git a/src/cargo/ops/cargo_compile/mod.rs b/src/cargo/ops/cargo_compile/mod.rs
index 94c6cf9de04..3522ef9d34d 100644
--- a/src/cargo/ops/cargo_compile/mod.rs
+++ b/src/cargo/ops/cargo_compile/mod.rs
@@ -153,6 +153,7 @@ pub fn compile_ws<'a>(
unit_graph::emit_serialized_unit_graph(&bcx.roots, &bcx.unit_graph, ws.config())?;
return Compilation::new(&bcx);
}
+ crate::core::gc::auto_gc(bcx.config);
let _p = profile::start("compiling");
let cx = Context::new(&bcx)?;
cx.compile(exec)
diff --git a/src/cargo/ops/cargo_fetch.rs b/src/cargo/ops/cargo_fetch.rs
index 6acdbddefb6..ac2b60aabc0 100644
--- a/src/cargo/ops/cargo_fetch.rs
+++ b/src/cargo/ops/cargo_fetch.rs
@@ -76,6 +76,7 @@ pub fn fetch<'a>(
}
packages.get_many(to_download)?;
+ crate::core::gc::auto_gc(config);
Ok((resolve, packages))
}
diff --git a/src/cargo/ops/mod.rs b/src/cargo/ops/mod.rs
index 13613eaf635..76fa91d2591 100644
--- a/src/cargo/ops/mod.rs
+++ b/src/cargo/ops/mod.rs
@@ -1,6 +1,6 @@
use crate::sources::CRATES_IO_DOMAIN;
-pub use self::cargo_clean::{clean, CleanOptions};
+pub use self::cargo_clean::{clean, CleanContext, CleanOptions};
pub use self::cargo_compile::{
compile, compile_with_exec, compile_ws, create_bcx, print, resolve_all_features, CompileOptions,
};
diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs
index 8ca72f77c6a..00d3b114450 100644
--- a/src/cargo/ops/resolve.rs
+++ b/src/cargo/ops/resolve.rs
@@ -530,6 +530,9 @@ pub fn resolve_with_previous<'cfg>(
if let Some(previous) = previous {
resolved.merge_from(previous)?;
}
+ let config = ws.config();
+ let mut deferred = config.deferred_global_last_use()?;
+ deferred.save_no_error(config);
Ok(resolved)
}
diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs
index a75c1ec6d91..c89bf346ceb 100644
--- a/src/cargo/sources/git/source.rs
+++ b/src/cargo/sources/git/source.rs
@@ -1,5 +1,6 @@
//! See [GitSource].
+use crate::core::global_cache_tracker;
use crate::core::GitReference;
use crate::core::SourceId;
use crate::core::{Dependency, Package, PackageId, Summary};
@@ -11,6 +12,7 @@ use crate::sources::PathSource;
use crate::util::cache_lock::CacheLockMode;
use crate::util::errors::CargoResult;
use crate::util::hex::short_hash;
+use crate::util::interning::InternedString;
use crate::util::Config;
use anyhow::Context;
use cargo_util::paths::exclude_from_backups_and_indexing;
@@ -74,9 +76,10 @@ pub struct GitSource<'cfg> {
source_id: SourceId,
/// The underlying path source to discover packages inside the Git repository.
path_source: Option<PathSource<'cfg>>,
+ short_id: Option<InternedString>,
/// The identifier of this source for Cargo's Git cache directory.
/// See [`ident`] for more.
- ident: String,
+ ident: InternedString,
config: &'cfg Config,
/// Disables status messages.
quiet: bool,
@@ -104,7 +107,8 @@ impl<'cfg> GitSource<'cfg> {
locked_rev,
source_id,
path_source: None,
- ident,
+ short_id: None,
+ ident: ident.into(),
config,
quiet: false,
};
@@ -127,6 +131,17 @@ impl<'cfg> GitSource<'cfg> {
}
self.path_source.as_mut().unwrap().read_packages()
}
+
+ fn mark_used(&self, size: Option<u64>) -> CargoResult<()> {
+ self.config
+ .deferred_global_last_use()?
+ .mark_git_checkout_used(global_cache_tracker::GitCheckout {
+ encoded_git_name: self.ident,
+ short_name: self.short_id.expect("update before download"),
+ size,
+ });
+ Ok(())
+ }
}
/// Create an identifier from a URL,
@@ -200,6 +215,7 @@ impl<'cfg> Source for GitSource<'cfg> {
fn block_until_ready(&mut self) -> CargoResult<()> {
if self.path_source.is_some() {
+ self.mark_used(None)?;
return Ok(());
}
@@ -290,8 +306,16 @@ impl<'cfg> Source for GitSource<'cfg> {
let path_source = PathSource::new_recursive(&checkout_path, source_id, self.config);
self.path_source = Some(path_source);
+ self.short_id = Some(short_id.as_str().into());
self.locked_rev = Some(actual_rev);
- self.path_source.as_mut().unwrap().update()
+ self.path_source.as_mut().unwrap().update()?;
+
+ // Hopefully this shouldn't incur too much of a performance hit, since
+ // most of this should already be in cache as it was just extracted.
+ let size = global_cache_tracker::du_git_checkout(&checkout_path)?;
+ self.mark_used(Some(size))?;
+ Ok(())
}
fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
trace!(
"getting packages for package ID `{}` from `{:?}`",
id,
self.remote
);
+ self.mark_used(None)?;
self.path_source
.as_mut()
.expect("BUG: `update()` must be called before `get()`")
diff --git a/src/cargo/sources/registry/download.rs b/src/cargo/sources/registry/download.rs
index 7864328354f..daf1d0537cd 100644
--- a/src/cargo/sources/registry/download.rs
+++ b/src/cargo/sources/registry/download.rs
@@ -3,11 +3,13 @@
//! [`HttpRegistry`]: super::http_remote::HttpRegistry
//!
[`RemoteRegistry`]: super::remote::RemoteRegistry +use crate::util::interning::InternedString; use anyhow::Context; use cargo_credential::Operation; use cargo_util::registry::make_dep_path; use cargo_util::Sha256; +use crate::core::global_cache_tracker; use crate::core::PackageId; use crate::sources::registry::MaybeLock; use crate::sources::registry::RegistryConfig; @@ -34,6 +36,7 @@ const CHECKSUM_TEMPLATE: &str = "{sha256-checksum}"; pub(super) fn download( cache_path: &Filesystem, config: &Config, + encoded_registry_name: InternedString, pkg: PackageId, checksum: &str, registry_config: RegistryConfig, @@ -50,6 +53,13 @@ pub(super) fn download( if let Ok(dst) = File::open(path) { let meta = dst.metadata()?; if meta.len() > 0 { + config.deferred_global_last_use()?.mark_registry_crate_used( + global_cache_tracker::RegistryCrate { + encoded_registry_name, + crate_filename: pkg.tarball_name().into(), + size: meta.len(), + }, + ); return Ok(MaybeLock::Ready(dst)); } } @@ -106,6 +116,7 @@ pub(super) fn download( pub(super) fn finish_download( cache_path: &Filesystem, config: &Config, + encoded_registry_name: InternedString, pkg: PackageId, checksum: &str, data: &[u8], @@ -115,6 +126,13 @@ pub(super) fn finish_download( if actual != checksum { anyhow::bail!("failed to verify the checksum of `{}`", pkg) } + config.deferred_global_last_use()?.mark_registry_crate_used( + global_cache_tracker::RegistryCrate { + encoded_registry_name, + crate_filename: pkg.tarball_name().into(), + size: data.len() as u64, + }, + ); cache_path.create_dir()?; let path = cache_path.join(&pkg.tarball_name()); diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 3d31110c38e..1dfae4ad811 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1,11 +1,13 @@ //! Access to a HTTP-based crate registry. See [`HttpRegistry`] for details. +use crate::core::global_cache_tracker; use crate::core::{PackageId, SourceId}; use crate::sources::registry::download; use crate::sources::registry::MaybeLock; use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData}; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::{CargoResult, HttpNotSuccessful}; +use crate::util::interning::InternedString; use crate::util::network::http::http_handle; use crate::util::network::retry::{Retry, RetryResult}; use crate::util::network::sleep::SleepTracker; @@ -52,6 +54,7 @@ const UNKNOWN: &'static str = "Unknown"; /// /// [RFC 2789]: https://github.com/rust-lang/rfcs/pull/2789 pub struct HttpRegistry<'cfg> { + name: InternedString, /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). /// /// To be fair, `HttpRegistry` doesn't store the registry index it @@ -199,6 +202,7 @@ impl<'cfg> HttpRegistry<'cfg> { .expect("a url with the sparse+ stripped should still be valid"); Ok(HttpRegistry { + name: name.into(), index_path: config.registry_index_path().join(name), cache_path: config.registry_cache_path().join(name), source_id, @@ -454,6 +458,11 @@ impl<'cfg> HttpRegistry<'cfg> { impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { + self.config + .deferred_global_last_use()? 
+ .mark_registry_index_used(global_cache_tracker::RegistryIndex {
+ encoded_registry_name: self.name,
+ });
Ok(())
}
@@ -750,6 +759,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
download::download(
&self.cache_path,
&self.config,
+ self.name.clone(),
pkg,
checksum,
registry_config,
@@ -762,7 +772,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
checksum: &str,
data: &[u8],
) -> CargoResult<File> {
- download::finish_download(&self.cache_path, &self.config, pkg, checksum, data)
+ download::finish_download(
+ &self.cache_path,
+ &self.config,
+ self.name.clone(),
+ pkg,
+ checksum,
+ data,
+ )
}
fn is_crate_downloaded(&self, pkg: PackageId) -> bool {
diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs
index 7ee461edd80..f884eec30b8 100644
--- a/src/cargo/sources/registry/mod.rs
+++ b/src/cargo/sources/registry/mod.rs
@@ -201,6 +201,7 @@ use tar::Archive;
use tracing::debug;
use crate::core::dependency::Dependency;
+use crate::core::global_cache_tracker;
use crate::core::{Package, PackageId, SourceId, Summary};
use crate::sources::source::MaybePackage;
use crate::sources::source::QueryKind;
@@ -239,6 +240,7 @@ struct LockMetadata {
///
/// For general concepts of registries, see the [module-level documentation](crate::sources::registry).
pub struct RegistrySource<'cfg> {
+ name: InternedString,
/// The unique identifier of this source.
source_id: SourceId,
/// The path where crate files are extracted (`$CARGO_HOME/registry/src/$REG-HASH`).
@@ -514,6 +516,7 @@ impl<'cfg> RegistrySource<'cfg> {
yanked_whitelist: &HashSet<PackageId>,
) -> RegistrySource<'cfg> {
RegistrySource {
+ name: name.into(),
src_path: config.registry_source_path().join(name),
config,
source_id,
@@ -589,6 +592,13 @@ impl<'cfg> RegistrySource<'cfg> {
match fs::read_to_string(path) {
Ok(ok) => match serde_json::from_str::<LockMetadata>(&ok) {
Ok(lock_meta) if lock_meta.v == 1 => {
+ self.config
+ .deferred_global_last_use()?
+ .mark_registry_src_used(global_cache_tracker::RegistrySrc {
+ encoded_registry_name: self.name,
+ package_dir: package_dir.into(),
+ size: None,
+ });
return Ok(unpack_dir.to_path_buf());
}
_ => {
@@ -613,6 +623,7 @@ impl<'cfg> RegistrySource<'cfg> {
set_mask(&mut tar);
tar
};
+ let mut bytes_written = 0;
let prefix = unpack_dir.file_name().unwrap();
let parent = unpack_dir.parent().unwrap();
for entry in tar.entries()? {
@@ -644,6 +655,7 @@ impl<'cfg> RegistrySource<'cfg> {
continue;
}
// Unpacking failed
+ bytes_written += entry.size();
let mut result = entry.unpack_in(parent).map_err(anyhow::Error::from);
if cfg!(windows) && restricted_names::is_windows_reserved_path(&entry_path) {
result = result.with_context(|| {
@@ -670,6 +682,14 @@ impl<'cfg> RegistrySource<'cfg> {
let lock_meta = LockMetadata { v: 1 };
write!(ok, "{}", serde_json::to_string(&lock_meta).unwrap())?;
+ self.config
+ .deferred_global_last_use()?
+ .mark_registry_src_used(global_cache_tracker::RegistrySrc {
+ encoded_registry_name: self.name,
+ package_dir: package_dir.into(),
+ size: Some(bytes_written),
+ });
+
Ok(unpack_dir.to_path_buf())
}
diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs
index ba171eac3d4..4e7dd5f6c22 100644
--- a/src/cargo/sources/registry/remote.rs
+++ b/src/cargo/sources/registry/remote.rs
@@ -1,5 +1,6 @@
//! Access to a Git index based registry. See [`RemoteRegistry`] for details.
+use crate::core::global_cache_tracker;
use crate::core::{GitReference, PackageId, SourceId};
use crate::sources::git;
use crate::sources::git::fetch::RemoteKind;
@@ -47,6 +48,7 @@ use tracing::{debug, trace};
///
/// [`HttpRegistry`]: super::http_remote::HttpRegistry
pub struct RemoteRegistry<'cfg> {
+ name: InternedString,
/// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`).
index_path: Filesystem,
/// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`).
@@ -87,6 +89,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
/// registry index are stored. Expect to be unique.
pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> RemoteRegistry<'cfg> {
RemoteRegistry {
+ name: name.into(),
index_path: config.registry_index_path().join(name),
cache_path: config.registry_cache_path().join(name),
source_id,
@@ -211,6 +214,11 @@ impl<'cfg> RemoteRegistry<'cfg> {
impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
fn prepare(&self) -> CargoResult<()> {
self.repo()?;
+ self.config
+ .deferred_global_last_use()?
+ .mark_registry_index_used(global_cache_tracker::RegistryIndex {
+ encoded_registry_name: self.name,
+ });
Ok(())
}
@@ -403,6 +411,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
download::download(
&self.cache_path,
&self.config,
+ self.name,
pkg,
checksum,
registry_config,
@@ -415,7 +424,14 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
checksum: &str,
data: &[u8],
) -> CargoResult<File> {
- download::finish_download(&self.cache_path, &self.config, pkg, checksum, data)
+ download::finish_download(
+ &self.cache_path,
+ &self.config,
+ self.name.clone(),
+ pkg,
+ checksum,
+ data,
+ )
}
fn is_crate_downloaded(&self, pkg: PackageId) -> bool {
diff --git a/src/cargo/util/config/mod.rs b/src/cargo/util/config/mod.rs
index 50153466b82..b054541d059 100644
--- a/src/cargo/util/config/mod.rs
+++ b/src/cargo/util/config/mod.rs
@@ -68,6 +68,7 @@ use std::time::Instant;
use self::ConfigValue as CV;
use crate::core::compiler::rustdoc::RustdocExternMap;
+use crate::core::global_cache_tracker::{DeferredGlobalLastUse, GlobalCacheTracker};
use crate::core::shell::Verbosity;
use crate::core::{features, CliUnstable, Shell, SourceId, Workspace, WorkspaceRootConfig};
use crate::ops::RegistryCredentialConfig;
@@ -244,6 +245,8 @@ pub struct Config {
pub nightly_features_allowed: bool,
/// WorkspaceRootConfigs that have been found
pub ws_roots: RefCell<HashMap<PathBuf, WorkspaceRootConfig>>,
+ global_cache_tracker: LazyCell<RefCell<GlobalCacheTracker>>,
+ deferred_global_last_use: LazyCell<RefCell<DeferredGlobalLastUse>>,
}
impl Config {
@@ -317,6 +320,8 @@ impl Config {
env_config: LazyCell::new(),
nightly_features_allowed: matches!(&*features::channel(), "nightly" | "dev"),
ws_roots: RefCell::new(HashMap::new()),
+ global_cache_tracker: LazyCell::new(),
+ deferred_global_last_use: LazyCell::new(),
}
}
@@ -1919,6 +1924,25 @@ impl Config {
) -> CargoResult<Option<CacheLock<'_>>> {
self.package_cache_lock.try_lock(self, mode)
}
+
+ /// Returns a reference to the shared [`GlobalCacheTracker`].
+ ///
+ /// The package cache lock must be held to call this function (and to use
+ /// it in general).
+ pub fn global_cache_tracker(&self) -> CargoResult<RefMut<'_, GlobalCacheTracker>> {
+ let tracker = self.global_cache_tracker.try_borrow_with(|| {
+ Ok::<_, anyhow::Error>(RefCell::new(GlobalCacheTracker::new(self)?))
+ })?;
+ Ok(tracker.borrow_mut())
+ }
+
+ /// Returns a reference to the shared [`DeferredGlobalLastUse`].
+ pub fn deferred_global_last_use(&self) -> CargoResult<RefMut<'_, DeferredGlobalLastUse>> {
+ let deferred = self.deferred_global_last_use.try_borrow_with(|| {
+ Ok::<_, anyhow::Error>(RefCell::new(DeferredGlobalLastUse::new()))
+ })?;
+ Ok(deferred.borrow_mut())
+ }
}
/// Internal error for serde errors.
diff --git a/src/cargo/util/mod.rs b/src/cargo/util/mod.rs
index fb4c4b39c1e..7829592945c 100644
--- a/src/cargo/util/mod.rs
+++ b/src/cargo/util/mod.rs
@@ -62,6 +62,7 @@ mod queue;
pub mod restricted_names;
pub mod rustc;
mod semver_ext;
+pub mod sqlite;
pub mod style;
pub mod toml;
pub mod toml_mut;
diff --git a/src/cargo/util/sqlite.rs b/src/cargo/util/sqlite.rs
new file mode 100644
index 00000000000..b391cc6dbb2
--- /dev/null
+++ b/src/cargo/util/sqlite.rs
@@ -0,0 +1,118 @@
+//! Utilities to help with working with sqlite.
+
+use crate::util::interning::InternedString;
+use crate::CargoResult;
+use rusqlite::types::{FromSql, FromSqlError, ToSql, ToSqlOutput};
+use rusqlite::{Connection, TransactionBehavior};
+
+impl FromSql for InternedString {
+ fn column_result(value: rusqlite::types::ValueRef<'_>) -> Result<Self, FromSqlError> {
+ value.as_str().map(InternedString::new)
+ }
+}
+
+impl ToSql for InternedString {
+ fn to_sql(&self) -> Result<ToSqlOutput<'_>, rusqlite::Error> {
+ Ok(ToSqlOutput::from(self.as_str()))
+ }
+}
+
+/// A function or closure representing a database migration.
+///
+/// Migrations support evolving the schema and contents of the database across
+/// new versions of cargo. The [`migrate`] function should be called
+/// immediately after opening a connection to a database in order to configure
+/// the schema. Whether or not a migration has been done is tracked by the
+/// `pragma_user_version` value in the database. Typically you include the
+/// initial `CREATE TABLE` statements in the first entries of the list, but as
+/// time goes on you can add new tables or `ALTER TABLE` statements. The
+/// migration code will only execute statements that haven't previously been
+/// run.
+///
+/// Important things to note about how you define migrations:
+///
+/// * Never remove a migration entry from the list. Migrations are tracked by
+/// the index number in the list.
+/// * Never perform any schema modifications that would be backwards
+/// incompatible. For example, don't drop tables or columns.
+///
+/// The [`basic_migration`] function is a convenience function for specifying
+/// migrations that are simple SQL statements. If you need to do something
+/// more complex, then you can specify a closure that takes a [`Connection`]
+/// and does whatever is needed.
+///
+/// For example:
+///
+/// ```rust
+/// # use cargo::util::sqlite::*;
+/// # use rusqlite::Connection;
+/// # let mut conn = Connection::open_in_memory()?;
+/// # fn generate_name() -> String { "example".to_string() };
+/// migrate(
+/// &mut conn,
+/// &[
+/// basic_migration(
+/// "CREATE TABLE foo (
+/// id INTEGER PRIMARY KEY AUTOINCREMENT,
+/// name STRING NOT NULL
+/// )",
+/// ),
+/// Box::new(|conn| {
+/// conn.execute("INSERT INTO foo (name) VALUES (?1)", [generate_name()])?;
+/// Ok(())
+/// }),
+/// basic_migration("ALTER TABLE foo ADD COLUMN size INTEGER"),
+/// ],
+/// )?;
+/// # Ok::<(), anyhow::Error>(())
+/// ```
+pub type Migration = Box<dyn Fn(&Connection) -> CargoResult<()>>;
+
+/// A basic migration that is a single static SQL statement.
+///
+/// See [`Migration`] for more information.
+pub fn basic_migration(stmt: &'static str) -> Migration {
+ Box::new(|conn| {
+ conn.execute(stmt, [])?;
+ Ok(())
+ })
+}
+
+/// Perform one-time SQL migrations.
+///
+/// See [`Migration`] for more information.
+pub fn migrate(conn: &mut Connection, migrations: &[Migration]) -> CargoResult<()> {
+ // EXCLUSIVE ensures that it starts with an exclusive write lock. No other
+ // readers will be allowed. This generally shouldn't be needed if there is
+ // a file lock, but might be helpful in cases where cargo's `FileLock`
+ // failed.
+ let tx = conn.transaction_with_behavior(TransactionBehavior::Exclusive)?;
+ let user_version = tx.query_row("SELECT user_version FROM pragma_user_version", [], |row| {
+ row.get(0)
+ })?;
+ if user_version < migrations.len() {
+ for migration in &migrations[user_version..] {
+ migration(&tx)?;
+ }
+ tx.pragma_update(None, "user_version", &migrations.len())?;
+ }
+ tx.commit()?;
+ Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn migrate_twice() -> CargoResult<()> {
+ // Check that a second migration will apply.
+ let mut conn = Connection::open_in_memory()?;
+ let mut migrations = vec![basic_migration("CREATE TABLE foo (a, b, c)")];
+ migrate(&mut conn, &migrations)?;
+ conn.execute("INSERT INTO foo VALUES (1,2,3)", [])?;
+ migrations.push(basic_migration("ALTER TABLE foo ADD COLUMN d"));
+ migrate(&mut conn, &migrations)?;
+ conn.execute("INSERT INTO foo VALUES (1,2,3,4)", [])?;
+ Ok(())
+ }
+}
diff --git a/src/doc/src/reference/unstable.md b/src/doc/src/reference/unstable.md
index 0683daa3c59..b855ec1c3df 100644
--- a/src/doc/src/reference/unstable.md
+++ b/src/doc/src/reference/unstable.md
@@ -85,6 +85,7 @@ For the latest nightly, see the [nightly version] of this page.
* [check-cfg](#check-cfg) --- Compile-time validation of `cfg` expressions.
* [host-config](#host-config) --- Allows setting `[target]`-like configuration settings for host build targets.
* [target-applies-to-host](#target-applies-to-host) --- Alters whether certain flags will be passed to host build targets.
+ * [gc](#gc) --- Global cache garbage collection.
* rustdoc
* [rustdoc-map](#rustdoc-map) --- Provides mappings for documentation to link to external sites like [docs.rs](https://docs.rs/).
* [scrape-examples](#scrape-examples) --- Shows examples within documentation.
@@ -1383,6 +1384,78 @@ This will not affect any hard-coded paths in the source code, such as in strings
Common paths requiring sanitization include `OUT_DIR` and `CARGO_MANIFEST_DIR`, plus any other introduced by the build script, such as include directories.
+## gc
+
+* Tracking Issue: [#12633](https://github.com/rust-lang/cargo/issues/12633)
+
+The `-Zgc` flag enables garbage collection of cargo's global cache within the cargo home directory.
+This includes downloaded dependencies such as compressed `.crate` files, extracted `src` directories, registry index caches, and git dependencies.
+When `-Zgc` is present, cargo will track the last time any index or dependency was used,
+and then use those timestamps to manually or automatically delete cache entries that have not been used for a while.
+
+```sh
+cargo build -Zgc
+```
+
+### Automatic garbage collection
+
+Automatic deletion happens on commands that are already doing a significant amount of work,
+such as all of the build commands (`cargo build`, `cargo test`, `cargo check`, etc.), and `cargo fetch`.
+The deletion happens just after resolution and after packages have been downloaded.
+Automatic deletion is only done once per day (see `gc.auto.frequency` to configure).
+Automatic deletion is disabled if cargo is offline, such as with `--offline` or `--frozen`, to avoid deleting artifacts that may need to be used if you are offline for a long period of time.
+
+#### Automatic gc configuration
+
+The automatic gc behavior can be specified via a cargo configuration setting.
+The settings available are:
+
+```toml
+# Example config.toml file.
+
+# This table defines the behavior for automatic garbage collection.
+[gc.auto]
+# The maximum frequency that automatic garbage collection happens.
+# Can be "never" to disable automatic gc, or "always" to run on every command.
+frequency = "1 day"
+# Anything older than this duration will be deleted in the source cache.
+max-src-age = "1 month"
+# Anything older than this duration will be deleted in the compressed crate cache.
+max-crate-age = "3 months"
+# Any index older than this duration will be deleted from the index cache.
+max-index-age = "3 months"
+# Any git checkout older than this duration will be deleted from the checkout cache.
+max-git-co-age = "1 month"
+# Any git clone older than this duration will be deleted from the git cache.
+max-git-db-age = "3 months"
+```
+
+### Manual garbage collection with `cargo clean`
+
+Manual deletion can be done with the `cargo clean gc` command.
+Deletion of cache contents can be performed by passing one of the cache options:
+
+- `--max-src-age=DURATION` --- Deletes source cache files that have not been used since the given age.
+- `--max-crate-age=DURATION` --- Deletes crate cache files that have not been used since the given age.
+- `--max-index-age=DURATION` --- Deletes registry indexes that have not been used since the given age (including their `.crate` and `src` files).
+- `--max-git-co-age=DURATION` --- Deletes git dependency checkouts that have not been used since the given age.
+- `--max-git-db-age=DURATION` --- Deletes git dependency clones that have not been used since the given age.
+- `--max-download-age=DURATION` --- Deletes any downloaded cache data that has not been used since the given age.
+- `--max-src-size=SIZE` --- Deletes the oldest source cache files until the cache is under the given size.
+- `--max-crate-size=SIZE` --- Deletes the oldest crate cache files until the cache is under the given size.
+- `--max-git-size=SIZE` --- Deletes the oldest git dependency caches until the cache is under the given size.
+- `--max-download-size=SIZE` --- Deletes the oldest downloaded cache data until the cache is under the given size.
+
+A DURATION is specified in the form "N seconds/minutes/days/weeks/months" where N is an integer.
+
+A SIZE is specified in the form "N *suffix*" where *suffix* is B, kB, MB, GB, kiB, MiB, or GiB, and N is an integer or floating point number. If no suffix is specified, the number is the number of bytes.
+
+```sh
+cargo clean gc
+cargo clean gc --max-download-age=1week
+cargo clean gc --max-git-size=0 --max-download-size=100MB
+```
+
# Stabilized and removed features
## Compile progress
diff --git a/tests/testsuite/clean.rs b/tests/testsuite/clean.rs
index fbb4d3e5b40..fef351e9d75 100644
--- a/tests/testsuite/clean.rs
+++ b/tests/testsuite/clean.rs
@@ -1,5 +1,6 @@
//! Tests for the `cargo clean` command.
+use cargo_test_support::paths::CargoPathExt;
use cargo_test_support::registry::Package;
use cargo_test_support::{
basic_bin_manifest, basic_manifest, git, main_file, project, project_in, rustc_host,
@@ -805,15 +806,6 @@ fn clean_dry_run() {
.file("src/lib.rs", "")
.build();
- let ls_r = || -> Vec<_> {
- let mut file_list: Vec<_> = walkdir::WalkDir::new(p.build_dir())
- .into_iter()
- .filter_map(|e| e.map(|e| e.path().to_owned()).ok())
- .collect();
- file_list.sort();
- file_list
- };
-
// Start with no files.
p.cargo("clean --dry-run")
.with_stdout("")
@@ -823,7 +815,7 @@ fn clean_dry_run() {
)
.run();
p.cargo("check").run();
- let before = ls_r();
+ let before = p.build_dir().ls_r();
p.cargo("clean --dry-run")
.with_stderr(
"[SUMMARY] [..] files, [..] total\n\
@@ -831,7 +823,7 @@ fn clean_dry_run() {
)
.run();
// Verify it didn't delete anything.
- let after = ls_r();
+ let after = p.build_dir().ls_r();
assert_eq!(before, after);
let expected = cargo::util::iter_join(before.iter().map(|p| p.to_str().unwrap()), "\n");
eprintln!("{expected}");
diff --git a/tests/testsuite/global_cache_tracker.rs b/tests/testsuite/global_cache_tracker.rs
new file mode 100644
index 00000000000..27216d96c72
--- /dev/null
+++ b/tests/testsuite/global_cache_tracker.rs
@@ -0,0 +1,1835 @@
+//! Tests for last-use tracking and auto-gc.
+//!
+//! Cargo supports an environment variable called `__CARGO_TEST_LAST_USE_NOW`
+//! to have cargo pretend that the current time is the given time (in seconds
+//! since the unix epoch). This is used throughout these tests to simulate
+//! what happens when time passes. The [`days_ago_unix`] and
+//! [`months_ago_unix`] functions help with setting this value.
+
+use super::config::ConfigBuilder;
+use cargo::core::global_cache_tracker::{self, DeferredGlobalLastUse, GlobalCacheTracker};
+use cargo::util::cache_lock::CacheLockMode;
+use cargo::util::interning::InternedString;
+use cargo::Config;
+use cargo_test_support::paths::{self, CargoPathExt};
+use cargo_test_support::registry::{Package, RegistryBuilder};
+use cargo_test_support::{
+ basic_manifest, cargo_process, execs, git, project, retry, sleep_ms, thread_wait_timeout,
+ Project,
+};
+use itertools::Itertools;
+use std::fmt::Write;
+use std::path::PathBuf;
+use std::process::Stdio;
+use std::time::{Duration, SystemTime};
+
+/// Helper to create a simple `foo` project which depends on a registry
+/// dependency called `bar`.
+fn basic_foo_bar_project() -> Project {
+ Package::new("bar", "1.0.0").publish();
+ project()
+ .file(
+ "Cargo.toml",
+ r#"
+ [package]
+ name = "foo"
+ version = "0.1.0"
+
+ [dependencies]
+ bar = "1.0"
+ "#,
+ )
+ .file("src/lib.rs", "")
+ .build()
+}
+
+/// Helper to get the names of files in a directory as strings.
+fn get_names(glob: &str) -> Vec<String> {
+ let mut names: Vec<_> = glob::glob(paths::home().join(glob).to_str().unwrap())
+ .unwrap()
+ .map(|p| p.unwrap().file_name().unwrap().to_str().unwrap().to_owned())
+ .collect();
+ names.sort();
+ names
+}
+
+fn get_registry_names(which: &str) -> Vec<String> {
+ get_names(&format!(".cargo/registry/{which}/*/*"))
+}
+
+fn get_index_names() -> Vec<String> {
+ get_names(&format!(".cargo/registry/index/*"))
+}
+
+fn get_git_db_names() -> Vec<String> {
+ get_names(&format!(".cargo/git/db/*"))
+}
+
+fn get_git_checkout_names(db_name: &str) -> Vec<String> {
+ get_names(&format!(".cargo/git/checkouts/{db_name}/*"))
+}
+
+fn days_ago(n: u64) -> SystemTime {
+ SystemTime::now() - Duration::from_secs(60 * 60 * 24 * n)
+}
+
+/// Helper for simulating running cargo in the past.
Use with the +/// __CARGO_TEST_LAST_USE_NOW environment variable. +fn days_ago_unix(n: u64) -> String { + days_ago(n) + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs() + .to_string() +} + +/// Helper for simulating running cargo in the past. Use with the +/// __CARGO_TEST_LAST_USE_NOW environment variable. +fn months_ago_unix(n: u64) -> String { + days_ago_unix(n * 30) +} + +/// Populates last-use database and the cache files. +/// +/// This makes it easier to more accurately specify exact sizes. Creating +/// specific sizes with `Package` is too difficult. +fn populate_cache(config: &Config, test_crates: &[(&str, u64, u64, u64)]) -> (PathBuf, PathBuf) { + let cache_dir = paths::home().join(".cargo/registry/cache/example.com-a6c4a5adcb232b9a"); + let src_dir = paths::home().join(".cargo/registry/src/example.com-a6c4a5adcb232b9a"); + + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + + let _lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let mut tracker = GlobalCacheTracker::new(&config).unwrap(); + let mut deferred = DeferredGlobalLastUse::new(); + + cache_dir.rm_rf(); + cache_dir.mkdir_p(); + src_dir.rm_rf(); + src_dir.mkdir_p(); + paths::home() + .join(".cargo/registry/index/example.com-a6c4a5adcb232b9a") + .mkdir_p(); + let mut create = |name: &str, age, crate_size: u64, src_size: u64| { + let crate_filename = InternedString::new(&format!("{name}.crate")); + deferred.mark_registry_crate_used_stamp( + global_cache_tracker::RegistryCrate { + encoded_registry_name: "example.com-a6c4a5adcb232b9a".into(), + crate_filename, + size: crate_size, + }, + Some(&days_ago(age)), + ); + deferred.mark_registry_src_used_stamp( + global_cache_tracker::RegistrySrc { + encoded_registry_name: "example.com-a6c4a5adcb232b9a".into(), + package_dir: name.into(), + size: Some(src_size), + }, + Some(&days_ago(age)), + ); + std::fs::write( + cache_dir.join(crate_filename), + "x".repeat(crate_size as usize), + ) + .unwrap(); + let path = src_dir.join(name); + path.mkdir_p(); + std::fs::write(path.join("data"), "x".repeat(src_size as usize)).unwrap() + }; + + for (name, age, crate_size, src_size) in test_crates { + create(name, *age, *crate_size, *src_size); + } + deferred.save(&mut tracker).unwrap(); + + (cache_dir, src_dir) +} + +#[cargo_test] +fn auto_gc_gated() { + // Requires -Zgc to both track last-use data and to run auto-gc. + let p = basic_foo_bar_project(); + p.cargo("check") + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + // Check that it did not create a database or delete anything. + let config = ConfigBuilder::new().build(); + assert!(!GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .exists()); + assert_eq!(get_index_names().len(), 1); + + // Again in the future, shouldn't auto-gc. + p.cargo("check").run(); + assert!(!GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .exists()); + assert_eq!(get_index_names().len(), 1); +} + +#[cargo_test] +fn clean_gc_gated() { + cargo_process("clean gc") + .with_status(101) + .with_stderr( + "\ +error: the `cargo clean gc` command is unstable, and only available on the \ +nightly channel of Cargo, but this is the `stable` channel +See [..] +See [..] +", + ) + .run(); +} + +#[cargo_test] +fn implies_source() { + // Checks that when a src, crate, or checkout is marked as used, the + // corresponding index or git db also gets marked as used. 
+ let config = ConfigBuilder::new().unstable_flag("gc").build(); + let _lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let mut deferred = DeferredGlobalLastUse::new(); + let mut tracker = GlobalCacheTracker::new(&config).unwrap(); + + deferred.mark_registry_crate_used(global_cache_tracker::RegistryCrate { + encoded_registry_name: "example.com-a6c4a5adcb232b9a".into(), + crate_filename: "regex-1.8.4.crate".into(), + size: 123, + }); + deferred.mark_registry_src_used(global_cache_tracker::RegistrySrc { + encoded_registry_name: "index.crates.io-6f17d22bba15001f".into(), + package_dir: "rand-0.8.5".into(), + size: None, + }); + deferred.mark_git_checkout_used(global_cache_tracker::GitCheckout { + encoded_git_name: "cargo-e7ff1db891893a9e".into(), + short_name: "f0a4ee0".into(), + size: None, + }); + deferred.save(&mut tracker).unwrap(); + + let mut indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 2); + indexes.sort_by(|a, b| a.0.encoded_registry_name.cmp(&b.0.encoded_registry_name)); + assert_eq!( + indexes[0].0.encoded_registry_name, + "example.com-a6c4a5adcb232b9a" + ); + assert_eq!( + indexes[1].0.encoded_registry_name, + "index.crates.io-6f17d22bba15001f" + ); + + let dbs = tracker.git_db_all().unwrap(); + assert_eq!(dbs.len(), 1); + assert_eq!(dbs[0].0.encoded_git_name, "cargo-e7ff1db891893a9e"); +} + +#[cargo_test] +fn auto_gc_defaults() { + // Checks that the auto-gc deletes old entries, and leaves new ones intact. + Package::new("old", "1.0.0").publish(); + Package::new("new", "1.0.0").publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + old = "1.0" + new = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0", "old-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again with just one package. Make sure the old src gets deleted, + // but .crate does not. + p.change_file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + new = "1.0" + "#, + ); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(2)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again after the .crate should have aged out. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["new-1.0.0.crate"]); +} + +#[cargo_test] +fn auto_gc_config() { + // Can configure auto gc settings. + Package::new("old", "1.0.0").publish(); + Package::new("new", "1.0.0").publish(); + let p = project() + .file( + ".cargo/config.toml", + r#" + [gc.auto] + frequency = "always" + max-src-age = "1 day" + max-crate-age = "3 days" + max-index-age = "3 days" + max-git-co-age = "1 day" + max-git-db-age = "3 days" + "#, + ) + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + old = "1.0" + new = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. 
+ p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(4)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0", "old-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again with just one package. Make sure the old src gets deleted, + // but .crate does not. + p.change_file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + new = "1.0" + "#, + ); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again after the .crate should have aged out. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["new-1.0.0.crate"]); +} + +#[cargo_test] +fn frequency() { + // gc.auto.frequency settings + let p = basic_foo_bar_project(); + p.change_file( + ".cargo/config.toml", + r#" + [gc.auto] + frequency = "never" + "#, + ); + // Populate data in the past. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_index_names().len(), 1); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + + p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + + // Try after the default expiration time, with "never" it shouldn't gc. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_index_names().len(), 1); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + + // Try again with a setting that allows it to run. + p.cargo("check -Zgc") + .env("CARGO_GC_AUTO_FREQUENCY", "1 day") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_index_names().len(), 0); + assert_eq!(get_registry_names("src").len(), 0); + assert_eq!(get_registry_names("cache").len(), 0); +} + +#[cargo_test] +fn auto_gc_index() { + // Deletes the index if it hasn't been used in a while. + let p = basic_foo_bar_project(); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_index_names().len(), 1); + + // Make sure it stays within the time frame. + p.change_file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + "#, + ); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(2)) + .run(); + assert_eq!(get_index_names().len(), 1); + + // After it expires, it should be deleted. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_index_names().len(), 0); +} + +#[cargo_test] +fn auto_gc_git() { + // auto-gc should delete git checkouts and dbs. + + // Returns the short git name of a a checkout. + let short_id = |repo: &git2::Repository| -> String { + let head = repo.revparse_single("HEAD").unwrap(); + let short_id = head.short_id().unwrap(); + short_id.as_str().unwrap().to_owned() + }; + + // Set up a git dependency and fetch it and populate the database, + // 6 months in the past. 
+ let (git_project, git_repo) = git::new_repo("bar", |p| { + p.file("Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(6)) + .run(); + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let first_short_oid = short_id(&git_repo); + assert_eq!( + get_git_checkout_names(&db_names[0]), + [first_short_oid.clone()] + ); + + // Use a new git checkout, should keep both. + git_project.change_file("src/lib.rs", "// modified"); + git::add(&git_repo); + git::commit(&git_repo); + p.cargo("update -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(6)) + .run(); + assert_eq!(get_git_db_names().len(), 1); + let second_short_oid = short_id(&git_repo); + let mut both = vec![first_short_oid, second_short_oid.clone()]; + both.sort(); + assert_eq!(get_git_checkout_names(&db_names[0]), both); + + // In the future, using the second checkout should delete the first. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_git_db_names().len(), 1); + assert_eq!( + get_git_checkout_names(&db_names[0]), + [second_short_oid.clone()] + ); + + // After three months, the db should get deleted. + p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_git_db_names().len(), 0); + assert_eq!(get_git_checkout_names(&db_names[0]).len(), 0); +} + +#[cargo_test] +fn auto_gc_various_commands() { + // Checks that auto gc works with a variety of commands. + // + // Auto-gc is only run on a subset of commands. Generally it is run on + // commands that are already doing a lot of work, or heavily involve the + // use of the registry. + Package::new("bar", "1.0.0").publish(); + let cmds = ["check", "fetch"]; + for cmd in cmds { + eprintln!("checking command {cmd}"); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. + p.cargo(cmd) + .arg("-Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 1); + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), 1); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); + drop(lock); + + // After everything is aged out, it should all be deleted. 
+ p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + p.cargo(cmd) + .arg("-Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 0); + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), 0); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 0); + drop(tracker); + drop(lock); + paths::home().join(".cargo/registry").rm_rf(); + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + } +} + +#[cargo_test] +fn updates_last_use_various_commands() { + // Checks that last-use tracking is updated by various commands. + // + // Not *all* commands update the index tracking, even though they + // technically involve reading the index. There isn't a convenient place + // to ensure it gets saved while avoiding saving too often in other + // commands. For the most part, this should be fine, since these commands + // usually aren't run without running one of the commands that does save + // the tracking. Some of the commands are: + // + // - login, owner, yank, search + // - report future-incompatibilities + // - package --no-verify + // - fetch --locked + Package::new("bar", "1.0.0").publish(); + let cmds = [ + // name, expected_crates (0=doesn't download) + ("check", 1), + ("fetch", 1), + ("tree", 1), + ("generate-lockfile", 0), + ("update", 0), + ("metadata", 1), + ("vendor --respect-source-config", 1), + ]; + for (cmd, expected_crates) in cmds { + eprintln!("checking command {cmd}"); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. + p.cargo(cmd) + .arg("-Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 1); + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), expected_crates); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), expected_crates); + drop(tracker); + drop(lock); + paths::home().join(".cargo/registry").rm_rf(); + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + } +} + +#[cargo_test] +fn both_git_and_http_index_cleans() { + // Checks that either the git or http index cache gets cleaned. 
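+    //
+    // The default test registry uses a git index, while the alternative
+    // registry below is built with `http_index()`, so a single run exercises
+    // cleaning of both index kinds.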
+    let _crates_io = RegistryBuilder::new().build();
+    let _alternative = RegistryBuilder::new().alternative().http_index().build();
+    Package::new("from_git", "1.0.0").publish();
+    Package::new("from_http", "1.0.0")
+        .alternative(true)
+        .publish();
+    let p = project()
+        .file(
+            "Cargo.toml",
+            r#"
+                [package]
+                name = "foo"
+                version = "0.1.0"
+
+                [dependencies]
+                from_git = "1.0"
+                from_http = { version = "1.0", registry = "alternative" }
+            "#,
+        )
+        .file("src/lib.rs", "")
+        .build();
+
+    p.cargo("update -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
+        .run();
+    let config = ConfigBuilder::new().unstable_flag("gc").build();
+    let lock = config
+        .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+        .unwrap();
+    let tracker = GlobalCacheTracker::new(&config).unwrap();
+    let indexes = tracker.registry_index_all().unwrap();
+    assert_eq!(indexes.len(), 2);
+    assert_eq!(get_index_names().len(), 2);
+    drop(lock);
+
+    // Running in the future without these indexes should delete them.
+    p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0"));
+    p.cargo("clean gc -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .run();
+    let lock = config
+        .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+        .unwrap();
+    let indexes = tracker.registry_index_all().unwrap();
+    assert_eq!(indexes.len(), 0);
+    assert_eq!(get_index_names().len(), 0);
+    drop(lock);
+}
+
+#[cargo_test]
+fn clean_gc_dry_run() {
+    // Basic `clean gc --dry-run` test.
+    let p = basic_foo_bar_project();
+    // Populate the last-use data.
+    p.cargo("fetch -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
+        .run();
+
+    let registry_root = paths::home().join(".cargo/registry");
+    let glob_registry = |name| -> PathBuf {
+        let mut paths: Vec<_> = glob::glob(registry_root.join(name).join("*").to_str().unwrap())
+            .unwrap()
+            .map(|p| p.unwrap())
+            .collect();
+        assert_eq!(paths.len(), 1);
+        paths.pop().unwrap()
+    };
+    let index = glob_registry("index").ls_r();
+    let src = glob_registry("src").ls_r();
+    let cache = glob_registry("cache").ls_r();
+    let expected_files = index
+        .iter()
+        .chain(src.iter())
+        .chain(cache.iter())
+        .map(|p| p.to_str().unwrap())
+        .join("\n");
+
+    p.cargo("clean gc --dry-run -v -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stdout_unordered(&expected_files)
+        .with_stderr(
+            "[SUMMARY] [..] files, [..] total\n\
+             [WARNING] no files deleted due to --dry-run",
+        )
+        .run();
+
+    // Again, make sure the information is still tracked.
+    p.cargo("clean gc --dry-run -v -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stdout_unordered(&expected_files)
+        .with_stderr(
+            "[SUMMARY] [..] files, [..] total\n\
+             [WARNING] no files deleted due to --dry-run",
+        )
+        .run();
+}
+
+#[cargo_test]
+fn clean_default_gc() {
+    // `clean gc` without options should also gc.
+    let p = basic_foo_bar_project();
+    // Populate the last-use data.
+    p.cargo("fetch -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
+        .run();
+    p.cargo("clean gc -v -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stderr_unordered(
+            "\
+[REMOVING] [ROOT]/home/.cargo/registry/index/[..]
+[REMOVING] [ROOT]/home/.cargo/registry/src/[..]
+[REMOVING] [ROOT]/home/.cargo/registry/cache/[..]
+[REMOVED] [..] files, [..] total
+",
+        )
+        .run();
+}
+
+#[cargo_test]
+fn tracks_sizes() {
+    // Checks that sizes are properly tracked in the db.
+ Package::new("dep1", "1.0.0") + .file("src/lib.rs", "") + .publish(); + Package::new("dep2", "1.0.0") + .file("src/lib.rs", "") + .file("data", &"abcdefghijklmnopqrstuvwxyz".repeat(1000)) + .publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + dep1 = "1.0" + dep2 = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + + // Check that the crate sizes are the same as on disk. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let _lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let mut crates = tracker.registry_crate_all().unwrap(); + crates.sort_by(|a, b| a.0.crate_filename.cmp(&b.0.crate_filename)); + let db_sizes: Vec<_> = crates.iter().map(|c| c.0.size).collect(); + + let mut actual: Vec<_> = p + .glob(paths::home().join(".cargo/registry/cache/*/*")) + .map(|p| p.unwrap()) + .collect(); + actual.sort(); + let actual_sizes: Vec<_> = actual + .iter() + .map(|path| std::fs::metadata(path).unwrap().len()) + .collect(); + assert_eq!(db_sizes, actual_sizes); + + // Also check the src sizes are computed. + let mut srcs = tracker.registry_src_all().unwrap(); + srcs.sort_by(|a, b| a.0.package_dir.cmp(&b.0.package_dir)); + let db_sizes: Vec<_> = srcs.iter().map(|c| c.0.size.unwrap()).collect(); + let mut actual: Vec<_> = p + .glob(paths::home().join(".cargo/registry/src/*/*")) + .map(|p| p.unwrap()) + .collect(); + actual.sort(); + // .cargo-ok is not tracked in the size. + actual.iter().for_each(|p| p.join(".cargo-ok").rm_rf()); + let actual_sizes: Vec<_> = actual + .iter() + .map(|path| cargo_util::du(path, &[]).unwrap()) + .collect(); + assert_eq!(db_sizes, actual_sizes); + assert!(db_sizes[1] > 26000); +} + +#[cargo_test] +fn max_size() { + // Checks --max-crate-size and --max-src-size with various cleaning thresholds. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + + let test_crates = [ + // name, age, crate_size, src_size + ("a-1.0.0", 5, 1, 1), + ("b-1.0.0", 6, 2, 2), + ("c-1.0.0", 3, 3, 3), + ("d-1.0.0", 2, 4, 4), + ("e-1.0.0", 2, 5, 5), + ("f-1.0.0", 9, 6, 6), + ("g-1.0.0", 1, 1, 1), + ]; + + // Determine the order things get deleted so they can be verified. + let mut names_by_timestamp: Vec<_> = test_crates + .iter() + .map(|(name, age, _, _)| (days_ago_unix(*age), name)) + .collect(); + names_by_timestamp.sort(); + let names_by_timestamp: Vec<_> = names_by_timestamp + .into_iter() + .map(|(_, name)| name) + .collect(); + + // This exercises the different boundary conditions. 
+    for (clean_size, files, bytes) in [
+        (22, 0, 0),
+        (21, 1, 6),
+        (16, 1, 6),
+        (15, 2, 8),
+        (14, 2, 8),
+        (13, 3, 9),
+        (12, 4, 12),
+        (10, 4, 12),
+        (9, 5, 16),
+        (6, 5, 16),
+        (5, 6, 21),
+        (1, 6, 21),
+        (0, 7, 22),
+    ] {
+        let (removed, kept) = names_by_timestamp.split_at(files);
+        // --max-crate-size
+        let (cache_dir, src_dir) = populate_cache(&config, &test_crates);
+        let mut stderr = String::new();
+        for name in removed {
+            writeln!(stderr, "[REMOVING] [..]{name}.crate").unwrap();
+        }
+        let total_display = if removed.is_empty() {
+            String::new()
+        } else {
+            format!(", {bytes}B total")
+        };
+        let files_display = if files == 1 {
+            format!("1 file")
+        } else {
+            format!("{files} files")
+        };
+        write!(stderr, "[REMOVED] {files_display}{total_display}").unwrap();
+        cargo_process(&format!("clean gc -Zgc -v --max-crate-size={clean_size}"))
+            .masquerade_as_nightly_cargo(&["gc"])
+            .with_stderr_unordered(&stderr)
+            .run();
+        for name in kept {
+            assert!(cache_dir.join(format!("{name}.crate")).exists());
+        }
+        for name in removed {
+            assert!(!cache_dir.join(format!("{name}.crate")).exists());
+        }
+
+        // --max-src-size
+        populate_cache(&config, &test_crates);
+        let mut stderr = String::new();
+        for name in removed {
+            writeln!(stderr, "[REMOVING] [..]{name}").unwrap();
+        }
+        let total_display = if files == 0 {
+            String::new()
+        } else {
+            format!(", {bytes}B total")
+        };
+        write!(stderr, "[REMOVED] {files_display}{total_display}").unwrap();
+        cargo_process(&format!("clean gc -Zgc -v --max-src-size={clean_size}"))
+            .masquerade_as_nightly_cargo(&["gc"])
+            .with_stderr_unordered(&stderr)
+            .run();
+        for name in kept {
+            assert!(src_dir.join(name).exists());
+        }
+        for name in removed {
+            assert!(!src_dir.join(name).exists());
+        }
+    }
+}
+
+#[cargo_test]
+fn max_size_untracked_crate() {
+    // When a .crate file exists from an older version of cargo that did not
+    // track sizes, `clean --max-crate-size` should populate the db with the
+    // sizes.
+    let config = ConfigBuilder::new().unstable_flag("gc").build();
+    let cache = paths::home().join(".cargo/registry/cache/example.com-a6c4a5adcb232b9a");
+    cache.mkdir_p();
+    paths::home()
+        .join(".cargo/registry/index/example.com-a6c4a5adcb232b9a")
+        .mkdir_p();
+    // Create the `.crate` files.
+    let test_crates = [
+        // name, size
+        ("a-1.0.0.crate", 1234),
+        ("b-1.0.0.crate", 42),
+        ("c-1.0.0.crate", 0),
+    ];
+    for (name, size) in test_crates {
+        std::fs::write(cache.join(name), "x".repeat(size as usize)).unwrap()
+    }
+    // This should scan the directory and populate the db with the size information.
+    cargo_process("clean gc -Zgc -v --max-crate-size=100000")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stderr("[REMOVED] 0 files")
+        .run();
+    // Check that it stored the size data.
+    let _lock = config
+        .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+        .unwrap();
+    let tracker = GlobalCacheTracker::new(&config).unwrap();
+    let crates = tracker.registry_crate_all().unwrap();
+    let mut actual: Vec<_> = crates
+        .iter()
+        .map(|(rc, _time)| (rc.crate_filename.as_str(), rc.size))
+        .collect();
+    actual.sort();
+    assert_eq!(test_crates, actual.as_slice());
+}
+
+/// Helper to prepare the max-size test.
+fn max_size_untracked_prepare() -> (Config, Project) {
+    // First, publish and download a dependency.
+    let p = basic_foo_bar_project();
+    p.cargo("fetch").run();
+    // Pretend it was an older version that did not track last-use.
+ let config = ConfigBuilder::new().unstable_flag("gc").build(); + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + (config, p) +} + +/// Helper to verify the max-size test. +fn max_size_untracked_verify(config: &Config) { + let actual: Vec<_> = glob::glob( + paths::home() + .join(".cargo/registry/src/*/*") + .to_str() + .unwrap(), + ) + .unwrap() + .map(|p| p.unwrap()) + .collect(); + assert_eq!(actual.len(), 1); + let actual_size = cargo_util::du(&actual[0], &[]).unwrap(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); + assert_eq!(srcs[0].0.size, Some(actual_size)); + drop(lock); +} + +#[cargo_test] +fn max_size_untracked_src_from_use() { + // When a src directory exists from an older version of cargo that did not + // track sizes, doing a build should populate the db with an entry with an + // unknown size. `clean --max-src-size` should then fix the size. + let (config, p) = max_size_untracked_prepare(); + + // Run a command that will update the db with an unknown src size. + p.cargo("tree -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + // Check that it is None. + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); + assert_eq!(srcs[0].0.size, None); + drop(lock); + + // Fix the size. + p.cargo("clean gc -v --max-src-size=10000 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 0 files") + .run(); + max_size_untracked_verify(&config); +} + +#[cargo_test] +fn max_size_untracked_src_from_clean() { + // When a src directory exists from an older version of cargo that did not + // track sizes, `clean --max-src-size` should populate the db with the + // sizes. + let (config, p) = max_size_untracked_prepare(); + + // Clean should scan the src and update the db. + p.cargo("clean gc -v --max-src-size=10000 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 0 files") + .run(); + max_size_untracked_verify(&config); +} + +#[cargo_test] +fn max_download_size() { + // --max-download-size + // + // This creates some sample crates of specific sizes, and then tries + // deleting at various specific size thresholds that exercise different + // edge conditions. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + + let test_crates = [ + // name, age, crate_size, src_size + ("d-1.0.0", 4, 4, 5), + ("c-1.0.0", 3, 3, 3), + ("a-1.0.0", 1, 2, 5), + ("b-1.0.0", 1, 1, 7), + ]; + + for (max_size, num_deleted, files_deleted, bytes) in [ + (30, 0, 0, 0), + (29, 1, 1, 5), + (24, 2, 2, 9), + (20, 3, 3, 12), + (1, 7, 7, 29), + (0, 8, 8, 30), + ] { + populate_cache(&config, &test_crates); + // Determine the order things will be deleted. 
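+        //
+        // Each entry contributes both a src directory and a .crate file, in
+        // timestamp order (oldest first). For example, `--max-download-size=24`
+        // needs to free 30 - 24 = 6 bytes, so d's src (5 bytes) and d's
+        // .crate (4 bytes) are expected to go, matching the (24, 2, 2, 9)
+        // row above.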
+        let delete_order: Vec<String> = test_crates
+            .iter()
+            .flat_map(|(name, _, _, _)| [name.to_string(), format!("{name}.crate")])
+            .collect();
+        let (removed, _kept) = delete_order.split_at(num_deleted);
+        let mut stderr = String::new();
+        for name in removed {
+            writeln!(stderr, "[REMOVING] [..]{name}").unwrap();
+        }
+        let files_display = if files_deleted == 1 {
+            format!("1 file")
+        } else {
+            format!("{files_deleted} files")
+        };
+        let total_display = if removed.is_empty() {
+            String::new()
+        } else {
+            format!(", {bytes}B total")
+        };
+        write!(stderr, "[REMOVED] {files_display}{total_display}").unwrap();
+        cargo_process(&format!("clean gc -Zgc -v --max-download-size={max_size}"))
+            .masquerade_as_nightly_cargo(&["gc"])
+            .with_stderr_unordered(&stderr)
+            .run();
+    }
+}
+
+#[cargo_test]
+fn package_cache_lock_during_build() {
+    // Verifies that a shared lock is held during a build. Resolution and
+    // downloads should be OK while that is held, but mutation should block.
+    //
+    // This works by launching a build with a build script that will pause.
+    // Then it performs other cargo commands and verifies their behavior.
+    Package::new("bar", "1.0.0").publish();
+    let p_foo = project()
+        .file(
+            "Cargo.toml",
+            r#"
+                [package]
+                name = "foo"
+                version = "0.1.0"
+
+                [dependencies]
+                bar = "1.0"
+            "#,
+        )
+        .file("src/lib.rs", "")
+        .file(
+            "build.rs",
+            r#"
+                fn main() {
+                    std::fs::write("blocking", "").unwrap();
+                    let path = std::path::Path::new("ready");
+                    loop {
+                        if path.exists() {
+                            break;
+                        } else {
+                            std::thread::sleep(std::time::Duration::from_millis(100))
+                        }
+                    }
+                }
+            "#,
+        )
+        .build();
+    let p_foo2 = project()
+        .at("foo2")
+        .file(
+            "Cargo.toml",
+            r#"
+                [package]
+                name = "foo2"
+                version = "0.1.0"
+
+                [dependencies]
+                bar = "1.0"
+            "#,
+        )
+        .file("src/lib.rs", "")
+        .build();
+
+    // Start a build that will pause once the build script runs.
+    let mut foo_child = p_foo
+        .cargo("check -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .build_command()
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .unwrap();
+
+    // Wait for it to enter the build script.
+    retry(100, || p_foo.root().join("blocking").exists().then_some(()));
+
+    // Start a build with a different target directory. It should not block,
+    // even though it gets a download lock, and then a shared lock.
+    //
+    // Also verify that auto-gc gets disabled.
+    p_foo2
+        .cargo("check -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .env("CARGO_GC_AUTO_FREQUENCY", "always")
+        .env("CARGO_LOG", "gc=debug")
+        .with_stderr_contains("[UPDATING] `dummy-registry` index")
+        .with_stderr_contains("[CHECKING] bar v1.0.0")
+        .with_stderr_contains("[CHECKING] foo2 v0.1.0 [..]")
+        .with_stderr_contains("[FINISHED] [..]")
+        .with_stderr_contains("[..]unable to acquire mutate lock, auto gc disabled")
+        .run();
+
+    // Ensure that the first build really blocked.
+    assert!(matches!(foo_child.try_wait(), Ok(None)));
+
+    // Cleaning while a command is running should block.
+    let mut clean_cmd = p_foo2
+        .cargo("clean gc --max-download-size=0 -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .build_command();
+    clean_cmd.stderr(Stdio::piped());
+    let mut clean_child = clean_cmd.spawn().unwrap();
+
+    // Give the clean command a chance to finish (it shouldn't).
+    sleep_ms(500);
+    // They should both still be running.
+    assert!(matches!(foo_child.try_wait(), Ok(None)));
+    assert!(matches!(clean_child.try_wait(), Ok(None)));
+
+    // Let the original build finish.
+    p_foo.change_file("ready", "");
+
+    // Wait for clean to finish.
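+    // The wait happens on a helper thread with a timeout so that a
+    // deadlocked `clean` fails the test instead of hanging it.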
+    let thread = std::thread::spawn(|| clean_child.wait_with_output().unwrap());
+    let output = thread_wait_timeout(100, thread);
+    assert!(output.status.success());
+    // Validate the output of the clean.
+    execs()
+        .with_stderr(
+            "\
+[BLOCKING] waiting for file lock on package cache mutation
+[REMOVED] [..]
+",
+        )
+        .run_output(&output);
+}
+
+#[cargo_test]
+fn read_only_locking_auto_gc() {
+    // Tests the behavior for auto-gc on a read-only directory.
+    let p = basic_foo_bar_project();
+    // Populate cache.
+    p.cargo("fetch -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .run();
+    let cargo_home = paths::home().join(".cargo");
+    let mut perms = std::fs::metadata(&cargo_home).unwrap().permissions();
+    // Test when it can't update the auto-gc db.
+    perms.set_readonly(true);
+    std::fs::set_permissions(&cargo_home, perms.clone()).unwrap();
+    p.cargo("check -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stderr(
+            "\
+[CHECKING] bar v1.0.0
+[CHECKING] foo v0.1.0 [..]
+[FINISHED] [..]
+",
+        )
+        .run();
+    // Try again without the last-use existing (such as if the cache was
+    // populated by an older version of cargo).
+    perms.set_readonly(false);
+    std::fs::set_permissions(&cargo_home, perms.clone()).unwrap();
+    let config = ConfigBuilder::new().build();
+    GlobalCacheTracker::db_path(&config)
+        .into_path_unlocked()
+        .rm_rf();
+    perms.set_readonly(true);
+    std::fs::set_permissions(&cargo_home, perms.clone()).unwrap();
+    p.cargo("check -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stderr("[FINISHED] [..]")
+        .run();
+    perms.set_readonly(false);
+    std::fs::set_permissions(&cargo_home, perms).unwrap();
+}
+
+#[cargo_test]
+fn delete_index_also_deletes_crates() {
+    // Checks that when an index is deleted, the src and cache directories
+    // also get deleted.
+    let p = basic_foo_bar_project();
+    p.cargo("fetch -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
+        .run();
+
+    assert_eq!(get_registry_names("src"), ["bar-1.0.0"]);
+    assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]);
+
+    p.cargo("clean gc")
+        .arg("--max-index-age=0 days")
+        .arg("-Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .with_stderr("[REMOVED] [..]")
+        .run();
+
+    assert_eq!(get_registry_names("src").len(), 0);
+    assert_eq!(get_registry_names("cache").len(), 0);
+}
+
+#[cargo_test]
+fn clean_syncs_missing_files() {
+    // When files go missing in the cache, clean operations that need to track
+    // the size should also remove them from the database.
+    Package::new("bar", "1.0.0").publish();
+    Package::new("baz", "1.0.0").publish();
+    let p = project()
+        .file(
+            "Cargo.toml",
+            r#"
+                [package]
+                name = "foo"
+                version = "0.1.0"
+
+                [dependencies]
+                bar = "1.0"
+                baz = "1.0"
+            "#,
+        )
+        .file("src/lib.rs", "")
+        .build();
+    p.cargo("fetch -Zgc")
+        .masquerade_as_nightly_cargo(&["gc"])
+        .run();
+
+    // Verify things are tracked.
+    let config = ConfigBuilder::new().unstable_flag("gc").build();
+    let lock = config
+        .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+        .unwrap();
+    let tracker = GlobalCacheTracker::new(&config).unwrap();
+    let crates = tracker.registry_crate_all().unwrap();
+    assert_eq!(crates.len(), 2);
+    let srcs = tracker.registry_src_all().unwrap();
+    assert_eq!(srcs.len(), 2);
+    drop(lock);
+
+    // Remove the files.
+ for pattern in [ + ".cargo/registry/cache/*/bar-1.0.0.crate", + ".cargo/registry/src/*/bar-1.0.0", + ] { + p.glob(paths::home().join(pattern)) + .map(|p| p.unwrap()) + .next() + .unwrap() + .rm_rf(); + } + + // Clean should update the db. + p.cargo("clean gc -v --max-download-size=1GB -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 0 files") + .run(); + + // Verify + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), 1); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); +} + +#[cargo_test] +fn offline_doesnt_auto_gc() { + // When running offline, auto-gc shouldn't run. + let p = basic_foo_bar_project(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + // Remove the dependency. + p.change_file("Cargo.toml", &basic_manifest("foo", "0.1.0")); + // Run offline, make sure it doesn't delete anything + p.cargo("check --offline -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[CHECKING] foo v0.1.0[..]\n[FINISHED][..]") + .run(); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + // Run online, make sure auto-gc runs. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[FINISHED][..]") + .run(); + assert_eq!(get_registry_names("src"), &[] as &[String]); + assert_eq!(get_registry_names("cache"), &[] as &[String]); +} + +#[cargo_test] +fn can_handle_future_schema() -> anyhow::Result<()> { + // It should work when a future version of cargo has made schema changes + // to the database. + let p = basic_foo_bar_project(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + // Modify the schema to pretend this is done by a future version of cargo. + let config = ConfigBuilder::new().build(); + let db_path = GlobalCacheTracker::db_path(&config).into_path_unlocked(); + let conn = rusqlite::Connection::open(&db_path)?; + let user_version: u32 = + conn.query_row("SELECT user_version FROM pragma_user_version", [], |row| { + row.get(0) + })?; + conn.execute("ALTER TABLE global_data ADD COLUMN foo DEFAULT 123", [])?; + conn.pragma_update(None, "user_version", &(user_version + 1))?; + drop(conn); + // Verify it doesn't blow up. + p.cargo("clean gc --max-download-size=0 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 4 files, [..] total") + .run(); + Ok(()) +} + +#[cargo_test] +fn clean_max_git_age() { + // --max-git-*-age flags + let (git_a, git_a_repo) = git::new_repo("git_a", |p| { + p.file("Cargo.toml", &basic_manifest("git_a", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + git_a = {{ git = '{}' }} + "#, + git_a.url() + ), + ) + .file("src/lib.rs", "") + .build(); + // Populate last-use tracking. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(4)) + .run(); + // Update git_a to create a separate checkout. + git_a.change_file("src/lib.rs", "// test"); + git::add(&git_a_repo); + git::commit(&git_a_repo); + // Update last-use tracking, where the first git checkout will stay "old". 
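+    //
+    // Timeline: the first checkout was last used 4 days ago and the second
+    // 2 days ago, so `--max-git-co-age=3 days` below should remove only the
+    // first one.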
+ p.cargo("update -p git_a -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .with_stderr( + "\ +[UPDATING] git repository [..] +[UPDATING] git_a v1.0.0 [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let db_name = &db_names[0]; + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 2); + + // Delete the first checkout + p.cargo("clean gc -v -Zgc") + .arg("--max-git-co-age=3 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/git_a-[..]/[..] +[REMOVED] [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 1); + + // delete the second checkout + p.cargo("clean gc -v -Zgc") + .arg("--max-git-co-age=0 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/git_a-[..]/[..] +[REMOVED] [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 0); + + // delete the db + p.cargo("clean gc -v -Zgc") + .arg("--max-git-db-age=1 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/db/git_a-[..] +[REMOVING] [ROOT]/home/.cargo/git/checkouts/git_a-[..] +[REMOVED] [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 0); + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 0); +} + +#[cargo_test] +fn clean_max_src_crate_age() { + // --max-src-age and --max-crate-age flags + let p = basic_foo_bar_project(); + // Populate last-use tracking. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(4)) + .run(); + // Update bar to create a separate copy with a different timestamp. + Package::new("bar", "1.0.1").publish(); + p.cargo("update -p bar -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .with_stderr( + "\ +[UPDATING] `dummy-registry` index +[UPDATING] bar v1.0.0 -> v1.0.1 +", + ) + .run(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .with_stderr( + "\ +[DOWNLOADING] crates ... +[DOWNLOADED] bar v1.0.1 [..] +", + ) + .run(); + + assert_eq!(get_registry_names("src"), ["bar-1.0.0", "bar-1.0.1"]); + assert_eq!( + get_registry_names("cache"), + ["bar-1.0.0.crate", "bar-1.0.1.crate"] + ); + + // Delete the old src. + p.cargo("clean gc -v -Zgc") + .arg("--max-src-age=3 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.0 +[REMOVED] [..] +", + ) + .run(); + + // delete the second src + p.cargo("clean gc -v -Zgc") + .arg("--max-src-age=0 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.1 +[REMOVED] [..] +", + ) + .run(); + + // delete the old crate + p.cargo("clean gc -v -Zgc") + .arg("--max-crate-age=3 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.0.crate +[REMOVED] [..] +", + ) + .run(); + + // delete the seecond crate + p.cargo("clean gc -v -Zgc") + .arg("--max-crate-age=0 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.1.crate +[REMOVED] [..] 
+", + ) + .run(); +} + +#[cargo_test] +fn clean_max_git_size() { + // clean --max-git-size + // + // Creates two checkouts. The sets a size threshold to delete one. And + // then with 0 max size to delete everything. + let (git_project, git_repo) = git::new_repo("bar", |p| { + p.file("Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + // Fetch and populate db. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(3)) + .run(); + + // Figure out the name of the first checkout. + let git_root = paths::home().join(".cargo/git"); + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let db_name = &db_names[0]; + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 1); + let first_co_name = &co_names[0]; + + // Make an update and create a new checkout. + git_project.change_file("src/lib.rs", "// modified"); + git::add(&git_repo); + git::commit(&git_repo); + p.cargo("update -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + // Use a different time so that the first checkout timestamp is less + // than the second. + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .run(); + + // Figure out the threshold to use. + let mut co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 2); + co_names.retain(|name| name != first_co_name); + assert_eq!(co_names.len(), 1); + let second_co_name = &co_names[0]; + let second_co_path = git_root + .join("checkouts") + .join(db_name) + .join(second_co_name); + let second_co_size = cargo_util::du(&second_co_path, &["!.git"]).unwrap(); + + let db_size = cargo_util::du(&git_root.join("db").join(db_name), &[]).unwrap(); + + let threshold = db_size + second_co_size; + + p.cargo(&format!("clean gc --max-git-size={threshold} -Zgc -v")) + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr(&format!( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/{db_name}/{first_co_name} +[REMOVED] [..] +" + )) + .run(); + + // And then try cleaning everything. + p.cargo("clean gc --max-git-size=0 -Zgc -v") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr_unordered(&format!( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/{db_name}/{second_co_name} +[REMOVING] [ROOT]/home/.cargo/git/db/{db_name} +[REMOVED] [..] +" + )) + .run(); +} + +// Helper for setting up fake git sizes for git size cleaning. +fn setup_fake_git_sizes(db_name: &str, db_size: usize, co_sizes: &[usize]) { + let base_git = paths::home().join(".cargo/git"); + let db_path = base_git.join("db").join(db_name); + db_path.mkdir_p(); + std::fs::write(db_path.join("test"), "x".repeat(db_size)).unwrap(); + let base_co = base_git.join("checkouts").join(db_name); + for (i, size) in co_sizes.iter().enumerate() { + let co_name = format!("co{i}"); + let co_path = base_co.join(co_name); + co_path.mkdir_p(); + std::fs::write(co_path.join("test"), "x".repeat(*size)).unwrap(); + } +} + +#[cargo_test] +fn clean_max_git_size_untracked() { + // If there are git directories that aren't tracked in the database, + // `--max-git-size` should pick it up. + // + // The db_name of "example" depends on the sorting order of the names ("e" + // should be after "c"), so that the db comes after the checkouts. 
+ setup_fake_git_sizes("example", 5000, &[1000, 2000]); + cargo_process(&format!("clean gc -Zgc -v --max-git-size=7000")) + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/example/co0 +[REMOVED] [..] +", + ) + .run(); + cargo_process(&format!("clean gc -Zgc -v --max-git-size=5000")) + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/example/co1 +[REMOVED] [..] +", + ) + .run(); + cargo_process(&format!("clean gc -Zgc -v --max-git-size=0")) + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/db/example +[REMOVED] [..] +", + ) + .run(); +} + +#[cargo_test] +fn clean_max_git_size_deletes_co_from_db() { + // In the scenario where it thinks it needs to delete the db, it should + // also delete all the checkouts. + // + // The db_name of "abc" depends on the sorting order of the names ("a" + // should be before "c"), so that the db comes before the checkouts. + setup_fake_git_sizes("abc", 5000, &[1000, 2000]); + // This deletes everything because it tries to delete the db, which then + // deletes all checkouts. + cargo_process(&format!("clean gc -Zgc -v --max-git-size=3000")) + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/db/abc +[REMOVING] [ROOT]/home/.cargo/git/checkouts/abc/co1 +[REMOVING] [ROOT]/home/.cargo/git/checkouts/abc/co0 +[REMOVED] [..] +", + ) + .run(); +} + +#[cargo_test] +fn handles_missing_index() { + // Checks behavior when index is missing. + let p = basic_foo_bar_project(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + paths::home().join(".cargo/registry/index").rm_rf(); + cargo_process("clean gc -v --max-download-size=0 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr_unordered( + "\ +[REMOVING] [ROOT]/home/.cargo/registry/cache/[..] +[REMOVING] [ROOT]/home/.cargo/registry/src/[..] +[REMOVED] [..] +", + ) + .run(); +} + +#[cargo_test] +fn handles_missing_git_db() { + // Checks behavior when git db is missing. + let git_project = git::new("bar", |p| { + p.file("Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + paths::home().join(".cargo/git/db").rm_rf(); + cargo_process("clean gc -v --max-git-size=0 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/[..] +[REMOVED] [..] +", + ) + .run(); +} diff --git a/tests/testsuite/main.rs b/tests/testsuite/main.rs index 07f749e3438..e2e46c400a4 100644 --- a/tests/testsuite/main.rs +++ b/tests/testsuite/main.rs @@ -98,6 +98,7 @@ mod git_auth; mod git_gc; mod git_shallow; mod glob_targets; +mod global_cache_tracker; mod help; mod https; mod inheritable_workspace_fields;