Skip to content

Commit

Permalink
Add backoff for transient Windows failures (#2419)
Browse files Browse the repository at this point in the history
## Summary

This may be required elsewhere, but all the traces in that issue are
related to persisting the temporary directory to our persistent cache,
so lets start there.

See: #1491.
  • Loading branch information
charliermarsh authored Mar 13, 2024
1 parent 94f94ba commit d9b160b
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 9 deletions.
15 changes: 15 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ async-trait = { version = "0.1.77" }
async-recursion = { version = "1.0.5" }
async_http_range_reader = { version = "0.7.0" }
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "d76801da0943de985254fc6255c0e476b57c5836", features = ["deflate"] }
backoff = { version = "0.4.0" }
base64 = { version = "0.21.7" }
cachedir = { version = "0.3.1" }
cargo-util = { version = "0.2.8" }
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ impl Cache {
}

/// Persist a temporary directory to the artifact store.
pub fn persist(
pub async fn persist(
&self,
temp_dir: impl AsRef<Path>,
path: impl AsRef<Path>,
Expand All @@ -218,7 +218,7 @@ impl Cache {
// Move the temporary directory into the directory store.
let archive_entry = self.entry(CacheBucket::Archive, "", id);
fs_err::create_dir_all(archive_entry.dir())?;
fs_err::rename(temp_dir.as_ref(), archive_entry.path())?;
uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?;

// Create a symlink to the directory store.
fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
Expand Down
2 changes: 2 additions & 0 deletions crates/uv-distribution/src/distribution_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let archive = self
.cache
.persist(temp_dir.into_path(), wheel_entry.path())
.await
.map_err(Error::CacheRead)?;
Ok(archive)
}
Expand Down Expand Up @@ -532,6 +533,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let archive = self
.cache
.persist(temp_dir.into_path(), wheel_entry.path())
.await
.map_err(Error::CacheRead)?;
Ok(archive)
}
Expand Down
3 changes: 2 additions & 1 deletion crates/uv-fs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ workspace = true
[dependencies]
uv-warnings = { path = "../uv-warnings" }

backoff = { workspace = true }
dunce = { workspace = true }
encoding_rs_io = { workspace = true }
fs-err = { workspace = true }
Expand All @@ -27,4 +28,4 @@ urlencoding = { workspace = true }

[features]
default = []
tokio = ["fs-err/tokio", "dep:tokio"]
tokio = ["dep:tokio", "fs-err/tokio", "backoff/tokio"]
41 changes: 41 additions & 0 deletions crates/uv-fs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,47 @@ pub fn force_remove_all(path: impl AsRef<Path>) -> Result<bool, std::io::Error>
Ok(true)
}

/// Rename a file, retrying (on Windows) if it fails due to transient operating system errors.
#[cfg(feature = "tokio")]
pub async fn rename_with_retry(
from: impl AsRef<Path>,
to: impl AsRef<Path>,
) -> Result<(), std::io::Error> {
if cfg!(windows) {
// On Windows, antivirus software can lock files temporarily, making them inaccessible.
// This is most common for DLLs, and the common suggestion is to retry the operation with
// some backoff.
//
// See: <https://github.com/astral-sh/uv/issues/1491>
let from = from.as_ref();
let to = to.as_ref();

let backoff = backoff::ExponentialBackoffBuilder::default()
.with_initial_interval(std::time::Duration::from_millis(10))
.with_max_elapsed_time(Some(std::time::Duration::from_secs(10)))
.build();

backoff::future::retry(backoff, || async move {
match fs_err::rename(from, to) {
Ok(()) => Ok(()),
Err(err) if err.kind() == std::io::ErrorKind::PermissionDenied => {
warn!(
"Retrying rename from {} to {} due to transient error: {}",
from.display(),
to.display(),
err
);
Err(backoff::Error::transient(err))
}
Err(err) => Err(backoff::Error::permanent(err)),
}
})
.await
} else {
fs_err::tokio::rename(from, to).await
}
}

/// Iterate over the subdirectories of a directory.
///
/// If the directory does not exist, returns an empty iterator.
Expand Down
20 changes: 14 additions & 6 deletions crates/uv-installer/src/downloader.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use std::cmp::Reverse;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;

use futures::{FutureExt, Stream, StreamExt, TryFutureExt, TryStreamExt};
use tempfile::TempDir;
use tokio::task::JoinError;
use tracing::instrument;
use url::Url;
Expand All @@ -27,6 +28,8 @@ pub enum Error {
Join(#[from] JoinError),
#[error(transparent)]
Editable(#[from] uv_distribution::Error),
#[error("Failed to write to the client cache")]
CacheWrite(#[source] std::io::Error),
#[error("Unzip failed in another thread: {0}")]
Thread(String),
}
Expand Down Expand Up @@ -197,21 +200,26 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> {
}

// Unzip the wheel.
let archive = tokio::task::spawn_blocking({
let temp_dir = tokio::task::spawn_blocking({
let download = download.clone();
let cache = self.cache.clone();
move || -> Result<PathBuf, uv_extract::Error> {
move || -> Result<TempDir, uv_extract::Error> {
// Unzip the wheel into a temporary directory.
let temp_dir = tempfile::tempdir_in(cache.root())?;
download.unzip(temp_dir.path())?;

// Persist the temporary directory to the directory store.
Ok(cache.persist(temp_dir.into_path(), download.target())?)
Ok(temp_dir)
}
})
.await?
.map_err(|err| Error::Unzip(download.remote().clone(), err))?;

// Persist the temporary directory to the directory store.
let archive = self
.cache
.persist(temp_dir.into_path(), download.target())
.map_err(Error::CacheWrite)
.await?;

Ok(download.into_cached_dist(archive))
}
}
Expand Down

0 comments on commit d9b160b

Please sign in to comment.