From 869642bb0984ef31e3b8913d1917b136aba1d92a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 15 Jun 2020 13:36:23 -0700 Subject: [PATCH 1/3] Cut down on data fetch from git dependencies Currently Cargo pretty heavily over-approximates data fetch for git dependencies. For the index it fetches precisely one branch, but for all other git dependencies Cargo will fetch all branches and all tags all the time. In each of these situations, however, Cargo knows if one branch is desired or if only one tag is desired. This commit updates Cargo's fetching logic to plumb the desired `GitReference` all the way down to `fetch`. In that one location we then determine what to fetch. Namely if a branch or tag is explicitly selected then we only fetch that one reference from the remote, cutting down on the amount of traffic to the git remote. Additionally a bugfix included here is that the GitHub fast path for checking if a repository is up-to-date now works for non-`master`-based branch dependencies. --- src/cargo/sources/git/utils.rs | 232 ++++++++++++++++----------- src/cargo/sources/registry/remote.rs | 14 +- 2 files changed, 146 insertions(+), 100 deletions(-) diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index f8190a2ddae..cd812c8982e 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -3,7 +3,8 @@ use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::paths; use crate::util::process_builder::process; use crate::util::{network, Config, IntoUrl, Progress}; -use curl::easy::{Easy, List}; +use anyhow::anyhow; +use curl::easy::List; use git2::{self, ErrorClass, ObjectType}; use log::{debug, info}; use serde::ser; @@ -15,7 +16,7 @@ use std::process::Command; use url::Url; #[derive(PartialEq, Clone, Debug)] -pub struct GitRevision(git2::Oid); +pub struct GitRevision(pub git2::Oid); impl ser::Serialize for GitRevision { fn serialize(&self, s: S) -> Result { @@ -131,7 +132,7 @@ impl GitRemote { let mut repo_and_rev = None; if let Ok(mut repo) = git2::Repository::open(into) { - self.fetch_into(&mut repo, cargo_config) + self.fetch_into(&mut repo, reference, cargo_config) .map_err(|e| format_error(e, "fetch"))?; if let Ok(rev) = reference.resolve(&repo) { repo_and_rev = Some((repo, rev)); @@ -141,7 +142,7 @@ impl GitRemote { Some(pair) => pair, None => { let repo = self - .clone_into(into, cargo_config) + .clone_into(into, reference, cargo_config) .map_err(|e| format_error(e, "clone"))?; let rev = reference.resolve(&repo)?; (repo, rev) @@ -167,24 +168,27 @@ impl GitRemote { }) } - fn fetch_into(&self, dst: &mut git2::Repository, cargo_config: &Config) -> CargoResult<()> { - // Create a local anonymous remote in the repository to fetch the url - let refspec = "refs/heads/*:refs/heads/*"; - fetch(dst, self.url.as_str(), refspec, cargo_config) + fn fetch_into( + &self, + dst: &mut git2::Repository, + reference: &GitReference, + cargo_config: &Config, + ) -> CargoResult<()> { + fetch(dst, self.url.as_str(), reference, cargo_config) } - fn clone_into(&self, dst: &Path, cargo_config: &Config) -> CargoResult { + fn clone_into( + &self, + dst: &Path, + reference: &GitReference, + cargo_config: &Config, + ) -> CargoResult { if dst.exists() { paths::remove_dir_all(dst)?; } paths::create_dir_all(dst)?; let mut repo = init(dst, true)?; - fetch( - &mut repo, - self.url.as_str(), - "refs/heads/*:refs/heads/*", - cargo_config, - )?; + fetch(&mut repo, self.url.as_str(), reference, cargo_config)?; Ok(repo) } } @@ -200,15 +204,19 @@ impl GitDatabase { if let Ok(repo) = git2::Repository::open(dest) { let mut co = GitCheckout::new(dest, self, rev.clone(), repo); if !co.is_fresh() { - // After a successful fetch operation do a sanity check to - // ensure we've got the object in our database to reset to. This - // can fail sometimes for corrupt repositories where the fetch - // operation succeeds but the object isn't actually there. + // After a successful fetch operation the subsequent reset can + // fail sometimes for corrupt repositories where the fetch + // operation succeeds but the object isn't actually there in one + // way or another. In these situations just skip the error and + // try blowing away the whole repository and trying with a + // clone. co.fetch(cargo_config)?; - if co.has_object() { - co.reset(cargo_config)?; - assert!(co.is_fresh()); - checkout = Some(co); + match co.reset(cargo_config) { + Ok(()) => { + assert!(co.is_fresh()); + checkout = Some(co); + } + Err(e) => debug!("failed reset after fetch {:?}", e), } } else { checkout = Some(co); @@ -226,33 +234,34 @@ impl GitDatabase { let obj = self.repo.find_object(revision.0, None)?; Ok(GitShortID(obj.short_id()?)) } - - pub fn has_ref(&self, reference: &str) -> CargoResult<()> { - self.repo.revparse_single(reference)?; - Ok(()) - } } impl GitReference { - fn resolve(&self, repo: &git2::Repository) -> CargoResult { - let id = match *self { - GitReference::Tag(ref s) => (|| -> CargoResult { - let refname = format!("refs/tags/{}", s); + pub fn resolve(&self, repo: &git2::Repository) -> CargoResult { + let id = match self { + // Note that we resolve the named tag here in sync with where it's + // fetched into via `fetch` below. + GitReference::Tag(s) => (|| -> CargoResult { + let refname = format!("refs/remotes/origin/tags/{}", s); let id = repo.refname_to_id(&refname)?; let obj = repo.find_object(id, None)?; let obj = obj.peel(ObjectType::Commit)?; Ok(obj.id()) })() .chain_err(|| format!("failed to find tag `{}`", s))?, - GitReference::Branch(ref s) => { + + // Resolve the remote name since that's all we're configuring in + // `fetch` below. + GitReference::Branch(s) => { + let name = format!("origin/{}", s); let b = repo - .find_branch(s, git2::BranchType::Local) + .find_branch(&name, git2::BranchType::Remote) .chain_err(|| format!("failed to find branch `{}`", s))?; b.get() .target() .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))? } - GitReference::Rev(ref s) => { + GitReference::Rev(s) => { let obj = repo.revparse_single(s)?; match obj.as_tag() { Some(tag) => tag.target_id(), @@ -314,7 +323,6 @@ impl<'a> GitCheckout<'a> { .clone_local(git2::build::CloneLocal::Local) .with_checkout(checkout) .fetch_options(fopts) - // .remote_create(|repo, _name, url| repo.remote_anonymous(url)) .clone(url.as_str(), into)?; repo = Some(r); Ok(()) @@ -339,15 +347,11 @@ impl<'a> GitCheckout<'a> { fn fetch(&mut self, cargo_config: &Config) -> CargoResult<()> { info!("fetch {}", self.repo.path().display()); let url = self.database.path.into_url()?; - let refspec = "refs/heads/*:refs/heads/*"; - fetch(&mut self.repo, url.as_str(), refspec, cargo_config)?; + let reference = GitReference::Rev(self.revision.to_string()); + fetch(&mut self.repo, url.as_str(), &reference, cargo_config)?; Ok(()) } - fn has_object(&self) -> bool { - self.repo.find_object(self.revision.0, None).is_ok() - } - fn reset(&self, config: &Config) -> CargoResult<()> { // If we're interrupted while performing this reset (e.g., we die because // of a signal) Cargo needs to be sure to try to check out this repo @@ -422,11 +426,11 @@ impl<'a> GitCheckout<'a> { } }; // Fetch data from origin and reset to the head commit - let refspec = "refs/heads/*:refs/heads/*"; + let reference = GitReference::Rev(head.to_string()); cargo_config .shell() .status("Updating", format!("git submodule `{}`", url))?; - fetch(&mut repo, url, refspec, cargo_config).chain_err(|| { + fetch(&mut repo, url, &reference, cargo_config).chain_err(|| { format!( "failed to fetch submodule `{}` from {}", child.name().unwrap_or(""), @@ -663,7 +667,9 @@ fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> Ca opts.progress(|_, cur, max| { drop(pb.tick(cur, max)); }); + debug!("doing reset"); repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?; + debug!("reset done"); Ok(()) } @@ -688,8 +694,7 @@ pub fn with_fetch_options( // Create a local anonymous remote in the repository to fetch the // url let mut opts = git2::FetchOptions::new(); - opts.remote_callbacks(rcb) - .download_tags(git2::AutotagOption::All); + opts.remote_callbacks(rcb); cb(opts) })?; Ok(()) @@ -699,7 +704,7 @@ pub fn with_fetch_options( pub fn fetch( repo: &mut git2::Repository, url: &str, - refspec: &str, + reference: &GitReference, config: &Config, ) -> CargoResult<()> { if config.frozen() { @@ -714,19 +719,10 @@ pub fn fetch( // If we're fetching from GitHub, attempt GitHub's special fast path for // testing if we've already got an up-to-date copy of the repository - - if let Ok(url) = Url::parse(url) { - if url.host_str() == Some("github.com") { - if let Ok(oid) = repo.refname_to_id("refs/remotes/origin/master") { - let mut handle = config.http()?.borrow_mut(); - debug!("attempting GitHub fast path for {}", url); - if github_up_to_date(&mut handle, &url, &oid) { - return Ok(()); - } else { - debug!("fast path failed, falling back to a git fetch"); - } - } - } + match github_up_to_date(repo, url, reference, config) { + Ok(true) => return Ok(()), + Ok(false) => {} + Err(e) => debug!("failed to check github {:?}", e), } // We reuse repositories quite a lot, so before we go through and update the @@ -735,6 +731,29 @@ pub fn fetch( // request we're about to issue. maybe_gc_repo(repo)?; + // Translate the reference desired here into an actual list of refspecs + // which need to get fetched. Additionally record if we're fetching tags. + let mut refspecs = Vec::new(); + let mut tags = false; + match reference { + // For branches and tags we can fetch simply one reference and copy it + // locally, no need to fetch other branches/tags. + GitReference::Branch(b) => { + refspecs.push(format!("refs/heads/{0}:refs/remotes/origin/{0}", b)); + } + GitReference::Tag(t) => { + refspecs.push(format!("refs/tags/{0}:refs/remotes/origin/tags/{0}", t)); + } + + // For `rev` dependencies we don't know what the rev will point to. To + // handle this situation we fetch all branches and tags, and then we + // pray it's somewhere in there. + GitReference::Rev(_) => { + refspecs.push(format!("refs/heads/*:refs/remotes/origin/*")); + tags = true; + } + } + // Unfortunately `libgit2` is notably lacking in the realm of authentication // when compared to the `git` command line. As a result, allow an escape // hatch for users that would prefer to use `git`-the-CLI for fetching @@ -742,12 +761,15 @@ pub fn fetch( // flavors of authentication possible while also still giving us all the // speed and portability of using `libgit2`. if let Some(true) = config.net_config()?.git_fetch_with_cli { - return fetch_with_cli(repo, url, refspec, config); + return fetch_with_cli(repo, url, &refspecs, tags, config); } debug!("doing a fetch for {}", url); let git_config = git2::Config::open_default()?; with_fetch_options(&git_config, url, config, &mut |mut opts| { + if tags { + opts.download_tags(git2::AutotagOption::All); + } // The `fetch` operation here may fail spuriously due to a corrupt // repository. It could also fail, however, for a whole slew of other // reasons (aka network related reasons). We want Cargo to automatically @@ -760,10 +782,10 @@ pub fn fetch( // blown away the repository, then we want to return the error as-is. let mut repo_reinitialized = false; loop { - debug!("initiating fetch of {} from {}", refspec, url); + debug!("initiating fetch of {:?} from {}", refspecs, url); let res = repo .remote_anonymous(url)? - .fetch(&[refspec], Some(&mut opts), None); + .fetch(&refspecs, Some(&mut opts), None); let err = match res { Ok(()) => break, Err(e) => e, @@ -790,16 +812,19 @@ pub fn fetch( fn fetch_with_cli( repo: &mut git2::Repository, url: &str, - refspec: &str, + refspecs: &[String], + tags: bool, config: &Config, ) -> CargoResult<()> { let mut cmd = process("git"); - cmd.arg("fetch") - .arg("--tags") // fetch all tags - .arg("--force") // handle force pushes + cmd.arg("fetch"); + if tags { + cmd.arg("--tags"); + } + cmd.arg("--force") // handle force pushes .arg("--update-head-ok") // see discussion in #2078 .arg(url) - .arg(refspec) + .args(refspecs) // If cargo is run by git (for example, the `exec` command in `git // rebase`), the GIT_DIR is set by git and will point to the wrong // location (this takes precedence over the cwd). Make sure this is @@ -919,9 +944,9 @@ fn init(path: &Path, bare: bool) -> CargoResult { /// made. /// /// This function will attempt to hit that fast path and verify that the `oid` -/// is actually the current `master` branch of the repository. If `true` is -/// returned then no update needs to be performed, but if `false` is returned -/// then the standard update logic still needs to happen. +/// is actually the current branch of the repository. If `true` is returned then +/// no update needs to be performed, but if `false` is returned then the +/// standard update logic still needs to happen. /// /// [1]: https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference /// @@ -929,37 +954,54 @@ fn init(path: &Path, bare: bool) -> CargoResult { /// just a fast path. As a result all errors are ignored in this function and we /// just return a `bool`. Any real errors will be reported through the normal /// update path above. -fn github_up_to_date(handle: &mut Easy, url: &Url, oid: &git2::Oid) -> bool { - macro_rules! r#try { - ($e:expr) => { - match $e { - Some(e) => e, - None => return false, - } - }; - } +fn github_up_to_date( + repo: &mut git2::Repository, + url: &str, + reference: &GitReference, + config: &Config, +) -> CargoResult { + let url = Url::parse(url)?; + if url.host_str() != Some("github.com") { + return Ok(false); + } + + let github_branch_name = match reference { + GitReference::Branch(branch) => branch, + GitReference::Tag(tag) => tag, + GitReference::Rev(_) => { + debug!("can't use github fast path with `rev`"); + return Ok(false); + } + }; // This expects GitHub urls in the form `github.com/user/repo` and nothing // else - let mut pieces = r#try!(url.path_segments()); - let username = r#try!(pieces.next()); - let repo = r#try!(pieces.next()); + let mut pieces = url + .path_segments() + .ok_or_else(|| anyhow!("no path segments on url"))?; + let username = pieces + .next() + .ok_or_else(|| anyhow!("couldn't find username"))?; + let repository = pieces + .next() + .ok_or_else(|| anyhow!("couldn't find repository name"))?; if pieces.next().is_some() { - return false; + anyhow::bail!("too many segments on URL"); } let url = format!( - "https://api.github.com/repos/{}/{}/commits/master", - username, repo + "https://api.github.com/repos/{}/{}/commits/{}", + username, repository, github_branch_name, ); - r#try!(handle.get(true).ok()); - r#try!(handle.url(&url).ok()); - r#try!(handle.useragent("cargo").ok()); + let mut handle = config.http()?.borrow_mut(); + debug!("attempting GitHub fast path for {}", url); + handle.get(true)?; + handle.url(&url)?; + handle.useragent("cargo")?; let mut headers = List::new(); - r#try!(headers.append("Accept: application/vnd.github.3.sha").ok()); - r#try!(headers.append(&format!("If-None-Match: \"{}\"", oid)).ok()); - r#try!(handle.http_headers(headers).ok()); - r#try!(handle.perform().ok()); - - r#try!(handle.response_code().ok()) == 304 + headers.append("Accept: application/vnd.github.3.sha")?; + headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?; + handle.http_headers(headers)?; + handle.perform()?; + Ok(handle.response_code()? == 304) } diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 79c4a60d586..03bf8ea2d18 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -1,4 +1,4 @@ -use crate::core::{InternedString, PackageId, SourceId}; +use crate::core::{GitReference, InternedString, PackageId, SourceId}; use crate::sources::git; use crate::sources::registry::MaybeLock; use crate::sources::registry::{ @@ -32,6 +32,7 @@ pub struct RemoteRegistry<'cfg> { index_path: Filesystem, cache_path: Filesystem, source_id: SourceId, + index_git_ref: GitReference, config: &'cfg Config, tree: RefCell>>, repo: LazyCell, @@ -46,6 +47,8 @@ impl<'cfg> RemoteRegistry<'cfg> { cache_path: config.registry_cache_path().join(name), source_id, config, + // TODO: we should probably make this configurable + index_git_ref: GitReference::Branch("master".to_string()), tree: RefCell::new(None), repo: LazyCell::new(), head: Cell::new(None), @@ -97,7 +100,8 @@ impl<'cfg> RemoteRegistry<'cfg> { fn head(&self) -> CargoResult { if self.head.get().is_none() { - let oid = self.repo()?.refname_to_id("refs/remotes/origin/master")?; + let repo = self.repo()?; + let oid = self.index_git_ref.resolve(repo)?.0; self.head.set(Some(oid)); } Ok(self.head.get().unwrap()) @@ -227,11 +231,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { .shell() .status("Updating", self.source_id.display_index())?; - // git fetch origin master + // Fetch the latest version of our `index_git_ref` into the index + // checkout. let url = self.source_id.url(); - let refspec = "refs/heads/master:refs/remotes/origin/master"; let repo = self.repo.borrow_mut().unwrap(); - git::fetch(repo, url.as_str(), refspec, self.config) + git::fetch(repo, url.as_str(), &self.index_git_ref, self.config) .chain_err(|| format!("failed to fetch `{}`", url))?; self.config.updated_sources().insert(self.source_id); From 437e5d7ee1433aa0238417781c1ddaa3abc596e0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 16 Jun 2020 08:31:19 -0700 Subject: [PATCH 2/3] Strip `.git` from urls on GitHub fast-path --- src/cargo/sources/git/utils.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index cd812c8982e..acd1c07c646 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -989,6 +989,14 @@ fn github_up_to_date( anyhow::bail!("too many segments on URL"); } + // Trim off the `.git` from the repository, if present, since that's + // optional for GitHub and won't work when we try to use the API as well. + let repository = if repository.ends_with(".git") { + &repository[..repository.len() - 4] + } else { + repository + }; + let url = format!( "https://api.github.com/repos/{}/{}/commits/{}", username, repository, github_branch_name, From ddc27999c17c55170e1812b50d27121cf43894ee Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 16 Jun 2020 09:02:01 -0700 Subject: [PATCH 3/3] Update how locked git commits are fetched This commit refactors various logic of the git source internals to ensure that if we have a locked revision that we plumb the desired branch/tag all the way through to the `fetch`. Previously we'd switch to `Rev` very early on, but the fetching logic for `Rev` is very eager and fetches too much, so instead we only resolve the locked revision later on. Internally this does some various refactoring to try to make various bits and pieces of logic a bit easyer to grok, although it's still perhaps not the cleanest implementation. --- src/cargo/sources/git/mod.rs | 2 +- src/cargo/sources/git/source.rs | 111 ++++++++++++++----------- src/cargo/sources/git/utils.rs | 119 ++++++++++++--------------- src/cargo/sources/registry/remote.rs | 2 +- tests/testsuite/git.rs | 6 +- 5 files changed, 124 insertions(+), 116 deletions(-) diff --git a/src/cargo/sources/git/mod.rs b/src/cargo/sources/git/mod.rs index bd6d7ea7d6e..b32dbb17be1 100644 --- a/src/cargo/sources/git/mod.rs +++ b/src/cargo/sources/git/mod.rs @@ -1,4 +1,4 @@ pub use self::source::GitSource; -pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote, GitRevision}; +pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote}; mod source; mod utils; diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index b433de3ea6d..be8dd4dd2e8 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -1,23 +1,22 @@ -use std::fmt::{self, Debug, Formatter}; - -use log::trace; -use url::Url; - use crate::core::source::{MaybePackage, Source, SourceId}; use crate::core::GitReference; use crate::core::{Dependency, Package, PackageId, Summary}; -use crate::sources::git::utils::{GitRemote, GitRevision}; +use crate::sources::git::utils::GitRemote; use crate::sources::PathSource; use crate::util::errors::CargoResult; use crate::util::hex::short_hash; use crate::util::Config; +use anyhow::Context; +use log::trace; +use std::fmt::{self, Debug, Formatter}; +use url::Url; pub struct GitSource<'cfg> { remote: GitRemote, - reference: GitReference, + manifest_reference: GitReference, + locked_rev: Option, source_id: SourceId, path_source: Option>, - rev: Option, ident: String, config: &'cfg Config, } @@ -29,17 +28,17 @@ impl<'cfg> GitSource<'cfg> { let remote = GitRemote::new(source_id.url()); let ident = ident(&source_id); - let reference = match source_id.precise() { - Some(s) => GitReference::Rev(s.to_string()), - None => source_id.git_reference().unwrap().clone(), - }; - let source = GitSource { remote, - reference, + manifest_reference: source_id.git_reference().unwrap().clone(), + locked_rev: match source_id.precise() { + Some(s) => Some(git2::Oid::from_str(s).with_context(|| { + format!("precise value for git is not a git revision: {}", s) + })?), + None => None, + }, source_id, path_source: None, - rev: None, ident, config, }; @@ -76,7 +75,7 @@ impl<'cfg> Debug for GitSource<'cfg> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "git repo at {}", self.remote.url())?; - match self.reference.pretty_ref() { + match self.manifest_reference.pretty_ref() { Some(s) => write!(f, " ({})", s), None => Ok(()), } @@ -117,52 +116,70 @@ impl<'cfg> Source for GitSource<'cfg> { let git_path = self.config.assert_package_cache_locked(&git_path); let db_path = git_path.join("db").join(&self.ident); - if self.config.offline() && !db_path.exists() { - anyhow::bail!( - "can't checkout from '{}': you are in the offline mode (--offline)", - self.remote.url() - ); - } - - // Resolve our reference to an actual revision, and check if the - // database already has that revision. If it does, we just load a - // database pinned at that revision, and if we don't we issue an update - // to try to find the revision. - let actual_rev = self.remote.rev_for(&db_path, &self.reference); - let should_update = actual_rev.is_err() || self.source_id.precise().is_none(); - - let (db, actual_rev) = if should_update && !self.config.offline() { - self.config.shell().status( - "Updating", - format!("git repository `{}`", self.remote.url()), - )?; - - trace!("updating git source `{:?}`", self.remote); - - self.remote - .checkout(&db_path, &self.reference, self.config)? - } else { - (self.remote.db_at(&db_path)?, actual_rev.unwrap()) + let db = self.remote.db_at(&db_path).ok(); + let (db, actual_rev) = match (self.locked_rev, db) { + // If we have a locked revision, and we have a preexisting database + // which has that revision, then no update needs to happen. + (Some(rev), Some(db)) if db.contains(rev) => (db, rev), + + // If we're in offline mode, we're not locked, and we have a + // database, then try to resolve our reference with the preexisting + // repository. + (None, Some(db)) if self.config.offline() => { + let rev = db.resolve(&self.manifest_reference).with_context(|| { + "failed to lookup reference in preexisting repository, and \ + can't check for updates in offline mode (--offline)" + })?; + (db, rev) + } + + // ... otherwise we use this state to update the git database. Note + // that we still check for being offline here, for example in the + // situation that we have a locked revision but the database + // doesn't have it. + (locked_rev, db) => { + if self.config.offline() { + anyhow::bail!( + "can't checkout from '{}': you are in the offline mode (--offline)", + self.remote.url() + ); + } + self.config.shell().status( + "Updating", + format!("git repository `{}`", self.remote.url()), + )?; + + trace!("updating git source `{:?}`", self.remote); + + self.remote.checkout( + &db_path, + db, + &self.manifest_reference, + locked_rev, + self.config, + )? + } }; // Don’t use the full hash, in order to contribute less to reaching the // path length limit on Windows. See // . - let short_id = db.to_short_id(&actual_rev).unwrap(); + let short_id = db.to_short_id(actual_rev)?; + // Check out `actual_rev` from the database to a scoped location on the + // filesystem. This will use hard links and such to ideally make the + // checkout operation here pretty fast. let checkout_path = git_path .join("checkouts") .join(&self.ident) .join(short_id.as_str()); - - // Copy the database to the checkout location. db.copy_to(actual_rev.clone(), &checkout_path, self.config)?; let source_id = self.source_id.with_precise(Some(actual_rev.to_string())); let path_source = PathSource::new_recursive(&checkout_path, source_id, self.config); self.path_source = Some(path_source); - self.rev = Some(actual_rev); + self.locked_rev = Some(actual_rev); self.path_source.as_mut().unwrap().update() } @@ -183,7 +200,7 @@ impl<'cfg> Source for GitSource<'cfg> { } fn fingerprint(&self, _pkg: &Package) -> CargoResult { - Ok(self.rev.as_ref().unwrap().to_string()) + Ok(self.locked_rev.as_ref().unwrap().to_string()) } fn describe(&self) -> String { diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index acd1c07c646..53bda10f3bd 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -15,15 +15,6 @@ use std::path::{Path, PathBuf}; use std::process::Command; use url::Url; -#[derive(PartialEq, Clone, Debug)] -pub struct GitRevision(pub git2::Oid); - -impl ser::Serialize for GitRevision { - fn serialize(&self, s: S) -> Result { - serialize_str(self, s) - } -} - fn serialize_str(t: &T, s: S) -> Result where T: fmt::Display, @@ -32,12 +23,6 @@ where s.collect_str(t) } -impl fmt::Display for GitRevision { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.0, f) - } -} - pub struct GitShortID(git2::Buf); impl GitShortID { @@ -71,7 +56,8 @@ pub struct GitDatabase { pub struct GitCheckout<'a> { database: &'a GitDatabase, location: PathBuf, - revision: GitRevision, + #[serde(serialize_with = "serialize_str")] + revision: git2::Oid, #[serde(skip_serializing)] repo: git2::Repository, } @@ -87,16 +73,18 @@ impl GitRemote { &self.url } - pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult { + pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult { reference.resolve(&self.db_at(path)?.repo) } pub fn checkout( &self, into: &Path, + db: Option, reference: &GitReference, + locked_rev: Option, cargo_config: &Config, - ) -> CargoResult<(GitDatabase, GitRevision)> { + ) -> CargoResult<(GitDatabase, git2::Oid)> { let format_error = |e: anyhow::Error, operation| { let may_be_libgit_fault = e .chain() @@ -130,23 +118,40 @@ impl GitRemote { e.context(msg) }; - let mut repo_and_rev = None; - if let Ok(mut repo) = git2::Repository::open(into) { - self.fetch_into(&mut repo, reference, cargo_config) + // If we have a previous instance of `GitDatabase` then fetch into that + // if we can. If that can successfully load our revision then we've + // populated the database with the latest version of `reference`, so + // return that database and the rev we resolve to. + if let Some(mut db) = db { + fetch(&mut db.repo, self.url.as_str(), reference, cargo_config) .map_err(|e| format_error(e, "fetch"))?; - if let Ok(rev) = reference.resolve(&repo) { - repo_and_rev = Some((repo, rev)); + match locked_rev { + Some(rev) => { + if db.contains(rev) { + return Ok((db, rev)); + } + } + None => { + if let Ok(rev) = reference.resolve(&db.repo) { + return Ok((db, rev)); + } + } } } - let (repo, rev) = match repo_and_rev { - Some(pair) => pair, - None => { - let repo = self - .clone_into(into, reference, cargo_config) - .map_err(|e| format_error(e, "clone"))?; - let rev = reference.resolve(&repo)?; - (repo, rev) - } + + // Otherwise start from scratch to handle corrupt git repositories. + // After our fetch (which is interpreted as a clone now) we do the same + // resolution to figure out what we cloned. + if into.exists() { + paths::remove_dir_all(into)?; + } + paths::create_dir_all(into)?; + let mut repo = init(into, true)?; + fetch(&mut repo, self.url.as_str(), reference, cargo_config) + .map_err(|e| format_error(e, "clone"))?; + let rev = match locked_rev { + Some(rev) => rev, + None => reference.resolve(&repo)?, }; Ok(( @@ -167,36 +172,12 @@ impl GitRemote { repo, }) } - - fn fetch_into( - &self, - dst: &mut git2::Repository, - reference: &GitReference, - cargo_config: &Config, - ) -> CargoResult<()> { - fetch(dst, self.url.as_str(), reference, cargo_config) - } - - fn clone_into( - &self, - dst: &Path, - reference: &GitReference, - cargo_config: &Config, - ) -> CargoResult { - if dst.exists() { - paths::remove_dir_all(dst)?; - } - paths::create_dir_all(dst)?; - let mut repo = init(dst, true)?; - fetch(&mut repo, self.url.as_str(), reference, cargo_config)?; - Ok(repo) - } } impl GitDatabase { pub fn copy_to( &self, - rev: GitRevision, + rev: git2::Oid, dest: &Path, cargo_config: &Config, ) -> CargoResult> { @@ -230,14 +211,22 @@ impl GitDatabase { Ok(checkout) } - pub fn to_short_id(&self, revision: &GitRevision) -> CargoResult { - let obj = self.repo.find_object(revision.0, None)?; + pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult { + let obj = self.repo.find_object(revision, None)?; Ok(GitShortID(obj.short_id()?)) } + + pub fn contains(&self, oid: git2::Oid) -> bool { + self.repo.revparse_single(&oid.to_string()).is_ok() + } + + pub fn resolve(&self, r: &GitReference) -> CargoResult { + r.resolve(&self.repo) + } } impl GitReference { - pub fn resolve(&self, repo: &git2::Repository) -> CargoResult { + pub fn resolve(&self, repo: &git2::Repository) -> CargoResult { let id = match self { // Note that we resolve the named tag here in sync with where it's // fetched into via `fetch` below. @@ -269,7 +258,7 @@ impl GitReference { } } }; - Ok(GitRevision(id)) + Ok(id) } } @@ -277,7 +266,7 @@ impl<'a> GitCheckout<'a> { fn new( path: &Path, database: &'a GitDatabase, - revision: GitRevision, + revision: git2::Oid, repo: git2::Repository, ) -> GitCheckout<'a> { GitCheckout { @@ -291,7 +280,7 @@ impl<'a> GitCheckout<'a> { fn clone_into( into: &Path, database: &'a GitDatabase, - revision: GitRevision, + revision: git2::Oid, config: &Config, ) -> CargoResult> { let dirname = into.parent().unwrap(); @@ -336,7 +325,7 @@ impl<'a> GitCheckout<'a> { fn is_fresh(&self) -> bool { match self.repo.revparse_single("HEAD") { - Ok(ref head) if head.id() == self.revision.0 => { + Ok(ref head) if head.id() == self.revision => { // See comments in reset() for why we check this self.location.join(".cargo-ok").exists() } @@ -364,7 +353,7 @@ impl<'a> GitCheckout<'a> { let ok_file = self.location.join(".cargo-ok"); let _ = paths::remove_file(&ok_file); info!("reset {} to {}", self.repo.path().display(), self.revision); - let object = self.repo.find_object(self.revision.0, None)?; + let object = self.repo.find_object(self.revision, None)?; reset(&self.repo, &object, config)?; paths::create(ok_file)?; Ok(()) diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 03bf8ea2d18..6f1ae1e58dd 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -101,7 +101,7 @@ impl<'cfg> RemoteRegistry<'cfg> { fn head(&self) -> CargoResult { if self.head.get().is_none() { let repo = self.repo()?; - let oid = self.index_git_ref.resolve(repo)?.0; + let oid = self.index_git_ref.resolve(repo)?; self.head.set(Some(oid)); } Ok(self.head.get().unwrap()) diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index dd6a6193a0c..2feab38884b 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -776,11 +776,13 @@ fn update_with_shared_deps() { .with_status(101) .with_stderr( "\ -[UPDATING] git repository [..] [ERROR] Unable to update [..] Caused by: - revspec '0.1.2' not found; [..] + precise value for git is not a git revision: 0.1.2 + +Caused by: + unable to parse OID - contains invalid characters; class=Invalid (3) ", ) .run();