From 4be3614264c28487091ed7f8cb8af30d33f41f16 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Thu, 4 Jan 2024 10:01:34 -0500
Subject: [PATCH 1/5] test(cargo-update): `--precise` not work with tag or
 short id

This demonstrates the bug described in 13188
---
 tests/testsuite/update.rs | 67 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs
index 40bc0b476a0..0d233aa2f40 100644
--- a/tests/testsuite/update.rs
+++ b/tests/testsuite/update.rs
@@ -1272,3 +1272,70 @@ rustdns.workspace = true
         )
         .run();
 }
+
+#[cargo_test]
+fn update_precise_git_revisions() {
+    let (git_project, git_repo) = git::new_repo("git", |p| {
+        p.file("Cargo.toml", &basic_lib_manifest("git"))
+            .file("src/lib.rs", "")
+    });
+    let tag_name = "Nazgûl";
+    git::tag(&git_repo, tag_name);
+
+    git_project.change_file("src/lib.rs", "fn f() {}");
+    git::add(&git_repo);
+    let head_id = git::commit(&git_repo).to_string();
+    let short_id = &head_id[..8];
+    let url = git_project.url();
+
+    let p = project()
+        .file(
+            "Cargo.toml",
+            &format!(
+                r#"
+                    [package]
+                    name = "foo"
+                    version = "0.1.0"
+
+                    [dependencies]
+                    git = {{ git = '{url}' }}
+                "#
+            ),
+        )
+        .file("src/lib.rs", "")
+        .build();
+
+    p.cargo("fetch")
+        .with_stderr(format!("[UPDATING] git repository `{url}`"))
+        .run();
+
+    assert!(p.read_lockfile().contains(&head_id));
+
+    p.cargo("update git --precise")
+        .arg(tag_name)
+        .with_status(101)
+        .with_stderr(format!(
+            "\
+[ERROR] Unable to update {url}#{tag_name}
+
+Caused by:
+  precise value for git is not a git revision: {tag_name}
+
+Caused by:
+[..]"
+        ))
+        .run();
+
+    p.cargo("update git --precise")
+        .arg(short_id)
+        .with_status(101)
+        .with_stderr(format!(
+            "\
+[UPDATING] git repository `{url}`
+[ERROR] Unable to update {url}#{short_id}
+
+Caused by:
+  object not found - no match for id ({short_id}00000000000000[..]); [..]",
+        ))
+        .run();
+}

From 2734097f125f374b63002e4a76e5a7da4be182c4 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Thu, 4 Jan 2024 11:00:09 -0500
Subject: [PATCH 2/5] fix(git): represent deferred and resolved revision with
 an enum

`cargo update --precise` might pass in any arbitrary Git reference,
and `git2::Oid::from_str` would always zero-pad the given str if it is
not a full SHA hex string.

This introduces an enum `Revision` to represent `locked_rev`
that is either deferred or resolved to an actual object ID.
When `locked_rev` is a short ID or any other Git reference,
Cargo first performs a Git fetch to resolve it (without --offline),
and then locks it to an actual commit object ID.
---
 src/cargo/core/source_id.rs     | 17 ++-----
 src/cargo/sources/git/source.rs | 81 ++++++++++++++++++++++++++-------
 src/cargo/sources/git/utils.rs  | 16 +------
 tests/testsuite/git.rs          |  6 +--
 tests/testsuite/update.rs       | 37 ++++++++++-----
 5 files changed, 99 insertions(+), 58 deletions(-)

diff --git a/src/cargo/core/source_id.rs b/src/cargo/core/source_id.rs
index 3b1cad94211..6e6d9d9d84b 100644
--- a/src/cargo/core/source_id.rs
+++ b/src/cargo/core/source_id.rs
@@ -460,21 +460,14 @@ impl SourceId {
     }
 
     pub fn precise_git_fragment(self) -> Option<&'static str> {
-        match &self.inner.precise {
-            Some(Precise::GitUrlFragment(s)) => Some(&s[..8]),
-            _ => None,
-        }
+        self.precise_full_git_fragment().map(|s| &s[..8])
     }
 
-    pub fn precise_git_oid(self) -> CargoResult<Option<git2::Oid>> {
-        Ok(match self.inner.precise.as_ref() {
-            Some(Precise::GitUrlFragment(s)) => {
-                Some(git2::Oid::from_str(s).with_context(|| {
-                    format!("precise value for git is not a git revision: {}", s)
-                })?)
-            }
+    pub fn precise_full_git_fragment(self) -> Option<&'static str> {
+        match &self.inner.precise {
+            Some(Precise::GitUrlFragment(s)) => Some(&s),
             _ => None,
-        })
+        }
     }
 
     /// Creates a new `SourceId` from this source with the given `precise`.
diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs
index 7d303f0c25f..3be8fc4567f 100644
--- a/src/cargo/sources/git/source.rs
+++ b/src/cargo/sources/git/source.rs
@@ -71,8 +71,9 @@ pub struct GitSource<'cfg> {
     /// The Git reference from the manifest file.
     manifest_reference: GitReference,
     /// The revision which a git source is locked to.
-    /// This is expected to be set after the Git repository is fetched.
-    locked_rev: Option<git2::Oid>,
+    ///
+    /// Expected to always be [`Revision::Locked`] after the Git repository is fetched.
+    locked_rev: Revision,
     /// The unique identifier of this source.
     source_id: SourceId,
     /// The underlying path source to discover packages inside the Git repository.
@@ -103,7 +104,11 @@ impl<'cfg> GitSource<'cfg> {
 
         let remote = GitRemote::new(source_id.url());
         let manifest_reference = source_id.git_reference().unwrap().clone();
-        let locked_rev = source_id.precise_git_oid()?;
+        let locked_rev = source_id
+            .precise_full_git_fragment()
+            .map(|s| Revision::new(s.into()))
+            .unwrap_or_else(|| source_id.git_reference().unwrap().clone().into());
+
         let ident = ident_shallow(
             &source_id,
             config
@@ -155,6 +160,49 @@ impl<'cfg> GitSource<'cfg> {
     }
 }
 
+/// Indicates a [Git revision] that might be locked or deferred to be resolved.
+///
+/// [Git revision]: https://git-scm.com/docs/revisions
+#[derive(Clone, Debug)]
+enum Revision {
+    /// A [Git reference] that would trigger extra fetches when being resolved.
+    ///
+    /// [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References
+    Deferred(GitReference),
+    /// A locked revision of the actual Git commit object ID.
+    Locked(git2::Oid),
+}
+
+impl Revision {
+    fn new(rev: &str) -> Revision {
+        let oid = git2::Oid::from_str(rev).ok();
+        match oid {
+            // Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
+            // Its length must be double to the underlying bytes (40 or 64),
+            // otherwise libgit2 would happily zero-pad the returned oid.
+            // See rust-lang/cargo#13188
+            Some(oid) if oid.as_bytes().len() * 2 == rev.len() => Revision::Locked(oid),
+            _ => Revision::Deferred(GitReference::Rev(rev.to_string())),
+        }
+    }
+}
+
+impl From<GitReference> for Revision {
+    fn from(value: GitReference) -> Self {
+        Revision::Deferred(value)
+    }
+}
+
+impl From<Revision> for GitReference {
+    fn from(value: Revision) -> Self {
+        match value {
+            Revision::Deferred(git_ref) => git_ref,
+            Revision::Locked(oid) => GitReference::Rev(oid.to_string()),
+        }
+    }
+}
+
+
 /// Create an identifier from a URL,
 /// essentially turning `proto://host/path/repo` into `repo-<hash-of-url>`.
 fn ident(id: &SourceId) -> String {
@@ -252,16 +300,17 @@ impl<'cfg> Source for GitSource<'cfg> {
         let db_path = db_path.into_path_unlocked();
 
         let db = self.remote.db_at(&db_path).ok();
-        let (db, actual_rev) = match (self.locked_rev, db) {
+
+        let (db, actual_rev) = match (&self.locked_rev, db) {
             // If we have a locked revision, and we have a preexisting database
             // which has that revision, then no update needs to happen.
-            (Some(rev), Some(db)) if db.contains(rev) => (db, rev),
+            (Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid),
 
             // If we're in offline mode, we're not locked, and we have a
             // database, then try to resolve our reference with the preexisting
             // repository.
-            (None, Some(db)) if self.config.offline() => {
-                let rev = db.resolve(&self.manifest_reference).with_context(|| {
+            (Revision::Deferred(git_ref), Some(db)) if self.config.offline() => {
+                let rev = db.resolve(&git_ref).with_context(|| {
                     "failed to lookup reference in preexisting repository, and \
                          can't check for updates in offline mode (--offline)"
                 })?;
@@ -279,6 +328,7 @@ impl<'cfg> Source for GitSource<'cfg> {
                         self.remote.url()
                     );
                 }
+
                 if !self.quiet {
                     self.config.shell().status(
                         "Updating",
@@ -288,13 +338,9 @@ impl<'cfg> Source for GitSource<'cfg> {
 
                 trace!("updating git source `{:?}`", self.remote);
 
-                self.remote.checkout(
-                    &db_path,
-                    db,
-                    &self.manifest_reference,
-                    locked_rev,
-                    self.config,
-                )?
+                let locked_rev = locked_rev.clone().into();
+                self.remote
+                    .checkout(&db_path, db, &locked_rev, self.config)?
             }
         };
 
@@ -321,7 +367,7 @@ impl<'cfg> Source for GitSource<'cfg> {
 
         self.path_source = Some(path_source);
         self.short_id = Some(short_id.as_str().into());
-        self.locked_rev = Some(actual_rev);
+        self.locked_rev = Revision::Locked(actual_rev);
         self.path_source.as_mut().unwrap().update()?;
 
         // Hopefully this shouldn't incur too much of a performance hit since
@@ -350,7 +396,10 @@ impl<'cfg> Source for GitSource<'cfg> {
     }
 
     fn fingerprint(&self, _pkg: &Package) -> CargoResult<String> {
-        Ok(self.locked_rev.as_ref().unwrap().to_string())
+        match &self.locked_rev {
+            Revision::Locked(oid) => Ok(oid.to_string()),
+            _ => unreachable!("locked_rev must be resolved when computing fingerprint"),
+        }
     }
 
     fn describe(&self) -> String {
diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs
index 7ce43d9bd1f..20ef6ddedda 100644
--- a/src/cargo/sources/git/utils.rs
+++ b/src/cargo/sources/git/utils.rs
@@ -95,8 +95,6 @@ impl GitRemote {
     /// This ensures that it gets the up-to-date commit when a named reference
     /// is given (tag, branch, refs/*). Thus, network connection is involved.
     ///
-    /// When `locked_rev` is provided, it takes precedence over `reference`.
-    ///
     /// If we have a previous instance of [`GitDatabase`] then fetch into that
     /// if we can. If that can successfully load our revision then we've
     /// populated the database with the latest version of `reference`, so
@@ -106,11 +104,8 @@ impl GitRemote {
         into: &Path,
         db: Option<GitDatabase>,
         reference: &GitReference,
-        locked_rev: Option<git2::Oid>,
         cargo_config: &Config,
     ) -> CargoResult<(GitDatabase, git2::Oid)> {
-        let locked_ref = locked_rev.map(|oid| GitReference::Rev(oid.to_string()));
-        let reference = locked_ref.as_ref().unwrap_or(reference);
         if let Some(mut db) = db {
             fetch(
                 &mut db.repo,
@@ -121,11 +116,7 @@ impl GitRemote {
             )
             .with_context(|| format!("failed to fetch into: {}", into.display()))?;
 
-            let resolved_commit_hash = match locked_rev {
-                Some(rev) => db.contains(rev).then_some(rev),
-                None => resolve_ref(reference, &db.repo).ok(),
-            };
-            if let Some(rev) = resolved_commit_hash {
+            if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
                 return Ok((db, rev));
             }
         }
@@ -146,10 +137,7 @@ impl GitRemote {
             RemoteKind::GitDependency,
         )
         .with_context(|| format!("failed to clone into: {}", into.display()))?;
-        let rev = match locked_rev {
-            Some(rev) => rev,
-            None => resolve_ref(reference, &repo)?,
-        };
+        let rev = resolve_ref(reference, &repo)?;
 
         Ok((
             GitDatabase {
diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs
index d9289acc6ba..7ad04e6805e 100644
--- a/tests/testsuite/git.rs
+++ b/tests/testsuite/git.rs
@@ -757,13 +757,11 @@ fn update_with_shared_deps() {
         .with_status(101)
         .with_stderr(
             "\
+[UPDATING] git repository [..]
 [ERROR] Unable to update [..]
 
 Caused by:
-  precise value for git is not a git revision: 0.1.2
-
-Caused by:
-  unable to parse OID - contains invalid characters; class=Invalid (3)
+  revspec '0.1.2' not found; class=Reference (4); code=NotFound (-3)
 ",
         )
         .run();
diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs
index 0d233aa2f40..7ef9ca97edc 100644
--- a/tests/testsuite/update.rs
+++ b/tests/testsuite/update.rs
@@ -1281,6 +1281,7 @@ fn update_precise_git_revisions() {
     });
     let tag_name = "Nazgûl";
     git::tag(&git_repo, tag_name);
+    let tag_commit_id = git_repo.head().unwrap().target().unwrap().to_string();
 
     git_project.change_file("src/lib.rs", "fn f() {}");
     git::add(&git_repo);
@@ -1313,29 +1314,41 @@ fn update_precise_git_revisions() {
 
     p.cargo("update git --precise")
         .arg(tag_name)
-        .with_status(101)
         .with_stderr(format!(
             "\
-[ERROR] Unable to update {url}#{tag_name}
-
-Caused by:
-  precise value for git is not a git revision: {tag_name}
-
-Caused by:
-[..]"
+[UPDATING] git repository `{url}`
+[UPDATING] git v0.5.0 ([..]) -> #{}",
+            &tag_commit_id[..8],
         ))
         .run();
 
+    assert!(p.read_lockfile().contains(&tag_commit_id));
+    assert!(!p.read_lockfile().contains(&head_id));
+
     p.cargo("update git --precise")
         .arg(short_id)
-        .with_status(101)
         .with_stderr(format!(
             "\
 [UPDATING] git repository `{url}`
-[ERROR] Unable to update {url}#{short_id}
+[UPDATING] git v0.5.0 ([..]) -> #{short_id}",
+        ))
+        .run();
 
-Caused by:
-  object not found - no match for id ({short_id}00000000000000[..]); [..]",
+    assert!(p.read_lockfile().contains(&head_id));
+    assert!(!p.read_lockfile().contains(&tag_commit_id));
+
+    // updating back to tag still requires a git fetch,
+    // as the ref may change over time.
+    p.cargo("update git --precise")
+        .arg(tag_name)
+        .with_stderr(format!(
+            "\
+[UPDATING] git repository `{url}`
+[UPDATING] git v0.5.0 ([..]) -> #{}",
+            &tag_commit_id[..8],
         ))
         .run();
+
+    assert!(p.read_lockfile().contains(&tag_commit_id));
+    assert!(!p.read_lockfile().contains(&head_id));
 }

From ce551d73b86759c924376c7ea15cf5d17f58e833 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Thu, 4 Jan 2024 16:00:06 -0500
Subject: [PATCH 3/5] refactor(git): remove `manifest_reference` from GitSource

---
 src/cargo/sources/git/source.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs
index 3be8fc4567f..8cfd520cddd 100644
--- a/src/cargo/sources/git/source.rs
+++ b/src/cargo/sources/git/source.rs
@@ -68,8 +68,6 @@ use url::Url;
 pub struct GitSource<'cfg> {
     /// The git remote which we're going to fetch from.
     remote: GitRemote,
-    /// The Git reference from the manifest file.
-    manifest_reference: GitReference,
     /// The revision which a git source is locked to.
     ///
     /// Expected to always be [`Revision::Locked`] after the Git repository is fetched.
@@ -103,7 +101,7 @@ impl<'cfg> GitSource<'cfg> {
         assert!(source_id.is_git(), "id is not git, id={}", source_id);
 
         let remote = GitRemote::new(source_id.url());
-        let manifest_reference = source_id.git_reference().unwrap().clone();
+        // Fallback to git ref from mainfest if there is no locked revision.
         let locked_rev = source_id
             .precise_full_git_fragment()
             .map(|s| Revision::new(s.into()))
@@ -119,7 +117,6 @@ impl<'cfg> GitSource<'cfg> {
 
         let source = GitSource {
             remote,
-            manifest_reference,
             locked_rev,
             source_id,
             path_source: None,
@@ -239,9 +236,12 @@ impl<'cfg> Debug for GitSource<'cfg> {
         // TODO(-Znext-lockfile-bump): set it to true when stabilizing
         // lockfile v4, because we want Source ID serialization to be
         // consistent with lockfile.
-        match self.manifest_reference.pretty_ref(false) {
-            Some(s) => write!(f, " ({})", s),
-            None => Ok(()),
+        match &self.locked_rev {
+            Revision::Deferred(git_ref) => match git_ref.pretty_ref(false) {
+                Some(s) => write!(f, " ({})", s),
+                None => Ok(()),
+            },
+            Revision::Locked(oid) => write!(f, " ({oid})"),
         }
     }
 }

From 093f7c837026a9f75cced4b637cfafef51c92a2a Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Thu, 4 Jan 2024 16:29:56 -0500
Subject: [PATCH 4/5] refactor: rename precise_full_git_fragment to
 precise_git_fragment

---
 src/cargo/core/source_id.rs              | 4 ----
 src/cargo/ops/cargo_generate_lockfile.rs | 2 +-
 src/cargo/sources/git/source.rs          | 3 +--
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/cargo/core/source_id.rs b/src/cargo/core/source_id.rs
index 6e6d9d9d84b..1c4982ceb9c 100644
--- a/src/cargo/core/source_id.rs
+++ b/src/cargo/core/source_id.rs
@@ -460,10 +460,6 @@ impl SourceId {
     }
 
     pub fn precise_git_fragment(self) -> Option<&'static str> {
-        self.precise_full_git_fragment().map(|s| &s[..8])
-    }
-
-    pub fn precise_full_git_fragment(self) -> Option<&'static str> {
         match &self.inner.precise {
             Some(Precise::GitUrlFragment(s)) => Some(&s),
             _ => None,
diff --git a/src/cargo/ops/cargo_generate_lockfile.rs b/src/cargo/ops/cargo_generate_lockfile.rs
index 1bba64925ec..e638c69390f 100644
--- a/src/cargo/ops/cargo_generate_lockfile.rs
+++ b/src/cargo/ops/cargo_generate_lockfile.rs
@@ -167,7 +167,7 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
                 format!(
                     "{} -> #{}",
                     removed[0],
-                    &added[0].source_id().precise_git_fragment().unwrap()
+                    &added[0].source_id().precise_git_fragment().unwrap()[..8],
                 )
             } else {
                 format!("{} -> v{}", removed[0], added[0].version())
diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs
index 8cfd520cddd..c8f14737c0a 100644
--- a/src/cargo/sources/git/source.rs
+++ b/src/cargo/sources/git/source.rs
@@ -103,7 +103,7 @@ impl<'cfg> GitSource<'cfg> {
         let remote = GitRemote::new(source_id.url());
         // Fallback to git ref from mainfest if there is no locked revision.
         let locked_rev = source_id
-            .precise_full_git_fragment()
+            .precise_git_fragment()
             .map(|s| Revision::new(s.into()))
             .unwrap_or_else(|| source_id.git_reference().unwrap().clone().into());
 
@@ -199,7 +199,6 @@ impl From<Revision> for GitReference {
     }
 }
 
-
 /// Create an identifier from a URL,
 /// essentially turning `proto://host/path/repo` into `repo-<hash-of-url>`.
 fn ident(id: &SourceId) -> String {

From e6f2dfd4524ae50eaf6c243077aae3b72a545c1e Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Thu, 4 Jan 2024 17:43:19 -0500
Subject: [PATCH 5/5] test(cargo-update): verify a oid-like tag triggers a
 re-fetch

---
 tests/testsuite/update.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs
index 7ef9ca97edc..93299e910dd 100644
--- a/tests/testsuite/update.rs
+++ b/tests/testsuite/update.rs
@@ -1351,4 +1351,22 @@ fn update_precise_git_revisions() {
 
     assert!(p.read_lockfile().contains(&tag_commit_id));
     assert!(!p.read_lockfile().contains(&head_id));
+
+    // Now make a tag looks like an oid.
+    // It requires a git fetch, as the oid cannot be found in preexisting git db.
+    let arbitrary_tag: String = std::iter::repeat('a').take(head_id.len()).collect();
+    git::tag(&git_repo, &arbitrary_tag);
+
+    p.cargo("update git --precise")
+        .arg(&arbitrary_tag)
+        .with_stderr(format!(
+            "\
+[UPDATING] git repository `{url}`
+[UPDATING] git v0.5.0 ([..]) -> #{}",
+            &head_id[..8],
+        ))
+        .run();
+
+    assert!(p.read_lockfile().contains(&head_id));
+    assert!(!p.read_lockfile().contains(&tag_commit_id));
 }