From 326c2478f641fac2fcd8c6f6c389ae8390bf5907 Mon Sep 17 00:00:00 2001 From: messense Date: Thu, 6 Oct 2022 23:22:18 +0800 Subject: [PATCH] refactor: bookkeeping added file list in `SDistWriter` --- src/module_writer.rs | 36 ++++++++++++++++++------ src/source_distribution.rs | 57 ++++++++++++-------------------------- 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/src/module_writer.rs b/src/module_writer.rs index d15f75c20..59db498fb 100644 --- a/src/module_writer.rs +++ b/src/module_writer.rs @@ -8,7 +8,7 @@ use fs_err as fs; use fs_err::File; use ignore::WalkBuilder; use sha2::{Digest, Sha256}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::ffi::OsStr; use std::fmt::Write as _; #[cfg(target_family = "unix")] @@ -312,6 +312,7 @@ impl WheelWriter { pub struct SDistWriter { tar: tar::Builder>, path: PathBuf, + files: HashSet, } impl ModuleWriter for SDistWriter { @@ -325,35 +326,48 @@ impl ModuleWriter for SDistWriter { bytes: &[u8], permissions: u32, ) -> Result<()> { + let target = target.as_ref(); + if self.files.contains(target) { + // Ignore duplicate files + return Ok(()); + } let mut header = tar::Header::new_gnu(); header.set_size(bytes.len() as u64); header.set_mode(permissions); header.set_cksum(); self.tar - .append_data(&mut header, &target, bytes) + .append_data(&mut header, target, bytes) .context(format!( "Failed to add {} bytes to sdist as {}", bytes.len(), - target.as_ref().display() + target.display() ))?; + self.files.insert(target.to_path_buf()); Ok(()) } fn add_file(&mut self, target: impl AsRef, source: impl AsRef) -> Result<()> { - if source.as_ref() == self.path { + let source = source.as_ref(); + let target = target.as_ref(); + if source == self.path { bail!( "Attempting to include the sdist output tarball {} into itself! Check 'cargo package --list' output.", - source.as_ref().display() + source.display() ); } + if self.files.contains(target) { + // Ignore duplicate files + return Ok(()); + } self.tar - .append_path_with_name(&source, &target) + .append_path_with_name(source, target) .context(format!( "Failed to add file from {} to sdist as {}", - source.as_ref().display(), - target.as_ref().display(), + source.display(), + target.display(), ))?; + self.files.insert(target.to_path_buf()); Ok(()) } } @@ -371,7 +385,11 @@ impl SDistWriter { let enc = GzEncoder::new(tar_gz, Compression::default()); let tar = tar::Builder::new(enc); - Ok(Self { tar, path }) + Ok(Self { + tar, + path, + files: HashSet::new(), + }) } /// Finished the .tar.gz archive diff --git a/src/source_distribution.rs b/src/source_distribution.rs index 997a89e7c..eae4244b6 100644 --- a/src/source_distribution.rs +++ b/src/source_distribution.rs @@ -3,7 +3,7 @@ use crate::{BuildContext, PyProjectToml, SDistWriter}; use anyhow::{bail, Context, Result}; use cargo_metadata::{Metadata, MetadataCommand}; use fs_err as fs; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::process::Command; use std::str; @@ -236,7 +236,7 @@ fn add_crate_to_source_distribution( prefix: impl AsRef, known_path_deps: &HashMap, root_crate: bool, -) -> Result> { +) -> Result<()> { let manifest_path = manifest_path.as_ref(); let pyproject_toml_path = pyproject_toml_path.as_ref(); let output = Command::new("cargo") @@ -336,24 +336,18 @@ fn add_crate_to_source_distribution( let prefix = prefix.as_ref(); writer.add_directory(prefix)?; - let mut added_files = HashSet::new(); - added_files.insert(prefix.to_path_buf()); - let cargo_toml = if cargo_toml_in_subdir { prefix.join(manifest_path) } else { prefix.join(manifest_path.file_name().unwrap()) }; - writer.add_bytes(&cargo_toml, rewritten_cargo_toml.as_bytes())?; - added_files.insert(cargo_toml); + writer.add_bytes(cargo_toml, rewritten_cargo_toml.as_bytes())?; for (target, source) in target_source { - let target = prefix.join(target); - writer.add_file(&target, source)?; - added_files.insert(target); + writer.add_file(prefix.join(target), source)?; } - Ok(added_files) + Ok(()) } /// Finds all path dependencies of the crate @@ -464,7 +458,7 @@ pub fn source_distribution( } // Add the main crate - let mut added_files = add_crate_to_source_distribution( + add_crate_to_source_distribution( &mut writer, &pyproject_toml_path, &manifest_path, @@ -480,10 +474,7 @@ pub fn source_distribution( let cargo_lock_required = build_context.cargo_options.locked || build_context.cargo_options.frozen; if cargo_lock_required || cargo_lock_path.exists() { - if !added_files.contains(&target) { - writer.add_file(&target, &cargo_lock_path)?; - added_files.insert(target); - } + writer.add_file(&target, &cargo_lock_path)?; } else { println!( "⚠️ Warning: Cargo.lock is not found, it is recommended \ @@ -495,34 +486,23 @@ pub fn source_distribution( let pyproject_dir = pyproject_toml_path.parent().unwrap(); if let Some(project) = pyproject.project.as_ref() { if let Some(pyproject_toml::ReadMe::RelativePath(readme)) = project.readme.as_ref() { - let target = root_dir.join(readme); - if !added_files.contains(&target) { - writer.add_file(&target, pyproject_dir.join(readme))?; - added_files.insert(target); - } + writer.add_file(root_dir.join(readme), pyproject_dir.join(readme))?; } if let Some(pyproject_toml::License { file: Some(license), text: None, }) = project.license.as_ref() { - let target = root_dir.join(license); - if !added_files.contains(&target) { - writer.add_file(&target, pyproject_dir.join(license))?; - added_files.insert(target); - } + writer.add_file(root_dir.join(license), pyproject_dir.join(license))?; } if let Some(python_source) = pyproject.python_source() { for entry in ignore::Walk::new(pyproject_dir.join(python_source)) { let source = entry?.into_path(); let target = root_dir.join(source.strip_prefix(&pyproject_dir)?); - if !added_files.contains(&target) { - if source.is_dir() { - writer.add_directory(&target)?; - } else { - writer.add_file(&target, &source)?; - } - added_files.insert(target); + if source.is_dir() { + writer.add_directory(target)?; + } else { + writer.add_file(target, &source)?; } } } @@ -535,13 +515,10 @@ pub fn source_distribution( .filter_map(Result::ok) { let target = root_dir.join(&source.strip_prefix(pyproject_dir)?); - if !added_files.contains(&target) { - if source.is_dir() { - writer.add_directory(&target)?; - } else { - writer.add_file(&target, source)?; - } - added_files.insert(target); + if source.is_dir() { + writer.add_directory(target)?; + } else { + writer.add_file(target, source)?; } } }