diff --git a/crates/pypi-types/src/requirement.rs b/crates/pypi-types/src/requirement.rs index 0c3d0ce8629bd..61e3144c75813 100644 --- a/crates/pypi-types/src/requirement.rs +++ b/crates/pypi-types/src/requirement.rs @@ -8,7 +8,9 @@ use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, V use uv_git::{GitReference, GitSha}; use uv_normalize::{ExtraName, PackageName}; -use crate::{ParsedUrl, VerbatimParsedUrl}; +use crate::{ + ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, VerbatimParsedUrl, +}; /// A representation of dependency on a package, an extension over a PEP 508's requirement. /// @@ -251,6 +253,66 @@ impl RequirementSource { } } + pub fn to_verbatim_parsed_url(&self) -> Option { + Some(match &self { + Self::Registry { .. } => { + return None; + } + Self::Url { + subdirectory, + location, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source( + location.clone(), + subdirectory.clone(), + )), + verbatim: url.clone(), + }, + Self::Path { + install_path, + lock_path, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source( + install_path.clone(), + lock_path.clone(), + url.to_url(), + )), + verbatim: url.clone(), + }, + Self::Directory { + install_path, + lock_path, + editable, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Directory(ParsedDirectoryUrl::from_source( + install_path.clone(), + lock_path.clone(), + *editable, + url.to_url(), + )), + verbatim: url.clone(), + }, + Self::Git { + repository, + reference, + precise, + subdirectory, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Git(ParsedGitUrl::from_source( + repository.clone(), + reference.clone(), + *precise, + subdirectory.clone(), + )), + verbatim: url.clone(), + }, + }) + } + /// Returns `true` if the source is editable. pub fn is_editable(&self) -> bool { matches!(self, Self::Directory { editable: true, .. }) diff --git a/crates/uv-resolver/src/error.rs b/crates/uv-resolver/src/error.rs index a2919a71da33a..7c27f2bc038e4 100644 --- a/crates/uv-resolver/src/error.rs +++ b/crates/uv-resolver/src/error.rs @@ -9,7 +9,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use distribution_types::{BuiltDist, IndexLocations, InstalledDist, SourceDist}; use pep440_rs::Version; -use pep508_rs::Requirement; +use pep508_rs::{MarkerTree, Requirement}; use uv_normalize::PackageName; use crate::candidate_selector::CandidateSelector; @@ -48,11 +48,18 @@ pub enum ResolveError { #[error(transparent)] PubGrubSpecifier(#[from] PubGrubSpecifierError), - #[error("Requirements contain conflicting URLs for package `{0}`:\n- {1}\n- {2}")] - ConflictingUrlsDirect(PackageName, String, String), + #[error("Overrides contain conflicting URLs for package `{0}`:\n- {1}\n- {2}")] + ConflictingOverrideUrls(PackageName, String, String), - #[error("There are conflicting URLs for package `{0}`:\n- {1}\n- {2}")] - ConflictingUrlsTransitive(PackageName, String, String), + #[error("Requirements contain conflicting URLs for package `{0}`:\n- {}", _1.join("\n- "))] + ConflictingUrls(PackageName, Vec), + + #[error("Requirements contain conflicting URLs for package `{package_name}` in split `{fork_markers}`:\n- {}", urls.join("\n- "))] + ConflictingUrlsInFork { + package_name: PackageName, + urls: Vec, + fork_markers: MarkerTree, + }, #[error("Package `{0}` attempted to resolve via URL: {1}. URL dependencies must be expressed as direct requirements or constraints. Consider adding `{0} @ {1}` to your dependencies or constraints file.")] DisallowedUrl(PackageName, String), diff --git a/crates/uv-resolver/src/fork_urls.rs b/crates/uv-resolver/src/fork_urls.rs new file mode 100644 index 0000000000000..102c0048a9853 --- /dev/null +++ b/crates/uv-resolver/src/fork_urls.rs @@ -0,0 +1,56 @@ +use std::collections::hash_map::Entry; + +use rustc_hash::FxHashMap; + +use distribution_types::Verbatim; +use pep508_rs::MarkerTree; +use pypi_types::VerbatimParsedUrl; +use uv_normalize::PackageName; + +use crate::ResolveError; + +/// See [`crate::resolver::SolveState`]. +#[derive(Default, Clone)] +pub(crate) struct ForkUrls(FxHashMap); + +impl ForkUrls { + /// Get the URL previously used for a package in this fork. + pub(crate) fn get(&self, package_name: &PackageName) -> Option<&VerbatimParsedUrl> { + self.0.get(package_name) + } + + /// Check that this is the only URL used for this package in this fork. + pub(crate) fn insert( + &mut self, + package_name: &PackageName, + url: &VerbatimParsedUrl, + fork_markers: &MarkerTree, + ) -> Result<(), ResolveError> { + match self.0.entry(package_name.clone()) { + Entry::Occupied(previous) => { + if previous.get() != url { + let conflicting_url = vec![ + previous.get().verbatim.verbatim().to_string(), + url.to_string(), + ]; + return if fork_markers == &MarkerTree::And(Vec::new()) { + Err(ResolveError::ConflictingUrls( + package_name.clone(), + conflicting_url, + )) + } else { + Err(ResolveError::ConflictingUrlsInFork { + package_name: package_name.clone(), + urls: conflicting_url, + fork_markers: fork_markers.clone(), + }) + }; + } + } + Entry::Vacant(vacant) => { + vacant.insert(url.clone()); + } + } + Ok(()) + } +} diff --git a/crates/uv-resolver/src/lib.rs b/crates/uv-resolver/src/lib.rs index 65a4223d3aa11..a1496bd13d4cc 100644 --- a/crates/uv-resolver/src/lib.rs +++ b/crates/uv-resolver/src/lib.rs @@ -30,6 +30,7 @@ mod error; mod exclude_newer; mod exclusions; mod flat_index; +mod fork_urls; mod lock; mod manifest; mod marker; diff --git a/crates/uv-resolver/src/manifest.rs b/crates/uv-resolver/src/manifest.rs index 22ae0df8bbbb2..3b89069b0915f 100644 --- a/crates/uv-resolver/src/manifest.rs +++ b/crates/uv-resolver/src/manifest.rs @@ -97,6 +97,16 @@ impl Manifest { &'a self, markers: Option<&'a MarkerEnvironment>, mode: DependencyMode, + ) -> impl Iterator + 'a { + self.requirements_no_overrides(markers, mode) + .chain(self.overrides(markers, mode)) + } + + /// Like [`Self::requirements`], but without the overrides. + pub fn requirements_no_overrides<'a>( + &'a self, + markers: Option<&'a MarkerEnvironment>, + mode: DependencyMode, ) -> impl Iterator + 'a { match mode { // Include all direct and transitive requirements, with constraints and overrides applied. @@ -119,11 +129,6 @@ impl Manifest { self.constraints .requirements() .filter(move |requirement| requirement.evaluate_markers(markers, &[])), - ) - .chain( - self.overrides - .requirements() - .filter(move |requirement| requirement.evaluate_markers(markers, &[])), ), ), // Include direct requirements, with constraints and overrides applied. @@ -131,7 +136,28 @@ impl Manifest { self.overrides .apply(&self.requirements) .chain(self.constraints.requirements()) - .chain(self.overrides.requirements()) + .filter(move |requirement| requirement.evaluate_markers(markers, &[])), + ), + } + } + + /// Only the overrides from [`Self::requirements`]. + pub fn overrides<'a>( + &'a self, + markers: Option<&'a MarkerEnvironment>, + mode: DependencyMode, + ) -> impl Iterator + 'a { + match mode { + // Include all direct and transitive requirements, with constraints and overrides applied. + DependencyMode::Transitive => Either::Left( + self.overrides + .requirements() + .filter(move |requirement| requirement.evaluate_markers(markers, &[])), + ), + // Include direct requirements, with constraints and overrides applied. + DependencyMode::Direct => Either::Right( + self.overrides + .requirements() .filter(move |requirement| requirement.evaluate_markers(markers, &[])), ), } diff --git a/crates/uv-resolver/src/pubgrub/dependencies.rs b/crates/uv-resolver/src/pubgrub/dependencies.rs index fced9adb5a052..bea613bd948bb 100644 --- a/crates/uv-resolver/src/pubgrub/dependencies.rs +++ b/crates/uv-resolver/src/pubgrub/dependencies.rs @@ -1,9 +1,10 @@ +use std::iter; + use itertools::Itertools; use pubgrub::range::Range; -use tracing::warn; +use tracing::{debug, warn}; -use distribution_types::Verbatim; -use pep440_rs::Version; +use pep440_rs::{Version, VersionSpecifiers}; use pypi_types::{ ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, Requirement, RequirementSource, @@ -11,6 +12,8 @@ use pypi_types::{ use uv_git::GitResolver; use uv_normalize::{ExtraName, PackageName}; +use crate::fork_urls::ForkUrls; +use crate::marker::is_disjoint; use crate::pubgrub::{PubGrubPackage, PubGrubPackageInner}; use crate::resolver::{Locals, Urls}; use crate::{PubGrubSpecifier, ResolveError}; @@ -24,6 +27,7 @@ impl PubGrubDependencies { pub(crate) fn from_requirements( flattened_requirements: &[&Requirement], source_name: Option<&PackageName>, + fork_urls: &ForkUrls, urls: &Urls, locals: &Locals, git: &GitResolver, @@ -31,15 +35,25 @@ impl PubGrubDependencies { let mut dependencies = Vec::new(); for requirement in flattened_requirements { // Add the package, plus any extra variants. - for result in std::iter::once(PubGrubRequirement::from_requirement( + for result in iter::once(PubGrubRequirement::from_requirement( requirement, None, + flattened_requirements, + fork_urls, urls, locals, git, )) .chain(requirement.extras.clone().into_iter().map(|extra| { - PubGrubRequirement::from_requirement(requirement, Some(extra), urls, locals, git) + PubGrubRequirement::from_requirement( + requirement, + Some(extra), + flattened_requirements, + fork_urls, + urls, + locals, + git, + ) })) { let PubGrubRequirement { package, version } = result?; @@ -97,77 +111,74 @@ pub(crate) struct PubGrubRequirement { impl PubGrubRequirement { /// Convert a [`Requirement`] to a PubGrub-compatible package and range. + /// + /// # Handling URLs + /// + /// Consider the following two cases: + /// requirements.in: + /// ```text + /// werkzeug==2.0.0 + /// werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl + /// ``` + /// pyproject.toml: + /// ```toml + /// dependencies = [ + /// "iniconfig == 1.1.1 ; python_version < '3.12'", + /// "iniconfig @ git+https://github.com/pytest-dev/iniconfig@93f5930e668c0d1ddf4597e38dd0dea4e2665e7a ; python_version >= '3.12'", + /// ] + /// ``` + /// + /// In the former case, we want the URL to override the registry dependency, in the latter + /// case we want to fork and have one branch use the registry and the other the URL. + /// We have to know about this in `PubGrubRequirement::from_requirement`, but we only + /// fork after the current method. Instead, we can check if the registry and the URL + /// requirement for the same name are disjoint: If they are, they must end up in + /// different branches on forking, if they aren't, we know the URL overrides the + /// registry. We don't have to check for duplicates/collisions here, `ForkUrls` will + /// handle this after forking. + /// + /// For a registry requirement: + /// * If there is a URL override: Use that override, done. + /// * If there is a fork-specific URL: Use that, done. + /// * If there is another requirement in the same package with a URL requirement find a matching URL in `Urls`, use it to canonicalize the current URL. + /// * After forking: Check that this URL is the only URL in this fork. + /// + /// For a URL requirement: + /// * If there is a URL override: Use that override, done. + /// * Find a matching URL in `Urls`, use it to canonicalize the current URL. + /// * After forking: Check that this URL is the only URL in this fork. pub(crate) fn from_requirement( requirement: &Requirement, extra: Option, + flattened_requirements: &[&Requirement], + fork_urls: &ForkUrls, urls: &Urls, locals: &Locals, git: &GitResolver, ) -> Result { - match &requirement.source { + let (verbatim_url, parsed_url) = match &requirement.source { RequirementSource::Registry { specifier, .. } => { - // TODO(konsti): We're currently losing the index information here, but we need - // either pass it to `PubGrubPackage` or the `ResolverProvider` beforehand. - // If the specifier is an exact version, and the user requested a local version that's - // more precise than the specifier, use the local version instead. - let version = if let Some(expected) = locals.get(&requirement.name) { - specifier - .iter() - .map(|specifier| { - Locals::map(expected, specifier) - .map_err(ResolveError::InvalidVersion) - .and_then(|specifier| Ok(PubGrubSpecifier::try_from(&specifier)?)) - }) - .fold_ok(Range::full(), |range, specifier| { - range.intersection(&specifier.into()) - })? - } else { - PubGrubSpecifier::try_from(specifier)?.into() - }; - - Ok(Self { - package: PubGrubPackage::from_package( - requirement.name.clone(), - extra, - requirement.marker.clone(), - urls, - ), - version, - }) + return Self::from_registry_requirement( + specifier, + extra, + requirement, + flattened_requirements, + fork_urls, + urls, + locals, + git, + ); } RequirementSource::Url { subdirectory, location, url, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Archive(ParsedArchiveUrl::from_source( location.clone(), subdirectory.clone(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } - - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), - }) + (url, parsed_url) } RequirementSource::Git { repository, @@ -176,71 +187,25 @@ impl PubGrubRequirement { url, subdirectory, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Git(ParsedGitUrl::from_source( repository.clone(), reference.clone(), *precise, subdirectory.clone(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } - - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), - }) + (url, parsed_url) } RequirementSource::Path { url, install_path, lock_path, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Path(ParsedPathUrl::from_source( install_path.clone(), lock_path.clone(), url.to_url(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } - - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), - }) + (url, parsed_url) } RequirementSource::Directory { editable, @@ -248,37 +213,115 @@ impl PubGrubRequirement { install_path, lock_path, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Directory(ParsedDirectoryUrl::from_source( install_path.clone(), lock_path.clone(), *editable, url.to_url(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } + (url, parsed_url) + } + }; - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), + // If we have a URL override, apply it unconditionally. Otherwise, check that the URL is + // allowed and pick it's canonical from. We will check for conflicts (different URLs in + // the same fork) after forking using the `fork_urls` map. + let url = if let Some(override_url) = urls.get_override(&requirement.name) { + override_url + } else { + urls.canonicalize_allowed_url(&requirement.name, git, verbatim_url, &parsed_url)? + }; + Ok(Self { + package: PubGrubPackage::from_url( + requirement.name.clone(), + extra, + requirement.marker.clone(), + url.clone(), + ), + version: Range::full(), + }) + } + + #[allow(clippy::too_many_arguments)] + fn from_registry_requirement( + specifier: &VersionSpecifiers, + extra: Option, + requirement: &Requirement, + flattened_requirements: &[&Requirement], + fork_urls: &ForkUrls, + urls: &Urls, + locals: &Locals, + git: &GitResolver, + ) -> Result { + // If the specifier is an exact version, and the user requested a local version that's + // more precise than the specifier, use the local version instead. + let version = if let Some(expected) = locals.get(&requirement.name) { + specifier + .iter() + .map(|specifier| { + Locals::map(expected, specifier) + .map_err(ResolveError::InvalidVersion) + .and_then(|specifier| Ok(PubGrubSpecifier::try_from(&specifier)?)) }) - } + .fold_ok(Range::full(), |range, specifier| { + range.intersection(&specifier.into()) + })? + } else { + PubGrubSpecifier::try_from(specifier)?.into() + }; + + // See docstring, registry case. + let url = if let Some(override_url) = urls.get_override(&requirement.name) { + // The override URL is the canonical form. + Some(override_url.clone()) + } else if let Some(fork_url) = fork_urls.get(&requirement.name) { + // Fork URLs are already canonicalized. + Some(fork_url.clone()) + } else if let Some(url) = flattened_requirements + .iter() + // Filter for other requirements with the same name ... + .filter(|other| other.name == requirement.name && other != &&requirement) + // ... that will not be in a different fork by having disjoint markers. + .filter( + |other| match (requirement.marker.as_ref(), other.marker.as_ref()) { + (Some(requirement_marker), Some(other_marker)) => { + !is_disjoint(requirement_marker, other_marker) + } + // Either having a universal marker means they in every fork. + _ => true, + }, + ) + .find_map(|other| other.source.to_verbatim_parsed_url()) + { + Some( + urls.canonicalize_allowed_url( + &requirement.name, + git, + &url.verbatim, + &url.parsed_url, + )? + .clone(), + ) + } else { + None + }; + + if url.is_none() && urls.get_regular(&requirement.name).is_some() { + debug!( + "Fork uses registry instead of URL distribution for {}", + requirement.name + ); } + + let requirement = Self { + package: PubGrubPackage::from_package( + requirement.name.clone(), + extra, + requirement.marker.clone(), + url, + ), + version, + }; + Ok(requirement) } } diff --git a/crates/uv-resolver/src/pubgrub/package.rs b/crates/uv-resolver/src/pubgrub/package.rs index afbc3a908dea0..f3c78a0abeb26 100644 --- a/crates/uv-resolver/src/pubgrub/package.rs +++ b/crates/uv-resolver/src/pubgrub/package.rs @@ -6,8 +6,6 @@ use pep508_rs::MarkerTree; use pypi_types::VerbatimParsedUrl; use uv_normalize::{ExtraName, GroupName, PackageName}; -use crate::resolver::Urls; - /// [`Arc`] wrapper around [`PubGrubPackageInner`] to make cloning (inside PubGrub) cheap. #[derive(Debug, Clone, Eq, Hash, PartialEq, PartialOrd, Ord)] pub struct PubGrubPackage(Arc); @@ -131,14 +129,13 @@ pub enum PubGrubPackageInner { } impl PubGrubPackage { - /// Create a [`PubGrubPackage`] from a package name and version. + /// Create a [`PubGrubPackage`] from a package name and, optionally, a URL. pub(crate) fn from_package( name: PackageName, extra: Option, mut marker: Option, - urls: &Urls, + url: Option, ) -> Self { - let url = urls.get(&name).cloned(); // Remove all extra expressions from the marker, since we track extras // separately. This also avoids an issue where packages added via // extras end up having two distinct marker expressions, which in turn diff --git a/crates/uv-resolver/src/pubgrub/priority.rs b/crates/uv-resolver/src/pubgrub/priority.rs index e293e80114730..fd82d81d130a3 100644 --- a/crates/uv-resolver/src/pubgrub/priority.rs +++ b/crates/uv-resolver/src/pubgrub/priority.rs @@ -140,6 +140,10 @@ pub(crate) enum PubGrubPriority { Singleton(Reverse), /// The package was specified via a direct URL. + /// + /// N.B.: URLs need to have priority over registry distributions for correctly matching registry + /// distributions to URLs, see [`PubGrubPackage::from_package`] an + /// [`crate::fork_urls::ForkUrls`]. DirectUrl(Reverse), /// The package is the root package. diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index d2bd5ece30b78..da72951922673 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -40,6 +40,7 @@ use uv_types::{BuildContext, HashStrategy, InstalledPackagesProvider}; use crate::candidate_selector::{CandidateDist, CandidateSelector}; use crate::dependency_provider::UvDependencyProvider; use crate::error::ResolveError; +use crate::fork_urls::ForkUrls; use crate::manifest::Manifest; use crate::pins::FilePins; use crate::preferences::Preferences; @@ -79,7 +80,7 @@ pub struct Resolver { project: Option, requirements: Vec, @@ -324,6 +325,7 @@ impl ResolverState ResolverState ResolverState, ) -> Result { - let result = self.get_dependencies(package, version, markers, priorities, request_sink); + let result = self.get_dependencies( + package, + version, + fork_urls, + markers, + priorities, + request_sink, + ); if self.markers.is_some() { return result.map(|deps| match deps { Dependencies::Available(deps) => ForkedDependencies::Unforked(deps), @@ -930,6 +941,7 @@ impl ResolverState, @@ -949,6 +961,7 @@ impl ResolverState ResolverState ResolverState)>, prefetcher: &BatchPrefetcher, ) -> Result<(), ResolveError> { + // Check for self-dependencies. if dependencies .iter() .any(|(dependency, _)| dependency == &self.next) @@ -1680,6 +1702,18 @@ impl SolveState { } .into()); } + // Check for conflicting URLs. + for (package, _version) in &dependencies { + if let PubGrubPackageInner::Package { + name, + url: Some(url), + .. + } = &**package + { + self.urls.insert(name, url, &self.markers)?; + }; + } + self.pubgrub.add_package_version_dependencies( self.next.clone(), version.clone(), diff --git a/crates/uv-resolver/src/resolver/urls.rs b/crates/uv-resolver/src/resolver/urls.rs index eb787e60de8f6..2c05dbf9aae7d 100644 --- a/crates/uv-resolver/src/resolver/urls.rs +++ b/crates/uv-resolver/src/resolver/urls.rs @@ -1,22 +1,30 @@ use rustc_hash::FxHashMap; use same_file::is_same_file; +use std::iter; use tracing::debug; use cache_key::CanonicalUrl; use distribution_types::Verbatim; -use pep508_rs::MarkerEnvironment; -use pypi_types::{ - ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, - RequirementSource, VerbatimParsedUrl, -}; +use pep508_rs::{MarkerEnvironment, VerbatimUrl}; +use pypi_types::{ParsedDirectoryUrl, ParsedUrl, VerbatimParsedUrl}; use uv_git::GitResolver; use uv_normalize::PackageName; use crate::{DependencyMode, Manifest, ResolveError}; -/// A map of package names to their associated, required URLs. +/// The URLs that are allowed for packages. +/// +/// These are the URLs used in the root package or by other URL dependencies (including path +/// dependencies). They take precedence over requirements by version (except for the special case +/// where we are in a fork that doesn't use any of the URL(s) used in other forks). Each fork may +/// only use a single URL. #[derive(Debug, Default)] -pub(crate) struct Urls(FxHashMap); +pub(crate) struct Urls { + /// URL requirements in the root package, from other URL requirements or from constraints. + regular: FxHashMap>, + /// URL requirements in overrides. + overrides: FxHashMap, +} impl Urls { pub(crate) fn from_manifest( @@ -25,155 +33,132 @@ impl Urls { git: &GitResolver, dependencies: DependencyMode, ) -> Result { - let mut urls: FxHashMap = FxHashMap::default(); + let mut urls: FxHashMap> = FxHashMap::default(); + let mut overrides: FxHashMap = FxHashMap::default(); // Add all direct requirements and constraints. If there are any conflicts, return an error. - for requirement in manifest.requirements(markers, dependencies) { - match &requirement.source { - RequirementSource::Registry { .. } => {} - RequirementSource::Url { - subdirectory, - location, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source( - location.clone(), - subdirectory.clone(), - )), - verbatim: url.clone(), - }; - if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) { - if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); + for requirement in manifest.requirements_no_overrides(markers, dependencies) { + let Some(url) = requirement.source.to_verbatim_parsed_url() else { + // Registry requirement + continue; + }; + + let package_urls = urls.entry(requirement.name.clone()).or_default(); + if let Some(package_url) = package_urls + .iter_mut() + .find(|package_url| same_resource(&package_url.parsed_url, &url.parsed_url, git)) + { + // Allow editables to override non-editables. + let previous_editable = package_url.is_editable(); + *package_url = url; + if previous_editable { + if let VerbatimParsedUrl { + parsed_url: ParsedUrl::Directory(ParsedDirectoryUrl { editable, .. }), + verbatim: _, + } = package_url + { + if !*editable { + debug!("Allowing an editable variant of {}", &package_url.verbatim); + *editable = true; } - } - } - RequirementSource::Path { - install_path, - lock_path, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source( - install_path.clone(), - lock_path.clone(), - url.to_url(), - )), - verbatim: url.clone(), }; - if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) { - if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); - } - } } - RequirementSource::Directory { - install_path, - lock_path, - editable, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Directory(ParsedDirectoryUrl::from_source( - install_path.clone(), - lock_path.clone(), - *editable, - url.to_url(), - )), - verbatim: url.clone(), - }; - match urls.entry(requirement.name.clone()) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - let previous = entry.get(); - if Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - // Allow editables to override non-editables. - if previous.parsed_url.is_editable() && !editable { - debug!( - "Allowing {} as an editable variant of {}", - &previous.verbatim, url.verbatim - ); - } else { - entry.insert(url.clone()); - } - continue; - } + } else { + package_urls.push(url); + } + } - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); - } - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(url.clone()); - } - } - } - RequirementSource::Git { - repository, - reference, - precise, - subdirectory, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Git(ParsedGitUrl::from_source( - repository.clone(), - reference.clone(), - *precise, - subdirectory.clone(), - )), - verbatim: url.clone(), - }; - if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) { - if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); - } - } + for requirement in manifest.overrides(markers, dependencies) { + let Some(url) = requirement.source.to_verbatim_parsed_url() else { + // Registry requirement + continue; + }; + // With an override `anyio==0.0.0` and a requirements.txt entry `./anyio`, we still use + // the URL. See `allow_recursive_url_local_path_override_constraint` + urls.remove(&requirement.name); + let previous = overrides.insert(requirement.name.clone(), url.clone()); + if let Some(previous) = previous { + if !same_resource(&previous.parsed_url, &url.parsed_url, git) { + return Err(ResolveError::ConflictingOverrideUrls( + requirement.name.clone(), + previous.verbatim.verbatim().to_string(), + url.verbatim.verbatim().to_string(), + )); } } } - Ok(Self(urls)) + Ok(Self { + regular: urls, + overrides, + }) } - /// Return the [`VerbatimUrl`] associated with the given package name, if any. - pub(crate) fn get(&self, package: &PackageName) -> Option<&VerbatimParsedUrl> { - self.0.get(package) + /// Return the allowed [`VerbatimUrl`]s for given package from regular requirements and + /// constraints (but not overrides), if any. + /// + /// It's more than one more URL if they are in different forks (or conflict after forking). + pub(crate) fn get_regular(&self, package: &PackageName) -> Option<&[VerbatimParsedUrl]> { + self.regular.get(package).map(Vec::as_slice) } - /// Returns `true` if the [`ParsedUrl`] instances point to the same resource. - pub(crate) fn same_resource(a: &ParsedUrl, b: &ParsedUrl, git: &GitResolver) -> bool { - match (a, b) { - (ParsedUrl::Archive(a), ParsedUrl::Archive(b)) => { - a.subdirectory == b.subdirectory - && CanonicalUrl::new(&a.url) == CanonicalUrl::new(&b.url) - } - (ParsedUrl::Git(a), ParsedUrl::Git(b)) => { - a.subdirectory == b.subdirectory && git.same_ref(&a.url, &b.url) - } - (ParsedUrl::Path(a), ParsedUrl::Path(b)) => { - a.install_path == b.install_path - || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) - } - (ParsedUrl::Directory(a), ParsedUrl::Directory(b)) => { - a.install_path == b.install_path - || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) - } - _ => false, + /// Return the [`VerbatimUrl`] override for the given package, if any. + pub(crate) fn get_override(&self, package: &PackageName) -> Option<&VerbatimParsedUrl> { + self.overrides.get(package) + } + + /// Check if a URL is allowed (known), and if so, return its canonical form. + pub(crate) fn canonicalize_allowed_url<'a>( + &'a self, + package_name: &'a PackageName, + git: &'a GitResolver, + verbatim_url: &'a VerbatimUrl, + parsed_url: &'a ParsedUrl, + ) -> Result<&'a VerbatimParsedUrl, ResolveError> { + let Some(expected) = self.get_regular(package_name) else { + return Err(ResolveError::DisallowedUrl( + package_name.clone(), + verbatim_url.to_string(), + )); + }; + + let matching_urls: Vec<_> = expected + .iter() + .filter(|requirement| same_resource(&requirement.parsed_url, parsed_url, git)) + .collect(); + + let [allowed_url] = matching_urls.as_slice() else { + return Err(ResolveError::ConflictingUrls( + package_name.clone(), + matching_urls + .into_iter() + .map(|parsed_url| parsed_url.verbatim.verbatim().to_string()) + .chain(iter::once(verbatim_url.verbatim().to_string())) + .collect(), + )); + }; + Ok(*allowed_url) + } +} + +/// Returns `true` if the [`ParsedUrl`] instances point to the same resource. +fn same_resource(a: &ParsedUrl, b: &ParsedUrl, git: &GitResolver) -> bool { + match (a, b) { + (ParsedUrl::Archive(a), ParsedUrl::Archive(b)) => { + a.subdirectory == b.subdirectory + && CanonicalUrl::new(&a.url) == CanonicalUrl::new(&b.url) + } + (ParsedUrl::Git(a), ParsedUrl::Git(b)) => { + a.subdirectory == b.subdirectory && git.same_ref(&a.url, &b.url) + } + (ParsedUrl::Path(a), ParsedUrl::Path(b)) => { + a.install_path == b.install_path + || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) + } + (ParsedUrl::Directory(a), ParsedUrl::Directory(b)) => { + a.install_path == b.install_path + || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) } + _ => false, } } diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index b35d921eacbb6..8c1a6e7481e6d 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -8308,6 +8308,8 @@ requires-python = ">3.8" /// Allow URL dependencies recursively for local source trees, but respect both overrides _and_ /// constraints. +/// +/// We have app -> lib -> anyio and root has a directory requirement on app. #[test] fn allow_recursive_url_local_path_override_constraint() -> Result<()> { let context = TestContext::new("3.12");