diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7a170aa..249d6b19 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -65,4 +65,4 @@ repos:
       - id: insert-license
         files: \.py$
         args: [--license-filepath, .github/disclaimer.txt, --no-extra-eol]
-        exclude: ^(tests/channel_testing_utils\.py|mamba_utils\.py) # extend global exclude
+        exclude: ^(tests/repodata_time_machine\.py|mamba_utils\.py) # extend global exclude
diff --git a/docs/libmamba-vs-classic.md b/docs/libmamba-vs-classic.md
index ef77edb6..eb8ec4b9 100644
--- a/docs/libmamba-vs-classic.md
+++ b/docs/libmamba-vs-classic.md
@@ -251,6 +251,52 @@ It runs in C, using memory-efficient data structures.
 `conda-libmamba-solver` uses the same IO stack as `conda` classic. In the past, we relied on
 `libmamba`'s IO for repodata fetching, but this is not the case anymore.
 
+## Practical examples of solver differences
+
+### Python 3.11 + very old Pydantic
+
+> Case study inspired by [issue #115](https://github.com/conda/conda-libmamba-solver/issues/115)
+
+The following environment file will give different solutions with `classic` and
+`conda-libmamba-solver`:
+
+```yaml
+name: gmso
+channels:
+  - conda-forge
+dependencies:
+  - numpy
+  - sympy
+  - unyt <=2.8
+  - boltons
+  - lxml
+  - pydantic <1.9.0
+  - networkx
+  - ele >=0.2.0
+  - forcefield-utilities
+```
+
+- `classic`: `python 3.10` + `pydantic 1.8.2`
+- `conda-libmamba-solver`: `python 3.11` + `pydantic 0.18.2`
+
+This is an example of an underspecified input. There's no `python` dependency (or version) listed
+in the environment file, so the solver has to figure it out, and it doesn't necessarily know which
+dependency is more "important". `classic` will prioritize getting a more recent `pydantic` at the
+expense of an older `python`, while `conda-libmamba-solver` will prefer having `python 3.11`, even
+if that means going all the way down to `pydantic 0.18.2`, which was packaged as `noarch` and is
+therefore compatible with _any_ Python version.
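+
+If you need both solvers to agree, the usual fix is to make the input less ambiguous. Below is a
+minimal sketch of that approach, assuming any Python older than 3.11 is acceptable (this mirrors
+the `python<3.11` constraint used in our test suite for this case):
+
+```yaml
+name: gmso
+channels:
+  - conda-forge
+dependencies:
+  - python <3.11  # added: removes the ambiguity the two solvers resolved differently
+  - numpy
+  - sympy
+  - unyt <=2.8
+  - boltons
+  - lxml
+  - pydantic <1.9.0
+  - networkx
+  - ele >=0.2.0
+  - forcefield-utilities
+```
+
+With that extra pin, `conda-libmamba-solver` also selects `pydantic 1.8.2`.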
+
+### cudatoolkit present in a `cpuonly` environment
+
+> Originally reported in [issue #131](https://github.com/conda/conda-libmamba-solver/issues/131)
+
+This is an example of a [known limitation in how `libsolv` processes the `track_features`
+metadata](https://mamba.readthedocs.io/en/latest/advanced_usage/package_resolution.html). `libsolv`
+will only "see" the first level of `track_features`, the metadata used to down-prioritize package
+variants. If the prioritized variants are only tracked through second-order dependencies (a layout
+that conda `classic` processes successfully), you will get mixed results. This can be solved at the
+packaging level by making every variant depend on the package _mutex_ directly, instead of relying
+on packages that in turn depend on the mutex.
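+
+As a purely illustrative sketch (hypothetical package and mutex names, not the real recipes), the
+variant recipes should declare the mutex themselves rather than reaching it through another
+package:
+
+```yaml
+# Fragile: the mutex is only a second-level dependency, which libsolv's
+# track_features handling does not see:
+#   example-pkg[cpu] -> example-cpu-meta -> _example_mutex =*=cpu
+#
+# Robust: every variant lists the mutex as a direct run dependency:
+requirements:
+  run:
+    - _example_mutex =*=cpu  # the "cpu" mutex build carries the track_features entry
+```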
+
 ## More information
 
 If you want to read (even more) about this, please check the following resources:
diff --git a/news/197-solver-differences b/news/197-solver-differences
new file mode 100644
index 00000000..d8ff9281
--- /dev/null
+++ b/news/197-solver-differences
@@ -0,0 +1,19 @@
+### Enhancements
+
+*
+
+### Bug fixes
+
+*
+
+### Deprecations
+
+*
+
+### Docs
+
+* Document known solver behavior differences. (#115, #131 via #197)
+
+### Other
+
+* Add tests reproducing the known solver differences. (#115, #131 via #197)
diff --git a/tests/repodata_time_machine.py b/tests/repodata_time_machine.py
new file mode 100644
index 00000000..c050ab36
--- /dev/null
+++ b/tests/repodata_time_machine.py
@@ -0,0 +1,166 @@
+# Copyright (C) 2023 conda
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+conda repodata time machine
+
+Given a date and a channel, this script will:
+
+- Download a local copy of the (unpatched) repodata
+- Trim it to the closest timestamp
+- Download the closest repodata patches for that channel
+- Apply the patches
+- Generate a ready-to-use local channel
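+
+Example invocation (mirroring the date and subdirs used by ``test_old_panel``, which also adds
+the ``pyviz/label/dev`` channel):
+
+    python tests/repodata_time_machine.py conda-forge -t "2022-06-16 12:31:00" -s osx-64,noarch
+
+The resulting directory contains one subdirectory per channel and can be used as a local channel
+through ``file://`` URLs.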
+"""
+import bz2
+import json
+import os
+import urllib.request
+from argparse import ArgumentParser
+from datetime import datetime, timezone
+
+import requests
+from conda.base.context import context
+from conda.models.channel import Channel
+from conda_index.index import _apply_instructions
+from conda_package_handling.api import extract as cph_extract
+
+PATCHED_CHANNELS = {"defaults", "main", "conda-forge"}
+
+
+def cli():
+    p = ArgumentParser()
+    p.add_argument("channels", nargs="+", metavar="channel")
+    p.add_argument("-t", "--timestamp", required=True, help="YYYY-MM-DD HH:MM:SS. Assumes UTC.")
+    p.add_argument(
+        "-s",
+        "--subdirs",
+        default=f"{context.subdir},noarch",
+        help="Comma-separated list of subdirs to download. Include 'noarch' explicitly if needed.",
+    )
+    return p.parse_args()
+
+
+def download_repodata(channel, subdirs=None):
+    "Download the remote repodata JSON payloads to a temporary location on disk"
+    c = Channel(channel)
+    if c.canonical_name in PATCHED_CHANNELS:
+        repodata_fn = "repodata_from_packages"
+    else:
+        repodata_fn = "repodata"
+    subdirs = subdirs or context.subdirs
+    for url in c.urls(with_credentials=True, subdirs=subdirs):
+        subdir = url.strip("/").split("/")[-1]
+        urllib.request.urlretrieve(f"{url}/{repodata_fn}.json.bz2", f"{repodata_fn}.json.bz2")
+
+        with open(f"{repodata_fn}.json.bz2", "rb") as f:
+            with open(f"{repodata_fn}.json", "wb") as g:
+                g.write(bz2.decompress(f.read()))
+
+        yield f"{repodata_fn}.json", subdir
+
+
+def trim_to_timestamp(repodata, timestamp: float):
+    "Drop every package record newer than the given timestamp (epoch milliseconds)"
+    trimmed_tar_pkgs = {}
+    trimmed_conda_pkgs = {}
+    with open(repodata) as f:
+        data = json.load(f)
+    for name, pkg in data["packages"].items():
+        if pkg.get("timestamp", 0) <= timestamp:
+            trimmed_tar_pkgs[name] = pkg
+    for name, pkg in data["packages.conda"].items():
+        if pkg.get("timestamp", 0) <= timestamp:
+            trimmed_conda_pkgs[name] = pkg
+    data["packages"] = trimmed_tar_pkgs
+    data["packages.conda"] = trimmed_conda_pkgs
+    fn = f"trimmed.{os.path.basename(repodata)}"
+    with open(fn, "w") as f:
+        json.dump(data, f)
+    return fn
+
+
+def download_patches(channel, timestamp: float):
+    "Fetch and extract the newest repodata patches released before the given timestamp"
+    name = Channel(channel).canonical_name
+    if name != "conda-forge":
+        raise NotImplementedError("Only conda-forge is supported for now")
+
+    url = "https://api.anaconda.org/package/conda-forge/conda-forge-repodata-patches/files"
+    r = requests.get(url)
+    r.raise_for_status()
+    pkgs = r.json()
+    closest_older = None
+    for pkg in sorted(pkgs, key=lambda pkg: pkg["attrs"]["timestamp"]):
+        if pkg["attrs"]["timestamp"] <= timestamp:
+            closest_older = pkg
+        else:
+            break
+    if closest_older is None:
+        raise ValueError(f"No patch found for timestamp {timestamp}")
+
+    fn = closest_older["basename"].split("/")[-1]
+    urllib.request.urlretrieve(f"https:{closest_older['download_url']}", fn)
+
+    extract_path = f"conda-forge-repodata-patches-{closest_older['version']}"
+    cph_extract(fn, dest_dir=extract_path)
+    return extract_path
+
+
+def apply_patch(repodata_file, patch):
+    "Apply patch_instructions.json to the repodata, the same way conda-index would"
+    with open(repodata_file) as f, open(patch) as g:
+        repodata = json.load(f)
+        instructions = json.load(g)
+    fn = f"patched.{os.path.basename(repodata_file)}"
+    with open(fn, "w") as f:
+        patched = _apply_instructions(None, repodata, instructions)
+        json.dump(patched, f, indent=2)
+    return fn
+
+
+def repodata_time_machine(channels, timestamp_str, subdirs=None):
+    # The CLI help promises UTC, so parse the timestamp as timezone-aware UTC
+    horizon = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
+    # repodata timestamps are expressed in epoch milliseconds
+    timestamp = horizon.timestamp() * 1000
+    original_dir = os.getcwd()
+    try:
+        workdir = f"repodata-{timestamp_str.replace(' ', '-').replace(':', '-').replace('.', '-')}"
+        os.makedirs(workdir, exist_ok=True)
+        os.chdir(workdir)
+        # Download repodata
+        for channel in channels:
+            print("Rolling back", channel, "to", horizon)
+            channel_name = Channel(channel).canonical_name
+            os.makedirs(channel_name, exist_ok=True)
+            os.chdir(channel_name)
+            must_patch = channel_name in PATCHED_CHANNELS
+            if must_patch:
+                print("  Getting patches")
+                patch_dir = os.path.abspath(download_patches(channel, timestamp))
+            for repodata, subdir in download_repodata(channel, subdirs=subdirs):
+                print("  Downloaded", repodata, "for", subdir)
+                print("  Trimming...")
+                abs_repodata = os.path.abspath(repodata)
+                os.makedirs(subdir, exist_ok=True)
+                os.chdir(subdir)
+                trimmed = trim_to_timestamp(abs_repodata, timestamp)
+                if must_patch:
+                    print("  Patching...")
+                    instructions = f"{patch_dir}/{subdir}/patch_instructions.json"
+                    patched = apply_patch(trimmed, instructions)
+                    if not os.path.exists("repodata.json"):
+                        os.symlink(patched, "repodata.json")
+                else:
+                    if not os.path.exists("repodata.json"):
+                        os.symlink(trimmed, "repodata.json")
+                os.chdir("..")
+            os.chdir("..")
+        return workdir
+    finally:
+        os.chdir(original_dir)
+
+
+def main():
+    args = cli()
+    return repodata_time_machine(args.channels, args.timestamp, args.subdirs.split(","))
+
+
+if __name__ == "__main__":
+    main()
+    print("Done!")
diff --git a/tests/requirements.txt b/tests/requirements.txt
index a52781a4..8a252fc1 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,3 +1,4 @@
 conda-forge::pytest-xprocess
+conda-index
 # needed for many conda tests
 flask
diff --git a/tests/test_solver_differences.py b/tests/test_solver_differences.py
new file mode 100644
index 00000000..0d10a415
--- /dev/null
+++ b/tests/test_solver_differences.py
@@ -0,0 +1,222 @@
+# Copyright (C) 2022 Anaconda, Inc
+# Copyright (C) 2023 conda
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+This module collects examples of environments that were hard to solve, required
+workarounds or didn't meet users' expectations, especially when compared to conda classic.
+"""
+import json
+import os
+
+import pytest
+from conda.common.compat import on_linux
+
+from .repodata_time_machine import repodata_time_machine
+from .utils import conda_subprocess
+
+
+@pytest.mark.skipif(not on_linux, reason="Only relevant on Linux")
+def test_pydantic_182_not_on_python_311():
+    """
+    See daico007's report on https://github.com/conda/conda-libmamba-solver/issues/115
+
+    - The original environment file didn't specify a Python version.
+    - conda classic does find that Python 3.10 and pydantic 1.8.2 are compatible.
+    - libsolv has to try all Python versions, starting with 3.11 as of writing,
+      and then tries different pydantic versions. It finds 0.18.2 is compatible, but only
+      because that release is `noarch` with an open-ended upper bound on its Python requirement.
+    - If we do specify that we want a Python version for which pydantic 1.8.2 is available,
+      libsolv correctly finds it.
+    """
+    env = os.environ.copy()
+    env["CONDA_SUBDIR"] = "linux-64"
+    args = (
+        "create",
+        "-n",
+        "unused",
+        "--dry-run",
+        "--override-channels",
+        "-c",
+        "conda-forge",
+        "--json",
+    )
+    pkgs = (
+        "numpy",
+        "sympy",
+        "unyt<=2.8",
+        "boltons",
+        "lxml",
+        "pydantic<1.9.0",
+        "networkx",
+        "ele>=0.2.0",
+        "forcefield-utilities",
+    )
+    p = conda_subprocess(
+        *args,
+        "--solver=classic",
+        *pkgs,
+        env=env,
+    )
+    data = json.loads(p.stdout)
+    pydantic = next(pkg for pkg in data["actions"]["LINK"] if pkg["name"] == "pydantic")
+    assert pydantic["version"] == "1.8.2"
+
+    p = conda_subprocess(
+        *args,
+        "--solver=libmamba",
+        *pkgs,
+        env=env,
+    )
+    data = json.loads(p.stdout)
+    pydantic = next(pkg for pkg in data["actions"]["LINK"] if pkg["name"] == "pydantic")
+    assert pydantic["version"] != "1.8.2"
+
+    p = conda_subprocess(
+        *args,
+        "--solver=libmamba",
+        *pkgs,
+        "python<3.11",
+        env=env,
+    )
+    data = json.loads(p.stdout)
+    pydantic = next(pkg for pkg in data["actions"]["LINK"] if pkg["name"] == "pydantic")
+    assert pydantic["version"] == "1.8.2"
+
+
+@pytest.mark.skipif(not on_linux, reason="Only relevant on Linux")
+def test_gpu_cpu_mutexes():
+    """
+    See:
+    - https://github.com/conda/conda-libmamba-solver/issues/115#issuecomment-1399040871
+    - https://github.com/conda/conda-libmamba-solver/issues/115#issuecomment-1399040867
+    - https://github.com/conda/conda-libmamba-solver/issues/131
+
+    This behaviour difference is known and explained at
+    https://github.com/conda/conda-libmamba-solver/issues/131#issuecomment-1440745813.
+
+    If at some point this changes (e.g. a libmamba fix), this test will capture it.
+    """
+    args = (
+        "create",
+        "-n",
+        "unused",
+        "--dry-run",
+        "--json",
+        "--override-channels",
+        "-c",
+        "conda-forge",
+        "-c",
+        "pyg",
+        "-c",
+        "pytorch",
+    )
+    pkgs = (
+        "cpuonly",
+        "pyg=2.1.0",
+        "python=3.9",
+        "pytorch::pytorch=1.12",
+    )
+    env = os.environ.copy()
+    env["CONDA_SUBDIR"] = "linux-64"
+    p = conda_subprocess(
+        *args,
+        "--solver=classic",
+        *pkgs,
+        env=env,
+    )
+    data = json.loads(p.stdout)
+    found = 0
+    target_pkgs = ("pytorch", "pyg")
+    for pkg in data["actions"]["LINK"]:
+        if pkg["name"] in target_pkgs:
+            found += 1
+            assert "cpu" in pkg["build_string"]
+        elif pkg["name"] == "cudatoolkit":
+            raise AssertionError("CUDA shouldn't be installed due to 'cpuonly'")
+    assert found == len(target_pkgs)
+
+    p = conda_subprocess(
+        *args,
+        "--solver=libmamba",
+        *pkgs,
+        env=env,
+    )
+    data = json.loads(p.stdout)
+    # This should not happen, but it does. See docstring.
+    assert next(pkg for pkg in data["actions"]["LINK"] if pkg["name"] == "cudatoolkit")
+
+    p = conda_subprocess(
+        *args,
+        "--solver=libmamba",
+        "cpuonly",
+        "pyg=2.1.0",
+        "python=3.9",
+        "pytorch::pytorch",  # more recent pytorch versions seem to be properly packaged
+        env=env,
+    )
+    data = json.loads(p.stdout)
+    # With an unpinned pytorch, cudatoolkit is correctly left out. See docstring.
+    assert not next((pkg for pkg in data["actions"]["LINK"] if pkg["name"] == "cudatoolkit"), None)
+
+
+@pytest.mark.skipif(not on_linux, reason="Slow test, only run on Linux")
+def test_old_panel(tmp_path):
+    """
+    https://github.com/conda/conda-libmamba-solver/issues/64
+
+    Note we cannot reproduce the original error even with the "time-machine'd" repodata.
+    """
+    os.chdir(tmp_path)
+    print("Patching repodata...")
+    old_repodata = os.path.abspath(
+        repodata_time_machine(
+            channels=["conda-forge", "pyviz/label/dev"],
+            timestamp_str="2022-06-16 12:31:00",
+            subdirs=("osx-64", "noarch"),
+        )
+    )
+    with open(f"{old_repodata}/conda-forge/osx-64/repodata.json") as f:
+        data = f.read()
+    # Make sure the repodata was rolled back correctly:
+    # Python 3.11 only appeared in October 2022
+    assert "python-3.11.0" not in data
+
+    channel_prefix = f"file://{old_repodata}/"
+    env = os.environ.copy()
+    env["CONDA_SUBDIR"] = "osx-64"
+    env["CONDA_REPODATA_THREADS"] = "1"
+    env["CONDA_DEFAULT_THREADS"] = "1"
+    env["CONDA_FETCH_THREADS"] = "1"
+    env["CONDA_REMOTE_CONNECT_TIMEOUT_SECS"] = "1"
+    env["CONDA_REMOTE_MAX_RETRIES"] = "1"
+    env["CONDA_REMOTE_BACKOFF_FACTOR"] = "1"
+    env["CONDA_REMOTE_READ_TIMEOUT_SECS"] = "1"
+    args = (
+        "create",
+        "-n",
+        "unused",
+        "--dry-run",
+        "--json",
+        "--override-channels",
+        "-c",
+        f"{channel_prefix}pyviz/label/dev",
+        "-c",
+        f"{channel_prefix}conda-forge",
+        "--repodata-fn=repodata.json",
+    )
+    pkgs = (
+        "python=3.8",
+        "lumen",
+    )
+
+    # The original report claimed conda-libmamba-solver got an older version of panel,
+    # but we cannot reproduce that anymore: both solvers should agree here.
+    for solver in ("classic", "libmamba"):
+        print("Solving with", solver)
+        p = conda_subprocess(
+            *args,
+            "--solver",
+            solver,
+            *pkgs,
+            env=env,
+        )
+        data = json.loads(p.stdout)
+        panel = next(pkg for pkg in data["actions"]["LINK"] if pkg["name"] == "panel")
+        assert panel["version"] == "0.14.0a2"
diff --git a/tests/utils.py b/tests/utils.py
index 8ceb69d0..5ace7629 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -8,20 +8,20 @@
 from ruamel.yaml import YAML
 
 
-def conda_subprocess(*args, explain=False, **kwargs) -> CompletedProcess:
+def conda_subprocess(*args, explain=False, capture_output=True, **kwargs) -> CompletedProcess:
     cmd = [sys.executable, "-m", "conda", *[str(a) for a in args]]
     if explain:
         print("+", " ".join(cmd))
     p = run(
         cmd,
-        capture_output=True,
-        text=True,
+        capture_output=capture_output,
+        text=kwargs.pop("text", capture_output),
         **kwargs,
     )
-    if p.returncode:
+    if capture_output and p.returncode:
         print(p.stdout)
         print(p.stderr, file=sys.stderr)
-    p.check_returncode()
+        p.check_returncode()
     return p