From c425779c9c69e118cab8dd9bde5a92ca8fc6a13b Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 10 Nov 2024 01:29:18 +0100 Subject: [PATCH] rewrite the `min_deps_check` script (#9754) * rewrite the `min_deps_check` script * call the new script * unpin `micromamba` * install `rich-click` * enforce a minimum width of 120 * remove the background colors * remove old min-deps script * more changing of colors * some more styling * ... aaand some more styling * move the style definition in one place * compare versions *before* formatting * move the definition `console` into `main` * properly add two columns to the warnings tables * define the styles using the class and RGB values --- .github/workflows/ci-additional.yaml | 15 +- ci/min_deps_check.py | 218 ------------------ ci/minimum_versions.py | 323 +++++++++++++++++++++++++++ 3 files changed, 332 insertions(+), 224 deletions(-) delete mode 100755 ci/min_deps_check.py create mode 100644 ci/minimum_versions.py diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index b665d20b40a..9ef315f505c 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -303,6 +303,9 @@ jobs: run: shell: bash -l {0} + env: + COLUMNS: 120 + steps: - uses: actions/checkout@v4 with: @@ -311,20 +314,20 @@ jobs: - name: Setup micromamba uses: mamba-org/setup-micromamba@v2 with: - # run with micromamba 1.5.10 together with conda - # conda.api is not API compatible with libmambapy - micromamba-version: "1.5.10-0" environment-name: xarray-tests create-args: >- python=3.12 pyyaml python-dateutil - conda + cytoolz + rich + rich-click + py-rattler - name: All-deps minimum versions policy run: | - python ci/min_deps_check.py ci/requirements/min-all-deps.yml + python ci/minimum_versions.py ci/requirements/min-all-deps.yml - name: Bare minimum versions policy run: | - python ci/min_deps_check.py ci/requirements/bare-minimum.yml + python ci/minimum_versions.py ci/requirements/bare-minimum.yml diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py deleted file mode 100755 index 443ab7d5a40..00000000000 --- a/ci/min_deps_check.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env python -"""Fetch from conda database all available versions of the xarray dependencies and their -publication date. Compare it against requirements/min-all-deps.yml to verify the -policy on obsolete dependencies is being followed. Print a pretty report :) -""" - -from __future__ import annotations - -import itertools -import sys -from collections.abc import Iterator -from datetime import datetime - -import conda.api # type: ignore[import] -import yaml -from dateutil.relativedelta import relativedelta - -CHANNELS = ["conda-forge", "defaults"] -IGNORE_DEPS = { - "coveralls", - "flake8", - "hypothesis", - "isort", - "mypy", - "pip", - "pytest", - "pytest-cov", - "pytest-env", - "pytest-timeout", - "pytest-xdist", - "setuptools", -} - -POLICY_MONTHS = {"python": 30, "numpy": 18} -POLICY_MONTHS_DEFAULT = 12 -POLICY_OVERRIDE: dict[str, tuple[int, int]] = {} -errors = [] - - -def error(msg: str) -> None: - global errors - errors.append(msg) - print("ERROR:", msg) - - -def warning(msg: str) -> None: - print("WARNING:", msg) - - -def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]: - """Load requirements/min-all-deps.yml - - Yield (package name, major version, minor version, [patch version]) - """ - global errors - - with open(fname) as fh: - contents = yaml.safe_load(fh) - for row in contents["dependencies"]: - if isinstance(row, dict) and list(row) == ["pip"]: - continue - pkg, eq, version = row.partition("=") - if pkg.rstrip("<>") in IGNORE_DEPS: - continue - if pkg.endswith(("<", ">")) or eq != "=": - error("package should be pinned with exact version: " + row) - continue - - try: - version_tup = tuple(int(x) for x in version.split(".")) - except ValueError as err: - raise ValueError("non-numerical version: " + row) from err - - if len(version_tup) == 2: - yield (pkg, *version_tup, None) # type: ignore[misc] - elif len(version_tup) == 3: - yield (pkg, *version_tup) # type: ignore[misc] - else: - raise ValueError("expected major.minor or major.minor.patch: " + row) - - -def query_conda(pkg: str) -> dict[tuple[int, int], datetime]: - """Query the conda repository for a specific package - - Return map of {(major version, minor version): publication date} - """ - - def metadata(entry): - version = entry.version - - time = datetime.fromtimestamp(entry.timestamp) - major, minor = map(int, version.split(".")[:2]) - - return (major, minor), time - - raw_data = conda.api.SubdirData.query_all(pkg, channels=CHANNELS) - data = sorted(metadata(entry) for entry in raw_data if entry.timestamp != 0) - - release_dates = { - version: [time for _, time in group if time is not None] - for version, group in itertools.groupby(data, key=lambda x: x[0]) - } - out = {version: min(dates) for version, dates in release_dates.items() if dates} - - # Hardcoded fix to work around incorrect dates in conda - if pkg == "python": - out.update( - { - (2, 7): datetime(2010, 6, 3), - (3, 5): datetime(2015, 9, 13), - (3, 6): datetime(2016, 12, 23), - (3, 7): datetime(2018, 6, 27), - (3, 8): datetime(2019, 10, 14), - (3, 9): datetime(2020, 10, 5), - (3, 10): datetime(2021, 10, 4), - (3, 11): datetime(2022, 10, 24), - } - ) - - return out - - -def process_pkg( - pkg: str, req_major: int, req_minor: int, req_patch: int | None -) -> tuple[str, str, str, str, str, str]: - """Compare package version from requirements file to available versions in conda. - Return row to build pandas dataframe: - - - package name - - major.minor.[patch] version in requirements file - - publication date of version in requirements file (YYYY-MM-DD) - - major.minor version suggested by policy - - publication date of version suggested by policy (YYYY-MM-DD) - - status ("<", "=", "> (!)") - """ - print(f"Analyzing {pkg}...") - versions = query_conda(pkg) - - try: - req_published = versions[req_major, req_minor] - except KeyError: - error("not found in conda: " + pkg) - return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)" - - policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT) - policy_published = datetime.now() - relativedelta(months=policy_months) - - filtered_versions = [ - version - for version, published in versions.items() - if published < policy_published - ] - policy_major, policy_minor = max(filtered_versions, default=(req_major, req_minor)) - - try: - policy_major, policy_minor = POLICY_OVERRIDE[pkg] - except KeyError: - pass - policy_published_actual = versions[policy_major, policy_minor] - - if (req_major, req_minor) < (policy_major, policy_minor): - status = "<" - elif (req_major, req_minor) > (policy_major, policy_minor): - status = "> (!)" - delta = relativedelta(datetime.now(), req_published).normalized() - n_months = delta.years * 12 + delta.months - warning( - f"Package is too new: {pkg}={req_major}.{req_minor} was " - f"published on {req_published:%Y-%m-%d} " - f"which was {n_months} months ago (policy is {policy_months} months)" - ) - else: - status = "=" - - if req_patch is not None: - warning("patch version should not appear in requirements file: " + pkg) - status += " (w)" - - return ( - pkg, - fmt_version(req_major, req_minor, req_patch), - req_published.strftime("%Y-%m-%d"), - fmt_version(policy_major, policy_minor), - policy_published_actual.strftime("%Y-%m-%d"), - status, - ) - - -def fmt_version(major: int, minor: int, patch: int | None = None) -> str: - if patch is None: - return f"{major}.{minor}" - else: - return f"{major}.{minor}.{patch}" - - -def main() -> None: - fname = sys.argv[1] - rows = [ - process_pkg(pkg, major, minor, patch) - for pkg, major, minor, patch in parse_requirements(fname) - ] - - print("\nPackage Required Policy Status") - print("----------------- -------------------- -------------------- ------") - fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}" - for row in rows: - print(fmt.format(*row)) - - if errors: - print("\nErrors:") - print("-------") - for i, e in enumerate(errors): - print(f"{i+1}. {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/ci/minimum_versions.py b/ci/minimum_versions.py new file mode 100644 index 00000000000..c226e304769 --- /dev/null +++ b/ci/minimum_versions.py @@ -0,0 +1,323 @@ +import asyncio +import bisect +import datetime +import pathlib +import sys +from dataclasses import dataclass, field + +import rich_click as click +import yaml +from dateutil.relativedelta import relativedelta +from rattler import Gateway, Version +from rich.console import Console +from rich.panel import Panel +from rich.style import Style +from rich.table import Column, Table +from tlz.functoolz import curry, pipe +from tlz.itertoolz import concat, groupby + +click.rich_click.SHOW_ARGUMENTS = True + +channels = ["conda-forge"] +platforms = ["noarch", "linux-64"] +ignored_packages = [ + "coveralls", + "pip", + "pytest", + "pytest-cov", + "pytest-env", + "pytest-xdist", + "pytest-timeout", + "hypothesis", +] + + +@dataclass +class Policy: + package_months: dict + default_months: int + overrides: dict[str, Version] = field(default_factory=dict) + + def minimum_version(self, package_name, releases): + if (override := self.overrides.get(package_name)) is not None: + return override + + policy_months = self.package_months.get(package_name, self.default_months) + today = datetime.date.today() + + cutoff_date = today - relativedelta(months=policy_months) + + index = bisect.bisect_left( + releases, cutoff_date, key=lambda x: x.timestamp.date() + ) + return releases[index - 1 if index > 0 else 0] + + +@dataclass +class Spec: + name: str + version: Version | None + + @classmethod + def parse(cls, spec_text): + warnings = [] + if ">" in spec_text or "<" in spec_text: + warnings.append( + f"package should be pinned with an exact version: {spec_text!r}" + ) + + spec_text = spec_text.replace(">", "").replace("<", "") + + if "=" in spec_text: + name, version_text = spec_text.split("=", maxsplit=1) + version = Version(version_text) + segments = version.segments() + + if len(segments) != 2 or (len(segments) == 3 and segments[2] != 0): + warnings.append( + f"package should be pinned to a minor version (got {version})" + ) + else: + name = spec_text + version = None + + return cls(name, version), (name, warnings) + + +@dataclass(order=True) +class Release: + version: Version + build_number: int + timestamp: datetime.datetime = field(compare=False) + + @classmethod + def from_repodata_record(cls, repo_data): + return cls( + version=repo_data.version, + build_number=repo_data.build_number, + timestamp=repo_data.timestamp, + ) + + +def parse_environment(text): + env = yaml.safe_load(text) + + specs = [] + warnings = [] + for dep in env["dependencies"]: + spec, warnings_ = Spec.parse(dep) + + warnings.append(warnings_) + specs.append(spec) + + return specs, warnings + + +def is_preview(version): + candidates = ["rc", "beta", "alpha"] + + *_, last_segment = version.segments() + return any(candidate in last_segment for candidate in candidates) + + +def group_packages(records): + groups = groupby(lambda r: r.name.normalized, records) + return { + name: sorted(map(Release.from_repodata_record, group)) + for name, group in groups.items() + } + + +def filter_releases(predicate, releases): + return { + name: [r for r in records if predicate(r)] for name, records in releases.items() + } + + +def deduplicate_releases(package_info): + def deduplicate(releases): + return min(releases, key=lambda p: p.timestamp) + + return { + name: list(map(deduplicate, groupby(lambda p: p.version, group).values())) + for name, group in package_info.items() + } + + +def find_policy_versions(policy, releases): + return { + name: policy.minimum_version(name, package_releases) + for name, package_releases in releases.items() + } + + +def is_suitable_release(release): + if release.timestamp is None: + return False + + segments = release.version.extend_to_length(3).segments() + + return segments[2] == [0] + + +def lookup_spec_release(spec, releases): + version = spec.version.extend_to_length(3) + + return releases[spec.name][version] + + +def compare_versions(environments, policy_versions): + status = {} + for env, specs in environments.items(): + env_status = any( + spec.version > policy_versions[spec.name].version for spec in specs + ) + status[env] = env_status + return status + + +def version_comparison_symbol(required, policy): + if required < policy: + return "<" + elif required > policy: + return ">" + else: + return "=" + + +def format_bump_table(specs, policy_versions, releases, warnings): + table = Table( + Column("Package", width=20), + Column("Required", width=8), + "Required (date)", + Column("Policy", width=8), + "Policy (date)", + "Status", + ) + + heading_style = Style(color="#ff0000", bold=True) + warning_style = Style(color="#ffff00", bold=True) + styles = { + ">": Style(color="#ff0000", bold=True), + "=": Style(color="#008700", bold=True), + "<": Style(color="#d78700", bold=True), + } + + for spec in specs: + policy_release = policy_versions[spec.name] + policy_version = policy_release.version.with_segments(0, 2) + policy_date = policy_release.timestamp + + required_version = spec.version + required_date = lookup_spec_release(spec, releases).timestamp + + status = version_comparison_symbol(required_version, policy_version) + style = styles[status] + + table.add_row( + spec.name, + str(required_version), + f"{required_date:%Y-%m-%d}", + str(policy_version), + f"{policy_date:%Y-%m-%d}", + status, + style=style, + ) + + grid = Table.grid(expand=True, padding=(0, 2)) + grid.add_column(style=heading_style, vertical="middle") + grid.add_column() + grid.add_row("Version summary", table) + + if any(warnings.values()): + warning_table = Table(width=table.width, expand=True) + warning_table.add_column("Package") + warning_table.add_column("Warning") + + for package, messages in warnings.items(): + if not messages: + continue + warning_table.add_row(package, messages[0], style=warning_style) + for message in messages[1:]: + warning_table.add_row("", message, style=warning_style) + + grid.add_row("Warnings", warning_table) + + return grid + + +@click.command() +@click.argument( + "environment_paths", + type=click.Path(exists=True, readable=True, path_type=pathlib.Path), + nargs=-1, +) +def main(environment_paths): + console = Console() + + parsed_environments = { + path.stem: parse_environment(path.read_text()) for path in environment_paths + } + + warnings = { + env: dict(warnings_) for env, (_, warnings_) in parsed_environments.items() + } + environments = { + env: [spec for spec in specs if spec.name not in ignored_packages] + for env, (specs, _) in parsed_environments.items() + } + + all_packages = list( + dict.fromkeys(spec.name for spec in concat(environments.values())) + ) + + policy_months = { + "python": 30, + "numpy": 18, + } + policy_months_default = 12 + overrides = {} + + policy = Policy( + policy_months, default_months=policy_months_default, overrides=overrides + ) + + gateway = Gateway() + query = gateway.query(channels, platforms, all_packages, recursive=False) + records = asyncio.run(query) + + package_releases = pipe( + records, + concat, + group_packages, + curry(filter_releases, lambda r: r.timestamp is not None), + deduplicate_releases, + ) + policy_versions = pipe( + package_releases, + curry(filter_releases, is_suitable_release), + curry(find_policy_versions, policy), + ) + status = compare_versions(environments, policy_versions) + + release_lookup = { + n: {r.version: r for r in releases} for n, releases in package_releases.items() + } + grids = { + env: format_bump_table(specs, policy_versions, release_lookup, warnings[env]) + for env, specs in environments.items() + } + root_grid = Table.grid() + root_grid.add_column() + + for env, grid in grids.items(): + root_grid.add_row(Panel(grid, title=env, expand=True)) + + console.print(root_grid) + + status_code = 1 if any(status.values()) else 0 + sys.exit(status_code) + + +if __name__ == "__main__": + main()