diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a46176299..f924f6457 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -203,13 +203,16 @@ jobs:
     if: ${{ needs.eval.outputs.testCode == 'True' }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-12]
+        os: [ubuntu-latest, windows-latest, macos-latest]
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         kind: [serial, other, dml, main, treatment, ray]
         exclude:
         # Serial tests fail randomly on mac sometimes, so we don't run them there
-        - os: macos-12
+        - os: macos-latest
          kind: serial
+        # Mac ARM doesn't support tensorflow versions compatible with python 3.8
+        - os: macos-latest
+          python-version: '3.8'
         # Ray tests run out of memory on Windows
         - os: windows-latest
           kind: ray
@@ -249,6 +252,10 @@ jobs:
       uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
+    - name: Install libomp
+      # lightgbm needs libomp on mac
+      run: brew install libomp
+      if: matrix.os == 'macos-latest'
     - name: Install uv
       # check if we're running on windows
       run: ${{ runner.os == 'Windows' && 'irm https://astral.sh/uv/install.ps1 | iex' || 'curl -LsSf https://astral.sh/uv/install.sh | sh' }}
@@ -293,6 +300,56 @@ jobs:
           name: tests-${{ env.id_string }}
           path: ${{ env.id_string }}-test-results.xml
 
+  store-reqs-per-env:
+    name: Store requirements for LKG updates
+    if: (success() || failure()) && (github.event_name == 'workflow_dispatch' && !inputs.use_lkg || github.event_name == 'schedule')
+    strategy:
+      matrix:
+        kind: [tests]
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        extras: ["[tf,plt,dowhy,ray]"]
+        include:
+        # explicitly add the two notebook extras
+        - kind: notebooks
+          os: ubuntu-latest
+          python-version: '3.8'
+          extras: "[tf,plt,ray]"
+        - kind: notebooks
+          os: ubuntu-latest
+          python-version: '3.9'
+          extras: "[plt,dowhy]"
+        exclude:
+        # Mac ARM doesn't support tensorflow versions compatible with python 3.8
+        - os: macos-latest
+          python-version: '3.8'
+    runs-on: ${{ matrix.os }}
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ env.ref }}
+    - name: Setup Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install uv
+      # check if we're running on windows
+      run: ${{ runner.os == 'Windows' && 'irm https://astral.sh/uv/install.ps1 | iex' || 'curl -LsSf https://astral.sh/uv/install.sh | sh' }}
+    - name: Install econml
+      # add all extras used by any of the individual tests, which is a superset of what's actually used anywhere
+      run: uv pip install --system -e .${{ matrix.extras }}
+    - name: Install notebook requirements
+      run: uv pip install --system jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm
+      if: matrix.kind == 'notebooks'
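+    # Each matrix entry freezes its environment to a file whose name encodes the configuration
+    # (e.g. lkg-tests-ubuntu-latest-3.10.txt); generate_lkg.py later parses the OS and python
+    # version back out of these names when merging everything into one constrained requirements list.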
+    - name: Save installed packages
+      run: pip freeze --exclude-editable > lkg-${{ matrix.kind }}-${{ matrix.os }}-${{ matrix.python-version }}.txt
+    - name: Upload installed packages
+      uses: actions/upload-artifact@v4
+      with:
+        name: lkg-${{ matrix.kind }}-${{ matrix.os }}-${{ matrix.python-version }}
+        path: lkg-${{ matrix.kind }}-${{ matrix.os }}-${{ matrix.python-version }}.txt
+
   coverage-report:
     name: "Coverage report"
     needs: [tests, notebooks]
@@ -329,11 +386,11 @@ jobs:
 
   merge-artifacts:
     name: "Merge artifacts"
-    needs: [coverage-report, tests, notebooks]
+    needs: [coverage-report, tests, notebooks, store-reqs-per-env]
     if: success() || failure()
     strategy:
       matrix:
-        artifact: [requirements, tests, coverage, notebooks]
+        artifact: [requirements, tests, coverage, notebooks, lkg-tests, lkg-notebooks]
     runs-on: ubuntu-latest
     steps:
     - name: "Merge artifacts"
@@ -346,7 +403,54 @@ jobs:
       # Unlike with plain upload-artifact, there's no way to ignore the situation where no files are found when using the v4 merge action
       # (see https://github.com/actions/upload-artifact/issues/520), so just continue on error instead
       continue-on-error: true
 
+  generate-lkg:
+    name: "Generate updated last known good files"
+    needs: [merge-artifacts]
+    if: (success() || failure()) && (github.event_name == 'workflow_dispatch' && !inputs.use_lkg || github.event_name == 'schedule')
+    strategy:
+      matrix:
+        kind: [tests, notebooks]
+        include:
+        - kind: tests
+          output-name: lkg
+        - kind: notebooks
+          output-name: lkg-notebook
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ env.ref }}
+    - name: Setup Python
+      uses: actions/setup-python@v5
+    - name: Download files
+      uses: actions/download-artifact@v4
+      with:
+        name: lkg-${{ matrix.kind }}
+        path: requirements
+    - name: "Generate last known good versions"
+      run: python .github/workflows/generate_lkg.py requirements "lkg-${{ matrix.kind }}-(?P<os>[a-z]+)-(latest|[0-9.]+)-(?P<pyversion>[0-9.]+).txt" ${{ matrix.output-name }}.txt
+    - name: "Upload last known good versions"
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ matrix.output-name }}
+        path: ${{ matrix.output-name }}.txt
+
+  merge-lkg:
+    name: "Merge last known good versions"
+    needs: [generate-lkg]
+    if: success() || failure()
+    runs-on: ubuntu-latest
+    steps:
+    - name: "Merge last known good versions"
+      uses: actions/upload-artifact/merge@v4
+      with:
+        # can't use just "lkg" for the name since that artifact already exists and merge won't overwrite it even when delete-merged is true
+        name: lkg-files
+        pattern: "lkg*"
+        delete-merged: true
+
   build:
     name: Build package
     needs: [eval]
diff --git a/.github/workflows/generate_lkg.py b/.github/workflows/generate_lkg.py
new file mode 100644
index 000000000..bc6a98a8d
--- /dev/null
+++ b/.github/workflows/generate_lkg.py
@@ -0,0 +1,217 @@
+# Copyright (c) PyWhy contributors. All rights reserved.
+# Licensed under the MIT License.
+
+import argparse
+import re
+from collections import defaultdict, namedtuple
+from os import listdir, path
+
+import packaging.version
+from packaging.version import Version
+
+# We have a list of requirements files, one per python version and OS.
+# We want to generate a single requirements file that specifies the requirements
+# for each package contained in any of those files, along with the constraints on python version
+# and OS that apply to each package.
+
+Combo = namedtuple('Combo', ['os', 'py_version'])
+
+# For each version of a package (say numpy==0.24.1), we'll have a set of os/py_version combos
+# where it was installed; the correct constraint will be the union of all these pairs.
+# However, for readability we'd like to simplify that to something shorter when possible.
+# For example, if numpy==0.24.1 is installed on all versions of python and all OSes, we can just say
+# "numpy==0.24.1"; if it's installed on all versions of python on ubuntu, we can say
+# "numpy==0.24.1; platform_system=='Linux'".
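+# As a further illustration (with made-up versions): if scipy==1.11.3 shows up in the freeze
+# results for every os/py_version combo except the python 3.8 ones, the generated line should be
+#     "scipy==1.11.3; python_version!='3.8'"
+# and a package that's pinned differently on different OSes gets one output line per pinned
+# version, each carrying its own constraint.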
+
+
+# We'll precompute a dictionary of simple constraints, mapping from the sets of combos to a string representation
+# of the constraint.
+# For simplicity, we won't consider all possible constraints, just some easy to generate ones.
+# In the most general case we'll OR together constraints grouped by os
+def simple_constraint_map(all_combos: frozenset[Combo]) -> tuple[dict[frozenset[Combo], str],
+                                                                 dict[tuple[str, frozenset[Version]], str]]:
+    """
+    Represent simple constraints via dictionaries.
+
+    Parameters
+    ----------
+    all_combos : frozenset[Combo]
+        All of the possible os/py_version pairs
+
+    Returns
+    -------
+    (d1, d2): tuple[dict[frozenset[Combo], str], dict[tuple[str, frozenset[Version]], str]]
+        A tuple of two dictionaries.
+        The first dictionary maps from a constrained set of os/py_version pairs to the string representation of the
+        constraint.
+        The second dictionary maps from a tuple of (os, set of py_versions) to the string representation of the
+        constraint.
+    """
+    all_os = frozenset({combo.os for combo in all_combos})
+    all_py_versions = frozenset({combo.py_version for combo in all_combos})
+
+    # constraint_map will map from sets of os/py_version pairs to a string representation of the constraint
+    # that would restrict all possible combos to just that set;
+    # we'll look up the sets of os/py_version pairs that a package is installed on and use this map to generate
+    # the correct constraint for that package.
+    constraint_map = {}
+
+    # first generate simple os constraints, like "platform_system=='Linux'" or "platform_system!='Linux'"
+    for os in all_os:
+        # Get the set of all os/py_version pairs where the os is the given os and the py_version is anything
+        filtered_combos = frozenset({combo for combo in all_combos if combo.os == os})
+        constraint_map[filtered_combos] = f"; platform_system=='{os}'"
+        constraint_map[all_combos - filtered_combos] = f"; platform_system!='{os}'"
+
+    # now generate simple python version constraints,
+    # like "python_version=='3.8'", "python_version!='3.8'", "python_version<'3.8'", "python_version>'3.8'"
+    for i, py_version in enumerate(sorted(all_py_versions)):
+        # Get the set of all os/py_version pairs where the py_version is the given py_version and the os is anything
+        filtered_combos = frozenset({combo for combo in all_combos if combo.py_version == py_version})
+        constraint_map[filtered_combos] = f"; python_version=='{py_version}'"
+        constraint_map[all_combos - filtered_combos] = f"; python_version!='{py_version}'"
+
+        if i > 0:
+            less_than = frozenset({combo for combo in all_combos if combo.py_version < py_version})
+            constraint_map[less_than] = f"; python_version<'{py_version}'"
+        if i < len(all_py_versions)-2:
+            greater_than = frozenset({combo for combo in all_combos if combo.py_version > py_version})
+            constraint_map[greater_than] = f"; python_version>'{py_version}'"
+
+    # if every combination is present, we don't need to add any constraint
+    constraint_map[all_combos] = ""
+
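+    # For instance (assuming the runners cover pythons 3.8-3.12 on all three OSes), the set of all
+    # Linux combos maps to "; platform_system=='Linux'", and the set of every combo whose python is
+    # above 3.8 maps to "; python_version>'3.8'".
+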
+    # generate simple per-os python version constraints
+    # we include the os in the key because we might not have every combination for every os
+    # (e.g. maybe macos doesn't support python 3.8, in which case there won't be a combo for that, but there might
+    # be a combo for ubuntu with python 3.8; then if we see all versions of python 3.9 and up on macos, we don't need
+    # any python version constraint, whereas if we see all versions of python 3.9 and up on ubuntu,
+    # we still do need a constraint since 3.8 is missing)
+    os_map = {}
+    for os in all_os:
+        for i, py_version in enumerate(sorted(all_py_versions)):
+            filtered_combos = frozenset({combo for combo in all_combos
+                                         if combo.os == os and combo.py_version == py_version})
+            os_map[(os, frozenset({py_version}))] = f"python_version=='{py_version}'"
+            if i > 0 and i < len(all_py_versions)-1:
+                os_map[(os, all_py_versions - frozenset({py_version}))] = f"python_version!='{py_version}'"
+
+            if i > 0:
+                os_map[(os, frozenset({py for py in all_py_versions
+                                       if py < py_version}))] = f"python_version<'{py_version}'"
+            if i < len(all_py_versions)-1:
+                os_map[(os, frozenset({py for py in all_py_versions
+                                       if py > py_version}))] = f"python_version>'{py_version}'"
+
+        # if every combination is present, we don't need to add any constraint for that os
+        os_map[(os, all_py_versions)] = ""
+
+    return constraint_map, os_map
+
+
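+# An illustrative sketch of what the second returned dictionary can contain, assuming pythons
+# 3.8-3.12 are all present (keys hold parsed Version objects):
+#   ('Linux', frozenset of versions 3.9-3.12) -> "python_version>'3.8'"
+#   ('Darwin', frozenset({3.8}))              -> "python_version=='3.8'"
+# make_req_file below ANDs each of these with a platform_system clause for its os.
+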
or + # (platform_system=='os2' and (python_version=='py3' or ...) ..." + # + # that is, we will OR together constraints grouped by os + # for some oses, we might find a nice representation for their python version constraints in the os_map + # (e.g. "python_version=='3.8'", or "python_version<'3.8'"), in which case we'll use that; + # for others, we'll have to OR together all of the relevant individual versions + os_constraints = [] + + os_versions = defaultdict(set) # dictionary from os to set of python versions + for combo in combos: + os_versions[combo.os].add(combo.py_version) + + # for each os, generate the corresponding constraint + for os in sorted(os_versions.keys()): + versions = os_versions[os] + os_key = (os, frozenset(os_versions[os])) + if os_key in os_map: + constraint = os_map[os_key] + if constraint == "": + os_constraints.append(f"platform_system=='{os}'") + else: + os_constraints.append(f"platform_system=='{os}' and {constraint}") + else: + version_constraint = " or ".join([f"python_version=='{py_version}'" + for py_version in sorted(os_versions[os])]) + os_constraints.append(f"platform_system=='{os}' and ({version_constraint})") + if len(os_constraints) == 1: # just one os with correspondig python versions, can use it directly + suffix = f"; {os_constraints[0]}" + else: # need to OR them together + suffix = f"; ({') or ('.join(os_constraints)})" + + reqs.append(f"{req}{suffix}") + + return '\n'.join(reqs) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate requirements files for CI') + parser.add_argument('requirements_directory', type=str, help='Directory containing requirements files') + parser.add_argument('regex', type=str, + help='Regex to match requirements file names, must have named groups "os" and "pyversion"') + parser.add_argument('output_name', type=str, help='File to write requirements to') + args = parser.parse_args() + + reqs = make_req_file(args.requirements_directory, args.regex) + with open(args.output_name, 'w') as f: + f.write(reqs) diff --git a/econml/tests/test_grf_python.py b/econml/tests/test_grf_python.py index 2f4a5a773..3140dad72 100644 --- a/econml/tests/test_grf_python.py +++ b/econml/tests/test_grf_python.py @@ -494,10 +494,10 @@ def test_projection(self,): forest = RegressionForest(**config).fit(X, y) projector = np.ones((X.shape[0], 2)) / 2.0 mean_proj, var_proj = forest.predict_projection_and_var(X, projector) - np.testing.assert_array_equal(mean_proj, mean) - np.testing.assert_array_equal(var_proj, var) - np.testing.assert_array_equal(var_proj, forest.predict_projection_var(X, projector)) - np.testing.assert_array_equal(mean_proj, forest.predict_projection(X, projector)) + np.testing.assert_allclose(mean_proj, mean) + np.testing.assert_allclose(var_proj, var) + np.testing.assert_allclose(var_proj, forest.predict_projection_var(X, projector)) + np.testing.assert_allclose(mean_proj, forest.predict_projection(X, projector)) return def test_feature_importances(self,): @@ -547,7 +547,7 @@ def test_feature_importances(self,): feature_importances /= tw[0] totest = tree.tree_.compute_feature_importances(normalize=False, max_depth=max_depth, depth_decay=2.0) - np.testing.assert_array_equal(feature_importances, totest) + np.testing.assert_allclose(feature_importances, totest) het_importances = np.zeros(n_features) for it, (feat, depth, left, right, w) in\