Add LKG generation to CI #803

Merged 2 commits on Nov 13, 2024
112 changes: 108 additions & 4 deletions .github/workflows/ci.yml
@@ -203,13 +203,16 @@ jobs:
if: ${{ needs.eval.outputs.testCode == 'True' }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-12]
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
kind: [serial, other, dml, main, treatment, ray]
exclude:
# Serial tests fail randomly on mac sometimes, so we don't run them there
- os: macos-12
- os: macos-latest
kind: serial
# Mac ARM doesn't support tensorflow versions compatible with python 3.8
- os: macos-latest
python-version: '3.8'
# Ray tests run out of memory on Windows
- os: windows-latest
kind: ray
@@ -249,6 +252,10 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install libomp
# lightgbm needs libomp on mac
run: brew install libomp
if: matrix.os == 'macos-latest'
- name: Install uv
        # pick the installer based on OS: PowerShell script on Windows, shell script elsewhere
run: ${{ runner.os == 'Windows' && 'irm https://astral.sh/uv/install.ps1 | iex' || 'curl -LsSf https://astral.sh/uv/install.sh | sh' }}
@@ -293,6 +300,56 @@ jobs:
name: tests-${{ env.id_string }}
path: ${{ env.id_string }}-test-results.xml

store-reqs-per-env:
name: Store requirements for LKG updates
if: (success() || failure()) && (github.event_name == 'workflow_dispatch' && !inputs.use_lkg || github.event_name == 'schedule')
strategy:
matrix:
kind: [tests]
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
extras: ["[tf,plt,dowhy,ray]"]
include:
        # explicitly add the two notebook configurations
- kind: notebooks
os: ubuntu-latest
python-version: '3.8'
extras: "[tf,plt,ray]"
- kind: notebooks
os: ubuntu-latest
python-version: '3.9'
extras: "[plt,dowhy]"
exclude:
# Mac ARM doesn't support tensorflow versions compatible with python 3.8
- os: macos-latest
python-version: '3.8'
runs-on: ${{ matrix.os }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ env.ref }}
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
        # pick the installer based on OS: PowerShell script on Windows, shell script elsewhere
run: ${{ runner.os == 'Windows' && 'irm https://astral.sh/uv/install.ps1 | iex' || 'curl -LsSf https://astral.sh/uv/install.sh | sh' }}
- name: Install econml
        # install all extras used by any of the individual test jobs, a superset of what any single job actually needs
run: uv pip install --system -e .${{ matrix.extras }}
- name: Install notebook requirements
run: uv pip install --system jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm
if: matrix.kind == 'notebooks'
- name: Save installed packages
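        # the resulting file name must match the pattern parsed by the generate-lkg job below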
run: pip freeze --exclude-editable > lkg-${{ matrix.kind }}-${{ matrix.os }}-${{ matrix.python-version }}.txt
- name: Upload installed packages
uses: actions/upload-artifact@v4
with:
name: lkg-${{ matrix.kind }}-${{ matrix.os }}-${{ matrix.python-version }}
path: lkg-${{ matrix.kind }}-${{ matrix.os }}-${{ matrix.python-version }}.txt

coverage-report:
name: "Coverage report"
needs: [tests, notebooks]
@@ -329,11 +386,11 @@ jobs:

merge-artifacts:
name: "Merge artifacts"
needs: [coverage-report, tests, notebooks]
needs: [coverage-report, tests, notebooks, store-reqs-per-env]
if: success() || failure()
strategy:
matrix:
artifact: [requirements, tests, coverage, notebooks]
artifact: [requirements, tests, coverage, notebooks, lkg-tests, lkg-notebooks]
runs-on: ubuntu-latest
steps:
- name: "Merge artifacts"
@@ -346,7 +403,54 @@
# Unlike with plain upload-artifact, there's no way to ignore the situation where no files are found when using the v4 merge action
      # (see https://github.com/actions/upload-artifact/issues/520), so just continue on error instead
continue-on-error: true

generate-lkg:
name: "Generate updated last known good files"
needs: [merge-artifacts]
if: (success() || failure()) && (github.event_name == 'workflow_dispatch' && !inputs.use_lkg || github.event_name == 'schedule')
strategy:
matrix:
kind: [tests, notebooks]
include:
- kind: tests
output-name: lkg
- kind: notebooks
output-name: lkg-notebook
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ env.ref }}
- name: Setup Python
uses: actions/setup-python@v5
- name: Download files
uses: actions/download-artifact@v4
with:
name: lkg-${{ matrix.kind }}
path: requirements
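      # the regex pulls os and python version out of file names like lkg-tests-ubuntu-latest-3.8.txt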
- name: "Generate last known good versions"
run: python .github/workflows/generate_lkg.py requirements "lkg-${{ matrix.kind }}-(?P<os>[a-z]+)-(latest|[0-9.]+)-(?P<pyversion>[0-9.]+).txt" ${{ matrix.output-name }}.txt
- name: "Upload last known good versions"
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.output-name }}
path: ${{ matrix.output-name }}.txt

merge-lkg:
name: "Merge last known good versions"
needs: [generate-lkg]
if: success() || failure()
runs-on: ubuntu-latest
steps:
- name: "Merge last known good versions"
uses: actions/upload-artifact/merge@v4
with:
# can't use just "lkg" for the name since that artifact already exists and merge won't overwrite it even when delete-merged is true
name: lkg-files
pattern: "lkg*"
delete-merged: true

build:
name: Build package
needs: [eval]
217 changes: 217 additions & 0 deletions .github/workflows/generate_lkg.py
@@ -0,0 +1,217 @@
# Copyright (c) PyWhy contributors. All rights reserved.
# Licensed under the MIT License.

import argparse
import re
from collections import defaultdict, namedtuple
from os import listdir, path

import packaging.version
from packaging.version import Version

# We have a list of requirements files, one per python version and OS.
# We want to generate a single requirements file that specifies the requirements
# for each package contained in any of those files, along with the constraints on python version
# and OS that apply to each package.

Combo = namedtuple('Combo', ['os', 'py_version'])

# For each version of a package (say numpy==0.24.1), we'll have a set of os/py_version combos
# where it was installed; the correct constraint will be the union of all these pairs.
# However, we'd like to simplify that to something more readable when possible.
# For example, if numpy==0.24.1 is installed on all versions of python and all OSes, we can just say
# "numpy==0.24.1"; if it's installed on all versions of python on ubuntu, we can say
# "numpy==0.24.1; platform_system=='Linux'".


# We'll precompute a dictionary of simple constraints, mapping from the sets of combos to a string representation
# of the constraint.
# For simplicity, we won't consider all possible constraints, just some easy-to-generate ones.
# In the most general case we'll OR together constraints grouped by os
def simple_constraint_map(all_combos: frozenset[Combo]) -> tuple[dict[frozenset[Combo], str],
                                                                 dict[tuple[str, frozenset[Version]], str]]:
"""
Represent simple constraints via dictionaries.

Parameters
----------
all_combos : frozenset[Combo]
All of the possible os/py_version pairs

Returns
-------
(d1, d2): tuple[dict[frozenset[Combo], str], dict[tuple[str, frozenset[Version]], str]]
A tuple of two dictionaries.
The first dictionary maps from a constrained set of os/py_version pairs to the string representation of the
constraint.
The second dictionary maps from a tuple of (os, set of py_versions) to the string representation of the
constraint.
"""
all_os = frozenset({combo.os for combo in all_combos})
all_py_versions = frozenset({combo.py_version for combo in all_combos})

# constraint_map will map from sets of os/py_version pairs to a string representation of the constraint
# that would restrict all possible combos to just that set;
# we'll look up the sets of os/py_version pairs that a package is installed on and use this map to generate
# the correct constraint for that package.
constraint_map = {}

# first generate simple os constraints, like "platform_system=='Linux'" or "platform_system!='Linux'"
for os in all_os:
# Get the set of all os/py_version pairs where the os is the given os and the py_version is anything
filtered_combos = frozenset({combo for combo in all_combos if combo.os == os})
constraint_map[filtered_combos] = f"; platform_system=='{os}'"
constraint_map[all_combos - filtered_combos] = f"; platform_system!='{os}'"

# now generate simple python version constraints,
# like "python_version=='3.8'"", "python_version!='3.8'"; "python_version<'3.8'", "python_version>'3.8'"
for i, py_version in enumerate(sorted(all_py_versions)):
# Get the set of all os/py_version pairs where the py_version is the given py_version and the os is anything
filtered_combos = frozenset({combo for combo in all_combos if combo.py_version == py_version})
constraint_map[filtered_combos] = f"; python_version=='{py_version}'"
constraint_map[all_combos - filtered_combos] = f"; python_version!='{py_version}'"

if i > 0:
less_than = frozenset({combo for combo in all_combos if combo.py_version < py_version})
constraint_map[less_than] = f"; python_version<'{py_version}'"
if i < len(all_py_versions)-2:
greater_than = frozenset({combo for combo in all_combos if combo.py_version > py_version})
constraint_map[greater_than] = f"; python_version>'{py_version}'"

# if every combination is present, we don't need to add any constraint
constraint_map[all_combos] = ""

# generate simple per-os python version constraints
# we include the os in the key because we might not have every combination for every os
# (e.g. maybe macos doesn't support python 3.8, in which case there won't be a combo for that, but there might
# be a combo for ubuntu with python 3.8; then if we see all versions of python 3.9 and up on macos, we don't need
# any python version constraint, whereas if we see all versions of python 3.9 and up on ubuntu,
# any python version constraint, whereas if we see all versions of python 3.9 and up on ubuntu,
# we still do need a constraint since 3.8 is missing)
os_map = {}
for os in all_os:
        for i, py_version in enumerate(sorted(all_py_versions)):
os_map[(os, frozenset({py_version}))] = f"python_version=='{py_version}'"
if i > 0 and i < len(all_py_versions)-1:
os_map[(os, all_py_versions - frozenset({py_version}))] = f"python_version!='{py_version}'"

if i > 0:
os_map[(os, frozenset({py for py in all_py_versions
if py < py_version}))] = f"python_version<'{py_version}'"
if i < len(all_py_versions)-1:
os_map[(os, frozenset({py for py in all_py_versions
if py > py_version}))] = f"python_version>'{py_version}'"

# if every combination is present, we don't need to add any constraint for that os
os_map[(os, all_py_versions)] = ""

return constraint_map, os_map
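
# A sketch of what the two maps contain for a (hypothetical) two-os, two-python matrix with
# all_combos = {(Linux, 3.8), (Linux, 3.9), (Windows, 3.8), (Windows, 3.9)}:
#   constraint_map[frozenset({(Linux, 3.8), (Linux, 3.9)})]   == "; platform_system=='Linux'"
#   constraint_map[frozenset({(Linux, 3.9), (Windows, 3.9)})] == "; python_version=='3.9'"
#   os_map[('Linux', frozenset({3.9}))]                       == "python_version=='3.9'"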


# Convert between GitHub Actions' platform names and Python's platform.system() names
platform_map = {'macos': 'Darwin', 'ubuntu': 'Linux', 'windows': 'Windows'}


def make_req_file(requirements_directory, regex):
"""
Make a unified requirements file from a directory of requirements files.

Parameters
----------
requirements_directory : str
Directory containing requirements files

regex : str
Regex to match requirements file names, must have named groups "os" and "pyversion"
"""
req_regex = r'^(?P<pkg>.*?)==(?P<version>.*)$' # parses requirements from pip freeze results
files = listdir(requirements_directory)

all_combos = set()

# We'll store the requirements for each version of each package in a dictionary
# (e.g. "numpy" -> {0.24.1 -> {Combo1, Combo2, ...}, 0.24.2 -> {Combo3, Combo4, ...}, ...})
# each entry of the inner dictionary will become a line in the requirements file
# (e.g. "numpy==0.24.1; platform_system=='Linux' and python_version=='3.8' or ...")
req_dict = defaultdict(lambda: defaultdict(set)) # package -> package_version -> set of Combos

for file in files:
match = re.match(regex, file)
if not match:
print(f"Skipping {file} because it doesn't match the regex")
continue
os = platform_map[match.group('os')]
py_version = packaging.version.parse(match.group('pyversion'))
combo = Combo(os, py_version)
all_combos.add(combo)

        # read each line of the file
        with open(path.join(requirements_directory, file)) as lines:
            for line in lines:
                match = re.search(req_regex, line)
                if not match:
                    # skip anything that isn't a pinned "pkg==version" line
                    continue
                pkg_version = packaging.version.parse(match.group('version'))
                req_dict[match.group('pkg')][pkg_version].add(combo)

constraint_map, os_map = simple_constraint_map(frozenset(all_combos))
# list of all requirements, sorted by package name and version
reqs = []
for pkg, versions in sorted(req_dict.items()):
for version, combos in sorted(versions.items()):
combos = frozenset(combos)
req = f"{pkg}=={version}"

if combos in constraint_map:
suffix = constraint_map[combos]

else:
# we don't have a simple constraint for this package, so we need to generate a more complex one
# which will generally be of the form:
# "(platform_system=='os1' and (python_version=='py1' or python_version=='py2') or ...) or
# (platform_system=='os2' and (python_version=='py3' or ...) ..."
#
# that is, we will OR together constraints grouped by os
# for some oses, we might find a nice representation for their python version constraints in the os_map
# (e.g. "python_version=='3.8'", or "python_version<'3.8'"), in which case we'll use that;
# for others, we'll have to OR together all of the relevant individual versions
os_constraints = []

os_versions = defaultdict(set) # dictionary from os to set of python versions
for combo in combos:
os_versions[combo.os].add(combo.py_version)

# for each os, generate the corresponding constraint
                for os in sorted(os_versions.keys()):
                    os_key = (os, frozenset(os_versions[os]))
if os_key in os_map:
constraint = os_map[os_key]
if constraint == "":
os_constraints.append(f"platform_system=='{os}'")
else:
os_constraints.append(f"platform_system=='{os}' and {constraint}")
else:
version_constraint = " or ".join([f"python_version=='{py_version}'"
for py_version in sorted(os_versions[os])])
os_constraints.append(f"platform_system=='{os}' and ({version_constraint})")
                if len(os_constraints) == 1:  # just one os with corresponding python versions, can use it directly
suffix = f"; {os_constraints[0]}"
else: # need to OR them together
suffix = f"; ({') or ('.join(os_constraints)})"

reqs.append(f"{req}{suffix}")

return '\n'.join(reqs)


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Generate requirements files for CI')
parser.add_argument('requirements_directory', type=str, help='Directory containing requirements files')
parser.add_argument('regex', type=str,
help='Regex to match requirements file names, must have named groups "os" and "pyversion"')
parser.add_argument('output_name', type=str, help='File to write requirements to')
args = parser.parse_args()

reqs = make_req_file(args.requirements_directory, args.regex)
with open(args.output_name, 'w') as f:
f.write(reqs)
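
# Example invocation (mirroring the generate-lkg CI job above); the regex is the one used there,
# and the output lines shown are hypothetical:
#   python .github/workflows/generate_lkg.py requirements \
#       "lkg-tests-(?P<os>[a-z]+)-(latest|[0-9.]+)-(?P<pyversion>[0-9.]+).txt" lkg.txt
# could produce pinned entries such as
#   numpy==1.24.4; python_version=='3.8'
#   numpy==1.26.4; python_version>'3.8'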
10 changes: 5 additions & 5 deletions econml/tests/test_grf_python.py
@@ -494,10 +494,10 @@ def test_projection(self,):
forest = RegressionForest(**config).fit(X, y)
projector = np.ones((X.shape[0], 2)) / 2.0
mean_proj, var_proj = forest.predict_projection_and_var(X, projector)
np.testing.assert_array_equal(mean_proj, mean)
np.testing.assert_array_equal(var_proj, var)
np.testing.assert_array_equal(var_proj, forest.predict_projection_var(X, projector))
np.testing.assert_array_equal(mean_proj, forest.predict_projection(X, projector))
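        # assert_allclose tolerates tiny floating-point rounding differences that assert_array_equal would flag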
np.testing.assert_allclose(mean_proj, mean)
np.testing.assert_allclose(var_proj, var)
np.testing.assert_allclose(var_proj, forest.predict_projection_var(X, projector))
np.testing.assert_allclose(mean_proj, forest.predict_projection(X, projector))
return

def test_feature_importances(self,):
@@ -547,7 +547,7 @@ def test_feature_importances(self,):
feature_importances /= tw[0]
totest = tree.tree_.compute_feature_importances(normalize=False,
max_depth=max_depth, depth_decay=2.0)
np.testing.assert_array_equal(feature_importances, totest)
np.testing.assert_allclose(feature_importances, totest)

het_importances = np.zeros(n_features)
for it, (feat, depth, left, right, w) in\