diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 82ba25bc6..b2b1e115e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,6 +12,11 @@ on: default: '' type: string +# Only run once per PR, canceling any previous runs +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + # Precompute the ref if the workflow was triggered by a workflow dispatch rather than copying this logic repeatedly env: ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }} @@ -72,24 +77,6 @@ jobs: buildNbs: ${{ steps.eval.outputs.buildNbs }} testCode: ${{ steps.eval.outputs.testCode }} - lint: - name: Lint code - needs: [eval] - if: ${{ needs.eval.outputs.testCode == 'True' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - name: Checkout repository - with: - ref: ${{ env.ref }} - - uses: actions/setup-python@v4 - name: Setup Python - with: - python-version: 3.9 - - run: python -m pip install --upgrade pip && pip install --upgrade setuptools - name: Ensure latest pip and setuptools - - run: 'pip install pycodestyle && pycodestyle econml' - notebooks: name: Run notebooks needs: [eval] @@ -98,6 +85,8 @@ jobs: strategy: matrix: kind: [except customer scenarios, customer scenarios] + nbconvert: ['nbconvert', '"nbconvert<7.4"'] + ipykernel: ['ipykernel', '"ipykernel<6.23"'] include: - kind: "except customer scenarios" extras: "[tf,plt]" @@ -124,9 +113,10 @@ jobs: - run: sudo apt-get -yq install graphviz name: Install graphviz if: ${{ matrix.install_graphviz }} - - run: pip install -e .${{ matrix.extras }} + # Add verbose flag to pip installation if in debug mode + - run: pip install -e .${{ matrix.extras }} ${{ fromJSON('["","-v"]')[runner.debug] }} name: Install econml - - run: pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm + - run: pip install pytest pytest-runner jupyter jupyter-client ${{ matrix.nbconvert }} ${{ matrix.ipykernel }} nbformat seaborn xgboost tqdm name: Install test and notebook requirements - run: pip list name: List installed packages @@ -135,96 +125,4 @@ jobs: env: PYTEST_ADDOPTS: '-m "notebook"' NOTEBOOK_DIR_PATTERN: ${{ matrix.pattern }} - - tests: - name: "Run tests" - needs: [eval] - if: ${{ needs.eval.outputs.testCode == 'True' }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [3.6, 3.7, 3.8, 3.9] - kind: [serial, other, dml, main, treatment] - exclude: - # Serial tests fail randomly on mac sometimes, so we don't run them there - - os: macos-latest - kind: serial - # Python 3.6 isn't supported on ubuntu-latest - - os: ubuntu-latest - python-version: 3.6 - - # Assign the correct package and testing options for each kind of test - include: - - kind: serial - opts: '-m "serial" -n 1' - extras: "[tf,plt]" - - kind: other - opts: '-m "cate_api" -n auto' - extras: "[tf,plt]" - - kind: dml - opts: '-m "dml"' - extras: "[tf,plt]" - - kind: main - opts: '-m "not (notebook or automl or dml or serial or cate_api or treatment_featurization)" -n 2' - extras: "[tf,plt,dowhy]" - - kind: treatment - opts: '-m "treatment_featurization" -n auto' - extras: "[tf,plt]" - fail-fast: false - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - name: Checkout repository - with: - ref: ${{ env.ref }} - - uses: actions/setup-python@v4 - name: Setup Python - with: - python-version: ${{ matrix.python-version }} - - run: python -m pip install --upgrade pip && pip install --upgrade setuptools - name: Ensure latest pip and setuptools - - run: pip install -e .${{ matrix.extras }} - name: Install econml - - run: pip install pytest pytest-runner coverage - name: Install pytest - - run: python setup.py pytest - name: Run tests - env: - PYTEST_ADDOPTS: ${{ matrix.opts }} - COVERAGE_PROCESS_START: 'setup.cfg' - # todo: publish test results, coverage info - - build: - name: Build package - needs: [eval] - if: ${{ needs.eval.outputs.testCode == 'True' }} - uses: ./.github/workflows/publish-package.yml - with: - publish: false - repository: testpypi - # don't have access to env context here for some reason - ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }} - - docs: - name: Build documentation - needs: [eval] - if: ${{ needs.eval.outputs.buildDocs == 'True' }} - uses: ./.github/workflows/publish-documentation.yml - with: - publish: false - environment: test - # don't have access to env context here for some reason - ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }} - - verify: - name: Verify CI checks - needs: [lint, notebooks, tests, build, docs] - if: always() - runs-on: ubuntu-latest - steps: - - run: exit 1 - name: At least one check failed or was cancelled - if: ${{ !(success()) }} - - run: exit 0 - name: All checks passed - if: ${{ success() }} + \ No newline at end of file diff --git a/econml/_ensemble/_ensemble.py b/econml/_ensemble/_ensemble.py index ff833f9ec..cdc23da4c 100644 --- a/econml/_ensemble/_ensemble.py +++ b/econml/_ensemble/_ensemble.py @@ -158,8 +158,8 @@ def _partition_estimators(n_estimators, n_jobs): # Partition estimators between jobs n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs, - dtype=np.int) - n_estimators_per_job[:n_estimators % n_jobs] += 1 + dtype=int) + n_estimators_per_job[: n_estimators % n_jobs] += 1 starts = np.cumsum(n_estimators_per_job) return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist() diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index fdc9e7693..39a300ade 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -899,7 +899,7 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): nuisances = [np.zeros((n_iters * n_splits,) + nuis.shape) for nuis in nuisance_temp] for it, nuis in enumerate(nuisance_temp): - nuisances[it][i * n_iters + j] = nuis + nuisances[it][j * n_iters + i] = nuis for it in range(len(nuisances)): nuisances[it] = np.mean(nuisances[it], axis=0) diff --git a/econml/cate_interpreter/_interpreters.py b/econml/cate_interpreter/_interpreters.py index 82a47ef0c..67dd78888 100644 --- a/econml/cate_interpreter/_interpreters.py +++ b/econml/cate_interpreter/_interpreters.py @@ -4,7 +4,6 @@ import abc import numbers import numpy as np -from packaging import version import sklearn from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier from sklearn.utils import check_array @@ -151,7 +150,7 @@ def __init__(self, *, self.include_uncertainty = include_model_uncertainty self.uncertainty_level = uncertainty_level self.uncertainty_only_on_leaves = uncertainty_only_on_leaves - self.criterion = "squared_error" if version.parse(sklearn.__version__) >= version.parse("1.0") else "mse" + self.criterion = "squared_error" self.splitter = splitter self.max_depth = max_depth self.min_samples_split = min_samples_split diff --git a/econml/data/dynamic_panel_dgp.py b/econml/data/dynamic_panel_dgp.py index 82b842912..cb7ffa17c 100644 --- a/econml/data/dynamic_panel_dgp.py +++ b/econml/data/dynamic_panel_dgp.py @@ -135,7 +135,7 @@ def simulate_residuals(ind): def simulate_residuals_all(res_df): - res_df_new = res_df.copy(deep=True) + res_df_new = res_df.astype(dtype='float64', copy=True, errors='raise') for i in range(res_df.shape[1]): res_df_new.iloc[:, i] = simulate_residuals(i) # demean the new residual again diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py index a7f5d029c..da2218473 100644 --- a/econml/sklearn_extensions/linear_model.py +++ b/econml/sklearn_extensions/linear_model.py @@ -38,51 +38,6 @@ from joblib import Parallel, delayed -# TODO: once we drop support for sklearn < 1.0, we can remove this -def _add_normalize(to_wrap): - """ - Add a fictitious "normalize" argument to linear model initializer signatures. - - This is necessary for their get_params to play nicely with some other sklearn-internal methods. - - Note that directly adding a **params argument to the ordinary initializer will not work, - because get_params explicitly looks only at the initializer signature arguments that are not - varargs or varkeywords, so we need to modify the signature of the initializer to include the - "normalize" argument. - """ - # if we're decorating a class, just update the __init__ method, - # so that the result is still a class instead of a wrapper method - if isinstance(to_wrap, type): - import sklearn - from packaging import version - - if version.parse(sklearn.__version__) >= version.parse("1.0"): - # normalize was deprecated or removed; don't need to do anything - return to_wrap - - else: - from inspect import Parameter, signature - from functools import wraps - - old_init = to_wrap.__init__ - - @wraps(old_init) - def new_init(self, *args, normalize=False, **kwargs): - if normalize is not False: - warnings.warn("normalize is deprecated and will be ignored", stacklevel=2) - return old_init(self, *args, **kwargs) - - sig = signature(old_init) - sig = sig.replace(parameters=[*sig.parameters.values(), - Parameter("normalize", kind=Parameter.KEYWORD_ONLY, default=False)]) - - new_init.__signature__ = sig - to_wrap.__init__ = new_init - return to_wrap - else: - raise ValueError("This decorator was applied to a method, but is intended to be applied only to types.") - - def _weighted_check_cv(cv=5, y=None, classifier=False, random_state=None): cv = 5 if cv is None else cv if isinstance(cv, numbers.Integral): @@ -176,7 +131,6 @@ def _fit_weighted_linear_model(self, X, y, sample_weight, check_input=None): super().fit(**fit_params) -@_add_normalize class WeightedLasso(WeightedModelMixin, Lasso): """Version of sklearn Lasso that accepts weights. @@ -282,7 +236,6 @@ def fit(self, X, y, sample_weight=None, check_input=True): return self -@_add_normalize class WeightedMultiTaskLasso(WeightedModelMixin, MultiTaskLasso): """Version of sklearn MultiTaskLasso that accepts weights. @@ -372,7 +325,6 @@ def fit(self, X, y, sample_weight=None): return self -@_add_normalize class WeightedLassoCV(WeightedModelMixin, LassoCV): """Version of sklearn LassoCV that accepts weights. @@ -491,7 +443,6 @@ def fit(self, X, y, sample_weight=None): return self -@_add_normalize class WeightedMultiTaskLassoCV(WeightedModelMixin, MultiTaskLassoCV): """Version of sklearn MultiTaskLassoCV that accepts weights. @@ -631,7 +582,6 @@ def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, ma return coefs, tausq -@_add_normalize class DebiasedLasso(WeightedLasso): """Debiased Lasso model. @@ -977,7 +927,6 @@ def _get_unscaled_coef_var(self, X, theta_hat, sample_weight): return _unscaled_coef_var -@_add_normalize class MultiOutputDebiasedLasso(MultiOutputRegressor): """Debiased MultiOutputLasso model. diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py index 8105f7ec7..57b5c3ec4 100644 --- a/econml/tests/test_dml.py +++ b/econml/tests/test_dml.py @@ -1095,6 +1095,7 @@ def test_nuisance_scores(self): est.fit(y, T, X=X, W=W) assert len(est.nuisance_scores_t) == len(est.nuisance_scores_y) == mc_iters assert len(est.nuisance_scores_t[0]) == len(est.nuisance_scores_y[0]) == cv + est.score(y, T, X=X, W=W) def test_categories(self): dmls = [LinearDML, SparseLinearDML] diff --git a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb index 3b7b7d936..504d9e0f7 100644 --- a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb +++ b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb @@ -598,7 +598,7 @@ "X = Xy.drop(columns = 'SalePrice')\n", "X_ohe = (\n", " X\n", - " .pipe(pd.get_dummies, prefix_sep = '_OHE_', columns = categorical)\n", + " .pipe(pd.get_dummies, prefix_sep = '_OHE_', columns = categorical, dtype='uint8')\n", ")\n", "y = Xy['SalePrice']" ] diff --git a/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb b/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb index 24f00b8c2..94ba335da 100644 --- a/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb +++ b/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb @@ -432,7 +432,7 @@ "outputs": [], "source": [ "categorical = []\n", - "for col, value in attritionXData.iteritems():\n", + "for col, value in attritionXData.items():\n", " if value.dtype == \"object\":\n", " categorical.append(col)\n", "\n", diff --git a/setup.cfg b/setup.cfg index 2e0ca2c37..067333d5a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,12 +34,12 @@ packages = find_namespace: install_requires = numpy scipy > 1.4.0 - scikit-learn > 0.22.0, < 1.3 + scikit-learn >= 1.0, < 1.3 sparse joblib >= 0.13.0 statsmodels >= 0.10 pandas - shap >= 0.38.1, < 0.41.0 + shap >= 0.38.1, < 0.42.0 lightgbm test_suite = econml.tests tests_require = @@ -58,6 +58,8 @@ tf = tensorflow > 1.10, < 2.3;python_version < '3.9' ; Version capped due to tensorflow incompatibility protobuf < 4 + ; Version capped due to tensorflow incompatibility + numpy < 1.24 plt = graphviz ; Version capped due to shap incompatibility @@ -70,6 +72,8 @@ all = tensorflow > 1.10, < 2.3 ; Version capped due to tensorflow incompatibility protobuf < 4 + ; Version capped due to tensorflow incompatibility + numpy < 1.24 ; Version capped due to shap incompatibility matplotlib < 3.6.0 dowhy < 0.9