Skip to content

Commit

Permalink
automate the first stage model T and update DML notebook (#172)
Browse files Browse the repository at this point in the history
* automate the first stage model T and update DML notebook
* Changed model defaults in ORF and fixed a bug in WeightedKFold
  • Loading branch information
heimengqi authored and Miruna Oprescu committed Nov 21, 2019
1 parent 607e0ea commit 818c832
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 125 deletions.
71 changes: 51 additions & 20 deletions econml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, inverse_onehot,
broadcast_unit_treatments, reshape_treatmentwise_effects,
StatsModelsLinearRegression, LassoCVWrapper, check_high_dimensional)
from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso
from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso, WeightedLassoCVWrapper
from sklearn.model_selection import KFold, StratifiedKFold, check_cv
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegressionCV, ElasticNetCV
from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder,
FunctionTransformer)
from sklearn.base import clone, TransformerMixin
Expand All @@ -52,6 +52,7 @@
DebiasedLassoCateEstimatorMixin)
from .inference import StatsModelsInference
from ._rlearner import _RLearner
from .sklearn_extensions.model_selection import WeightedStratifiedKFold


class DMLCateEstimator(_RLearner):
Expand Down Expand Up @@ -116,9 +117,15 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.
model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.
model_t: estimator or 'auto' (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods. Must be a linear model for correctness
when linear_first_stages is ``True``;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.
model_final: estimator
The estimator for fitting the response residuals to the treatment residuals. Must implement
Expand Down Expand Up @@ -170,6 +177,12 @@ def __init__(self,
# TODO: consider whether we need more care around stateful featurizers,
# since we clone it and fit separate copies

if model_t == 'auto':
if discrete_treatment:
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
else:
model_t = WeightedLassoCVWrapper()

class FirstStageWrapper:
def __init__(self, model, is_Y):
self._model = clone(model, safe=False)
Expand Down Expand Up @@ -284,13 +297,19 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
Parameters
----------
model_y: estimator
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.
featurizer: transformer, optional (default is \
:class:`PolynomialFeatures(degree=1, include_bias=True) <sklearn.preprocessing.PolynomialFeatures>`)
Expand Down Expand Up @@ -329,7 +348,7 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
"""

def __init__(self,
model_y=LassoCV(), model_t=LassoCV(),
model_y=WeightedLassoCVWrapper(), model_t='auto',
featurizer=PolynomialFeatures(degree=1, include_bias=True),
linear_first_stages=True,
discrete_treatment=False,
Expand Down Expand Up @@ -389,13 +408,20 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
Parameters
----------
model_y: estimator
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods, and must be a linear model for correctness.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods, and must be a
linear model for correctness;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.
alpha: string | float, optional. Default='auto'.
CATE L1 regularization applied through the debiased lasso in the final model.
Expand Down Expand Up @@ -446,7 +472,7 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
"""

def __init__(self,
model_y=LassoCV(), model_t=LassoCV(),
model_y=WeightedLassoCVWrapper(), model_t='auto',
alpha='auto',
max_iter=1000,
tol=1e-4,
Expand Down Expand Up @@ -511,13 +537,18 @@ class KernelDMLCateEstimator(DMLCateEstimator):
Parameters
----------
model_y: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
model_t: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.
dim: int, optional (default is 20)
The number of random Fourier features to generate
Expand Down Expand Up @@ -551,7 +582,7 @@ class KernelDMLCateEstimator(DMLCateEstimator):
by :mod:`np.random<numpy.random>`.
"""

def __init__(self, model_y=LassoCV(), model_t=LassoCV(),
def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto',
dim=20, bw=1.0, discrete_treatment=False, n_splits=2, random_state=None):
class RandomFeatures(TransformerMixin):
def __init__(self, random_state):
Expand Down
9 changes: 5 additions & 4 deletions econml/ortho_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, PolynomialFeatures, FunctionTransformer
from sklearn.utils import check_random_state, check_array, column_or_1d
from .sklearn_extensions.linear_model import WeightedLassoCVWrapper
from .cate_estimator import BaseCateEstimator, LinearCateEstimator, TreatmentExpansionMixin
from .causal_tree import CausalTree
from .utilities import reshape, reshape_Y_T, MAX_RAND_SEED, check_inputs, WeightedModelWrapper, cross_product
from .utilities import reshape, reshape_Y_T, MAX_RAND_SEED, check_inputs, cross_product


def _build_tree_in_parallel(Y, T, X, W,
Expand Down Expand Up @@ -399,8 +400,8 @@ def __init__(self,
subsample_ratio=0.7,
bootstrap=False,
lambda_reg=0.01,
model_T=WeightedModelWrapper(LassoCV(cv=3)),
model_Y=WeightedModelWrapper(LassoCV(cv=3)),
model_T=WeightedLassoCVWrapper(cv=3),
model_Y=WeightedLassoCVWrapper(cv=3),
model_T_final=None,
model_Y_final=None,
n_jobs=-1,
Expand Down Expand Up @@ -627,7 +628,7 @@ def __init__(self,
lambda_reg=0.01,
propensity_model=LogisticRegression(penalty='l1', solver='saga',
multi_class='auto'), # saga solver supports l1
model_Y=WeightedModelWrapper(LassoCV(cv=3)),
model_Y=WeightedLassoCVWrapper(cv=3),
propensity_model_final=None,
model_Y_final=None,
n_jobs=-1,
Expand Down
38 changes: 38 additions & 0 deletions econml/sklearn_extensions/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections.abc import Iterable
from scipy.stats import norm
from econml.sklearn_extensions.model_selection import WeightedKFold, WeightedStratifiedKFold
from econml.utilities import ndim, shape, reshape
from sklearn.linear_model import LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLasso
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection._split import _CVIterableWrapper, CV_WARNING
Expand Down Expand Up @@ -1048,3 +1049,40 @@ def _set_attribute(self, attribute_name, condition=True, default=None):
else:
attribute_value = default
setattr(self, attribute_name, attribute_value)


class WeightedLassoCVWrapper:
    """Dispatch to WeightedLassoCV or WeightedMultiTaskLassoCV based on the target's shape.

    The positional and keyword arguments given at construction are stored
    untouched and forwarded to whichever underlying model is built in `fit`.
    """

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs

    def fit(self, X, y, sample_weight=None):
        """Build the underlying model for the shape of `y` and fit it.

        A 2-dimensional `y` with more than one column is fitted with
        WeightedMultiTaskLassoCV. Anything else is fitted with WeightedLassoCV;
        a 2-dimensional `y` with exactly one column is flattened first, and
        that fact is remembered so `predict` can restore the column shape.

        Returns
        -------
        self
        """
        self.needs_unravel = False
        target_is_2d = ndim(y) == 2
        if target_is_2d and shape(y)[1] > 1:
            self.model = WeightedMultiTaskLassoCV(*self.args, **self.kwargs)
        else:
            if target_is_2d and shape(y)[1] == 1:
                # Single-column target: fit on the flattened vector.
                y = np.ravel(y)
                self.needs_unravel = True
            self.model = WeightedLassoCV(*self.args, **self.kwargs)
        self.model.fit(X, y, sample_weight)
        # Mirror the fitted attributes of the underlying model on the wrapper.
        for fitted_name in ("intercept_", "coef_", "alpha_", "alphas_", "n_iter_"):
            setattr(self, fitted_name, getattr(self.model, fitted_name))
        return self

    def predict(self, X):
        """Predict with the fitted model, restoring a column shape if `fit` flattened `y`."""
        raw_predictions = self.model.predict(X)
        if not self.needs_unravel:
            return raw_predictions
        return reshape(raw_predictions, (-1, 1))

    def score(self, X, y, sample_weight=None):
        """Return the underlying model's score for `X`, `y` and `sample_weight`."""
        return self.model.score(X, y, sample_weight)
4 changes: 2 additions & 2 deletions econml/sklearn_extensions/model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _split_weighted_sample(self, X, y, sample_weight, is_stratified=False):
return self._get_folds_from_splits(splits, X.shape[0])
# Record all splits in case the stratification by weight yields a worse partition
all_splits.append(splits)
max_deviation = np.abs(weight_fracs - 1 / self.n_splits)
max_deviation = np.max(np.abs(weight_fracs - 1 / self.n_splits))
max_deviations.append(max_deviation)
# Reseed random generator and try again
kfold_model.shuffle = True
Expand All @@ -57,7 +57,7 @@ def _split_weighted_sample(self, X, y, sample_weight, is_stratified=False):
# Did not find a good split
# Record the deviation for the weight-stratified split to compare with KFold splits
all_splits.append(stratified_weight_splits)
max_deviation = np.abs(weight_fracs - 1 / self.n_splits)
max_deviation = np.max(np.abs(weight_fracs - 1 / self.n_splits))
max_deviations.append(max_deviation)
# Return most weight-balanced partition
min_deviation_index = np.argmin(max_deviations)
Expand Down
6 changes: 3 additions & 3 deletions econml/tests/test_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def make_random(is_discrete, d):
all_infs.append(BootstrapInference(1))

for est, multi, infs in [(LinearDMLCateEstimator(model_y=Lasso(),
model_t=model_t,
model_t='auto',
discrete_treatment=is_discrete),
False,
all_infs),
Expand Down Expand Up @@ -149,8 +149,8 @@ def test_can_use_vectors(self):
def test_can_use_sample_weights(self):
"""Test that we can pass sample weights to an estimator."""
dmls = [
LinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer()),
SparseLinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer())
LinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer()),
SparseLinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer())
]
for dml in dmls:
dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]),
Expand Down
21 changes: 10 additions & 11 deletions econml/tests/test_orf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression, LogisticRegressionCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
from econml.ortho_forest import ContinuousTreatmentOrthoForest, DiscreteTreatmentOrthoForest, \
WeightedModelWrapper
from econml.ortho_forest import ContinuousTreatmentOrthoForest, DiscreteTreatmentOrthoForest
from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper


class TestOrthoForest(unittest.TestCase):
Expand Down Expand Up @@ -53,8 +53,8 @@ def test_continuous_treatments(self):
est = ContinuousTreatmentOrthoForest(n_jobs=4, n_trees=10,
model_T=Lasso(),
model_Y=Lasso(),
model_T_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
model_T_final=WeightedLassoCVWrapper(),
model_Y_final=WeightedLassoCVWrapper())
# Test inputs for continuous treatments
# --> Check that one can pass in regular lists
est.fit(list(Y), list(T), list(TestOrthoForest.X), list(TestOrthoForest.W))
Expand All @@ -69,8 +69,8 @@ def test_continuous_treatments(self):
max_depth=50, subsample_ratio=0.30, bootstrap=False, n_jobs=4,
model_T=Lasso(alpha=0.024),
model_Y=Lasso(alpha=0.024),
model_T_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
model_T_final=WeightedLassoCVWrapper(),
model_Y_final=WeightedLassoCVWrapper())
est.fit(Y, T, TestOrthoForest.X, TestOrthoForest.W)
self._test_te(est, TestOrthoForest.expected_exp_te, tol=0.5)
# Test continuous treatments without controls
Expand All @@ -94,7 +94,7 @@ def test_binary_treatments(self):
est = DiscreteTreatmentOrthoForest(n_trees=10, n_jobs=4,
propensity_model=LogisticRegression(), model_Y=Lasso(),
propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
model_Y_final=WeightedLassoCVWrapper())
# Test inputs for binary treatments
# --> Check that one can pass in regular lists
est.fit(list(Y), list(T), list(TestOrthoForest.X), list(TestOrthoForest.W))
Expand All @@ -118,7 +118,7 @@ def test_binary_treatments(self):
propensity_model=LogisticRegression(C=1 / 0.024, penalty='l1'),
model_Y=Lasso(alpha=0.024),
propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
model_Y_final=WeightedLassoCVWrapper())
est.fit(Y, T, TestOrthoForest.X, TestOrthoForest.W)
self._test_te(est, TestOrthoForest.expected_exp_te, tol=0.7, treatment_type='discrete')
# Test binary treatments without controls
Expand Down Expand Up @@ -146,9 +146,8 @@ def test_multiple_treatments(self):
max_depth=50, subsample_ratio=0.30, bootstrap=False, n_jobs=4,
model_T=MultiOutputRegressor(Lasso(alpha=0.024)),
model_Y=Lasso(alpha=0.024),
model_T_final=WeightedModelWrapper(
MultiOutputRegressor(LassoCV()), sample_type="weighted"),
model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
model_T_final=WeightedLassoCVWrapper(),
model_Y_final=WeightedLassoCVWrapper())
est.fit(Y, T, TestOrthoForest.X, TestOrthoForest.W)
expected_te = np.array([TestOrthoForest.expected_exp_te, TestOrthoForest.expected_const_te]).T
self._test_te(est, expected_te, tol=0.5, treatment_type='multi')
Expand Down
Loading

0 comments on commit 818c832

Please sign in to comment.