diff --git a/econml/drlearner.py b/econml/drlearner.py index 829cf4e13..e30a0f416 100644 --- a/econml/drlearner.py +++ b/econml/drlearner.py @@ -29,15 +29,14 @@ import numpy as np from warnings import warn + +from sklearn.base import clone from sklearn.linear_model import LogisticRegressionCV, LinearRegression, LassoCV -from econml.utilities import inverse_onehot, check_high_dimensional -from econml.sklearn_extensions.linear_model import WeightedLassoCV, DebiasedLasso +from econml.utilities import inverse_onehot, check_high_dimensional, StatsModelsLinearRegression +from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper, DebiasedLasso from econml.sklearn_extensions.ensemble import SubsampledHonestForest -from sklearn.base import clone from econml._ortho_learner import _OrthoLearner from econml.cate_estimator import StatsModelsCateEstimatorDiscreteMixin, DebiasedLassoCateEstimatorDiscreteMixin -from econml.utilities import StatsModelsLinearRegression -from sklearn.preprocessing import PolynomialFeatures from econml.inference import GenericModelFinalInferenceDiscrete @@ -243,7 +242,7 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc """ def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'), - model_regression=WeightedLassoCV(cv=3), + model_regression=WeightedLassoCVWrapper(cv=3), model_final=StatsModelsLinearRegression(), multitask_model_final=False, featurizer=None, @@ -258,8 +257,9 @@ def _combine(self, X, W): return np.hstack([arr for arr in [X, W] if arr is not None]) def fit(self, Y, T, X=None, W=None, *, sample_weight=None): - # TODO Allow for non-vector y, i.e. of shape (n, 1) - assert np.ndim(Y) == 1, "Can only accept single dimensional outcomes Y! Use Y.ravel()." + if Y.ndim != 1 and (Y.ndim != 2 or Y.shape[1] != 1): + raise ValueError("The outcome matrix must be of shape ({0}, ) or ({0}, 1), " + "instead got {1}.".format(len(X), Y.shape)) if (X is None) and (W is None): raise AttributeError("At least one of X or W has to not be None!") if np.any(np.all(T == 0, axis=0)) or (not np.any(np.all(T == 0, axis=1))): @@ -274,16 +274,17 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None): def predict(self, Y, T, X=None, W=None, *, sample_weight=None): XW = self._combine(X, W) propensities = self._model_propensity.predict_proba(XW) + n = T.shape[0] Y_pred = np.zeros((T.shape[0], T.shape[1] + 1)) T_counter = np.zeros(T.shape) - Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter])) - Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0] + Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n) + Y_pred[:, 0] += (Y.reshape(n) - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0] for t in np.arange(T.shape[1]): T_counter = np.zeros(T.shape) T_counter[:, t] = 1 - Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])) - Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1] - return Y_pred + Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n) + Y_pred[:, t + 1] += (Y.reshape(n) - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1] + return Y_pred.reshape(Y.shape + (T.shape[1] + 1,)) class ModelFinal: # Coding Remark: The reasoning around the multitask_model_final could have been simplified if @@ -299,36 +300,45 @@ def __init__(self, model_final, featurizer, multitask_model_final): def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None): Y_pred, = nuisances + self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton) if (X is not None) and (self._featurizer is not None): X = self._featurizer.fit_transform(X) filtered_kwargs = _filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var) if self._multitask_model_final: - self.model_cate = self._model_final.fit(X, Y_pred[:, 1:] - Y_pred[:, [0]], **filtered_kwargs) + ys = Y_pred[..., 1:] - Y_pred[..., [0]] # subtract control results from each other arm + if self.d_y: # need to squeeze out singleton so that we fit on 2D array + ys = ys.squeeze(1) + self.model_cate = self._model_final.fit(X, ys, **filtered_kwargs) else: - self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[:, t] - Y_pred[:, 0], + self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[..., t] - Y_pred[..., 0], **filtered_kwargs) - for t in np.arange(1, Y_pred.shape[1])] + for t in np.arange(1, Y_pred.shape[-1])] return self def predict(self, X=None): if (X is not None) and (self._featurizer is not None): X = self._featurizer.transform(X) if self._multitask_model_final: - return self.model_cate.predict(X) + pred = self.model_cate.predict(X) + if self.d_y: # need to reintroduce singleton Y dimension + return pred[:, np.newaxis, :] + return pred else: - return np.array([mdl.predict(X) for mdl in self.models_cate]).T + preds = np.array([mdl.predict(X) for mdl in self.models_cate]) + return np.moveaxis(preds, 0, -1) # move treatment dim to end def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None): if (X is not None) and (self._featurizer is not None): X = self._featurizer.transform(X) Y_pred, = nuisances if self._multitask_model_final: - return np.mean(np.average((Y_pred[:, 1:] - Y_pred[:, [0]] - self.model_cate.predict(X))**2, + return np.mean(np.average((Y_pred[..., 1:] - Y_pred[..., [0]] - self.model_cate.predict(X))**2, weights=sample_weight, axis=0)) else: - return np.mean([np.average((Y_pred[:, t] - Y_pred[:, 0] - self.models_cate[t - 1].predict(X))**2, + return np.mean([np.average((Y_pred[..., t] - Y_pred[..., 0] - + self.models_cate[t - 1].predict(X))**2, weights=sample_weight, axis=0) - for t in np.arange(1, Y_pred.shape[1])]) + for t in np.arange(1, Y_pred.shape[-1])]) self._multitask_model_final = multitask_model_final super().__init__(ModelNuisance(model_propensity, model_regression), @@ -615,7 +625,7 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner): def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'), - model_regression=WeightedLassoCV(cv=3), + model_regression=WeightedLassoCVWrapper(cv=3), featurizer=None, fit_cate_intercept=True, n_splits=2, random_state=None): @@ -806,7 +816,7 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner): def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'), - model_regression=WeightedLassoCV(cv=3), + model_regression=WeightedLassoCVWrapper(cv=3), featurizer=None, fit_cate_intercept=True, alpha='auto', diff --git a/econml/inference.py b/econml/inference.py index 597107dc7..50e8bc283 100644 --- a/econml/inference.py +++ b/econml/inference.py @@ -221,19 +221,19 @@ def const_marginal_effect_interval(self, X, *, alpha=0.1): if (X is not None) and (self.featurizer is not None): X = self.featurizer.fit_transform(X) preds = np.array([mdl.predict_interval(X, alpha=alpha) for mdl in self.fitted_models_final]) - return tuple([preds[:, 0, :].T, preds[:, 1, :].T]) + return tuple(np.moveaxis(preds, [0, 1], [-1, 0])) # send treatment to the end, pull bounds to the front def effect_interval(self, X, *, T0, T1, alpha=0.1): X, T0, T1 = self._est._expand_treatments(X, T0, T1) if np.any(np.any(T0 > 0, axis=1)): raise AttributeError("Can only calculate intervals of effects with respect to baseline treatment!") - ind = (T1 @ np.arange(1, T1.shape[1] + 1)).astype(int) + ind = inverse_onehot(T1) lower, upper = self.const_marginal_effect_interval(X, alpha=alpha) - lower = np.hstack([np.zeros((lower.shape[0], 1)), lower]) - upper = np.hstack([np.zeros((upper.shape[0], 1)), upper]) + lower = np.concatenate([np.zeros(lower.shape[0:-1] + (1,)), lower], -1) + upper = np.concatenate([np.zeros(upper.shape[0:-1] + (1,)), upper], -1) if X is None: # Then const_marginal_effect_interval will return a single row - lower, upper = np.tile(lower, (T0.shape[0], 1)), np.tile(upper, (T0.shape[0], 1)) - return lower[np.arange(T0.shape[0]), ind], upper[np.arange(T0.shape[0]), ind] + lower, upper = np.repeat(lower, T0.shape[0], axis=0), np.repeat(upper, T0.shape[0], axis=0) + return lower[np.arange(T0.shape[0]), ..., ind], upper[np.arange(T0.shape[0]), ..., ind] class LinearModelFinalInferenceDiscrete(GenericModelFinalInferenceDiscrete): diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py index 99068ed9a..97fe1f2e5 100644 --- a/econml/tests/test_drlearner.py +++ b/econml/tests/test_drlearner.py @@ -70,12 +70,11 @@ def make_random(is_discrete, d): else: return np.random.normal(size=sz) - d_y = 0 - is_discrete = True - for d_t in [0, 1]: - for d_x in [2, None]: - for d_w in [2, None]: - with self.subTest(d_t=d_t, d_x=d_x, d_w=d_w): + for d_y in [0, 1]: + is_discrete = True + for d_t in [0, 1]: + for d_x in [2, None]: + for d_w in [2, None]: W, X, Y, T = [make_random(is_discrete, d) for is_discrete, d in [(False, d_w), (False, d_x), @@ -96,51 +95,56 @@ def make_random(is_discrete, d): ((d_y,) if d_y > 0 else ()) + ((d_t_final,) if d_t_final > 0 else())) - # TODO: add stratification to bootstrap so that we can use it even with discrete treatments - infs = [None, 'statsmodels'] - - est = LinearDRLearner(model_regression=Lasso(), - model_propensity=LogisticRegression(C=1000, solver='lbfgs', - multi_class='auto')) - - for inf in infs: - with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t, - is_discrete=is_discrete, est=est, inf=inf): - est.fit(Y, T, X, W, inference=inf) - # make sure we can call the marginal_effect and effect methods - const_marg_eff = est.const_marginal_effect(X) - marg_eff = est.marginal_effect(T, X) - self.assertEqual(shape(marg_eff), marginal_effect_shape) - self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) - - np.testing.assert_array_equal( - marg_eff if d_x else marg_eff[0:1], const_marg_eff) - - T0 = np.full_like(T, 'a') - eff = est.effect(X, T0=T0, T1=T) - self.assertEqual(shape(eff), effect_shape) - if inf is not None: - const_marg_eff_int = est.const_marginal_effect_interval(X) - marg_eff_int = est.marginal_effect_interval(T, X) - self.assertEqual(shape(marg_eff_int), - (2,) + marginal_effect_shape) - self.assertEqual(shape(const_marg_eff_int), - (2,) + const_marginal_effect_shape) - self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)), - (2,) + effect_shape) - - est.score(Y, T, X, W) - - # make sure we can call effect with implied scalar treatments, no matter the - # dimensions of T, and also that we warn when there are multiple treatments - if d_t > 1: - cm = self.assertWarns(Warning) - else: - cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager - with cm: - effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else()) - eff = est.effect(X, T0='a', T1='b') - self.assertEqual(shape(eff), effect_shape2) + for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs', + multi_class='auto')), + DRLearner(model_propensity=LogisticRegression(multi_class='auto'), + model_regression=LinearRegression(), + model_final=StatsModelsLinearRegression(), + multitask_model_final=True)]: + + # TODO: add stratification to bootstrap so that we can use it even with discrete treatments + infs = [None] + if isinstance(est, LinearDRLearner): + infs.append('statsmodels') + + for inf in infs: + with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t, + is_discrete=is_discrete, est=est, inf=inf): + est.fit(Y, T, X, W, inference=inf) + # make sure we can call the marginal_effect and effect methods + const_marg_eff = est.const_marginal_effect(X) + marg_eff = est.marginal_effect(T, X) + self.assertEqual(shape(marg_eff), marginal_effect_shape) + self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) + + np.testing.assert_array_equal( + marg_eff if d_x else marg_eff[0:1], const_marg_eff) + + T0 = np.full_like(T, 'a') + eff = est.effect(X, T0=T0, T1=T) + self.assertEqual(shape(eff), effect_shape) + if inf is not None: + const_marg_eff_int = est.const_marginal_effect_interval(X) + marg_eff_int = est.marginal_effect_interval(T, X) + self.assertEqual(shape(marg_eff_int), + (2,) + marginal_effect_shape) + self.assertEqual(shape(const_marg_eff_int), + (2,) + const_marginal_effect_shape) + self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)), + (2,) + effect_shape) + + est.score(Y, T, X, W) + + # make sure we can call effect with implied scalar treatments, no matter the + # dimensions of T, and also that we warn when there are multiple treatments + if d_t > 1: + cm = self.assertWarns(Warning) + else: + cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager + with cm: + effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else()) + eff = est.effect(X, T0='a', T1='b') + self.assertEqual(shape(eff), effect_shape2) def test_can_use_vectors(self): """