Skip to content

Commit

Permalink
Add 2d array support to DRLearner
Browse files Browse the repository at this point in the history
  • Loading branch information
kbattocchi committed Jan 13, 2020
1 parent 8370694 commit f8ac4fb
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 80 deletions.
56 changes: 33 additions & 23 deletions econml/drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,14 @@

import numpy as np
from warnings import warn

from sklearn.base import clone
from sklearn.linear_model import LogisticRegressionCV, LinearRegression, LassoCV
from econml.utilities import inverse_onehot, check_high_dimensional
from econml.sklearn_extensions.linear_model import WeightedLassoCV, DebiasedLasso
from econml.utilities import inverse_onehot, check_high_dimensional, StatsModelsLinearRegression
from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper, DebiasedLasso
from econml.sklearn_extensions.ensemble import SubsampledHonestForest
from sklearn.base import clone
from econml._ortho_learner import _OrthoLearner
from econml.cate_estimator import StatsModelsCateEstimatorDiscreteMixin, DebiasedLassoCateEstimatorDiscreteMixin
from econml.utilities import StatsModelsLinearRegression
from sklearn.preprocessing import PolynomialFeatures
from econml.inference import GenericModelFinalInferenceDiscrete


Expand Down Expand Up @@ -243,7 +242,7 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
"""

def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
model_final=StatsModelsLinearRegression(),
multitask_model_final=False,
featurizer=None,
Expand All @@ -258,8 +257,9 @@ def _combine(self, X, W):
return np.hstack([arr for arr in [X, W] if arr is not None])

def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
# TODO Allow for non-vector y, i.e. of shape (n, 1)
assert np.ndim(Y) == 1, "Can only accept single dimensional outcomes Y! Use Y.ravel()."
if Y.ndim != 1 and (Y.ndim != 2 or Y.shape[1] != 1):
raise ValueError("The outcome matrix must be of shape ({0}, ) or ({0}, 1), "
"instead got {1}.".format(len(X), Y.shape))
if (X is None) and (W is None):
raise AttributeError("At least one of X or W has to not be None!")
if np.any(np.all(T == 0, axis=0)) or (not np.any(np.all(T == 0, axis=1))):
Expand All @@ -274,16 +274,17 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
def predict(self, Y, T, X=None, W=None, *, sample_weight=None):
XW = self._combine(X, W)
propensities = self._model_propensity.predict_proba(XW)
n = T.shape[0]
Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))
T_counter = np.zeros(T.shape)
Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter]))
Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, 0] += (Y.reshape(n) - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
for t in np.arange(T.shape[1]):
T_counter = np.zeros(T.shape)
T_counter[:, t] = 1
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter]))
Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
return Y_pred
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, t + 1] += (Y.reshape(n) - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
return Y_pred.reshape(Y.shape + (T.shape[1] + 1,))

class ModelFinal:
# Coding Remark: The reasoning around the multitask_model_final could have been simplified if
Expand All @@ -299,36 +300,45 @@ def __init__(self, model_final, featurizer, multitask_model_final):

def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
Y_pred, = nuisances
self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton)
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.fit_transform(X)
filtered_kwargs = _filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var)
if self._multitask_model_final:
self.model_cate = self._model_final.fit(X, Y_pred[:, 1:] - Y_pred[:, [0]], **filtered_kwargs)
ys = Y_pred[..., 1:] - Y_pred[..., [0]] # subtract control results from each other arm
if self.d_y: # need to squeeze out singleton so that we fit on 2D array
ys = ys.squeeze(1)
self.model_cate = self._model_final.fit(X, ys, **filtered_kwargs)
else:
self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[:, t] - Y_pred[:, 0],
self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[..., t] - Y_pred[..., 0],
**filtered_kwargs)
for t in np.arange(1, Y_pred.shape[1])]
for t in np.arange(1, Y_pred.shape[-1])]
return self

def predict(self, X=None):
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.transform(X)
if self._multitask_model_final:
return self.model_cate.predict(X)
pred = self.model_cate.predict(X)
if self.d_y: # need to reintroduce singleton Y dimension
return pred[:, np.newaxis, :]
return pred
else:
return np.array([mdl.predict(X) for mdl in self.models_cate]).T
preds = np.array([mdl.predict(X) for mdl in self.models_cate])
return np.moveaxis(preds, 0, -1) # move treatment dim to end

def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.transform(X)
Y_pred, = nuisances
if self._multitask_model_final:
return np.mean(np.average((Y_pred[:, 1:] - Y_pred[:, [0]] - self.model_cate.predict(X))**2,
return np.mean(np.average((Y_pred[..., 1:] - Y_pred[..., [0]] - self.model_cate.predict(X))**2,
weights=sample_weight, axis=0))
else:
return np.mean([np.average((Y_pred[:, t] - Y_pred[:, 0] - self.models_cate[t - 1].predict(X))**2,
return np.mean([np.average((Y_pred[..., t] - Y_pred[..., 0] -
self.models_cate[t - 1].predict(X))**2,
weights=sample_weight, axis=0)
for t in np.arange(1, Y_pred.shape[1])])
for t in np.arange(1, Y_pred.shape[-1])])

self._multitask_model_final = multitask_model_final
super().__init__(ModelNuisance(model_propensity, model_regression),
Expand Down Expand Up @@ -615,7 +625,7 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
n_splits=2, random_state=None):
Expand Down Expand Up @@ -806,7 +816,7 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
Expand Down
12 changes: 6 additions & 6 deletions econml/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,19 +221,19 @@ def const_marginal_effect_interval(self, X, *, alpha=0.1):
if (X is not None) and (self.featurizer is not None):
X = self.featurizer.fit_transform(X)
preds = np.array([mdl.predict_interval(X, alpha=alpha) for mdl in self.fitted_models_final])
return tuple([preds[:, 0, :].T, preds[:, 1, :].T])
return tuple(np.moveaxis(preds, [0, 1], [-1, 0])) # send treatment to the end, pull bounds to the front

def effect_interval(self, X, *, T0, T1, alpha=0.1):
X, T0, T1 = self._est._expand_treatments(X, T0, T1)
if np.any(np.any(T0 > 0, axis=1)):
raise AttributeError("Can only calculate intervals of effects with respect to baseline treatment!")
ind = (T1 @ np.arange(1, T1.shape[1] + 1)).astype(int)
ind = inverse_onehot(T1)
lower, upper = self.const_marginal_effect_interval(X, alpha=alpha)
lower = np.hstack([np.zeros((lower.shape[0], 1)), lower])
upper = np.hstack([np.zeros((upper.shape[0], 1)), upper])
lower = np.concatenate([np.zeros(lower.shape[0:-1] + (1,)), lower], -1)
upper = np.concatenate([np.zeros(upper.shape[0:-1] + (1,)), upper], -1)
if X is None: # Then const_marginal_effect_interval will return a single row
lower, upper = np.tile(lower, (T0.shape[0], 1)), np.tile(upper, (T0.shape[0], 1))
return lower[np.arange(T0.shape[0]), ind], upper[np.arange(T0.shape[0]), ind]
lower, upper = np.repeat(lower, T0.shape[0], axis=0), np.repeat(upper, T0.shape[0], axis=0)
return lower[np.arange(T0.shape[0]), ..., ind], upper[np.arange(T0.shape[0]), ..., ind]


class LinearModelFinalInferenceDiscrete(GenericModelFinalInferenceDiscrete):
Expand Down
106 changes: 55 additions & 51 deletions econml/tests/test_drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,11 @@ def make_random(is_discrete, d):
else:
return np.random.normal(size=sz)

d_y = 0
is_discrete = True
for d_t in [0, 1]:
for d_x in [2, None]:
for d_w in [2, None]:
with self.subTest(d_t=d_t, d_x=d_x, d_w=d_w):
for d_y in [0, 1]:
is_discrete = True
for d_t in [0, 1]:
for d_x in [2, None]:
for d_w in [2, None]:
W, X, Y, T = [make_random(is_discrete, d)
for is_discrete, d in [(False, d_w),
(False, d_x),
Expand All @@ -96,51 +95,56 @@ def make_random(is_discrete, d):
((d_y,) if d_y > 0 else ()) +
((d_t_final,) if d_t_final > 0 else()))

# TODO: add stratification to bootstrap so that we can use it even with discrete treatments
infs = [None, 'statsmodels']

est = LinearDRLearner(model_regression=Lasso(),
model_propensity=LogisticRegression(C=1000, solver='lbfgs',
multi_class='auto'))

for inf in infs:
with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
is_discrete=is_discrete, est=est, inf=inf):
est.fit(Y, T, X, W, inference=inf)
# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(X)
marg_eff = est.marginal_effect(T, X)
self.assertEqual(shape(marg_eff), marginal_effect_shape)
self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

np.testing.assert_array_equal(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

T0 = np.full_like(T, 'a')
eff = est.effect(X, T0=T0, T1=T)
self.assertEqual(shape(eff), effect_shape)
if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
self.assertEqual(shape(marg_eff_int),
(2,) + marginal_effect_shape)
self.assertEqual(shape(const_marg_eff_int),
(2,) + const_marginal_effect_shape)
self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
(2,) + effect_shape)

est.score(Y, T, X, W)

# make sure we can call effect with implied scalar treatments, no matter the
# dimensions of T, and also that we warn when there are multiple treatments
if d_t > 1:
cm = self.assertWarns(Warning)
else:
cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager
with cm:
effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
eff = est.effect(X, T0='a', T1='b')
self.assertEqual(shape(eff), effect_shape2)
for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
multi_class='auto')),
DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
model_regression=LinearRegression(),
model_final=StatsModelsLinearRegression(),
multitask_model_final=True)]:

# TODO: add stratification to bootstrap so that we can use it even with discrete treatments
infs = [None]
if isinstance(est, LinearDRLearner):
infs.append('statsmodels')

for inf in infs:
with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
is_discrete=is_discrete, est=est, inf=inf):
est.fit(Y, T, X, W, inference=inf)
# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(X)
marg_eff = est.marginal_effect(T, X)
self.assertEqual(shape(marg_eff), marginal_effect_shape)
self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

np.testing.assert_array_equal(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

T0 = np.full_like(T, 'a')
eff = est.effect(X, T0=T0, T1=T)
self.assertEqual(shape(eff), effect_shape)
if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
self.assertEqual(shape(marg_eff_int),
(2,) + marginal_effect_shape)
self.assertEqual(shape(const_marg_eff_int),
(2,) + const_marginal_effect_shape)
self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
(2,) + effect_shape)

est.score(Y, T, X, W)

# make sure we can call effect with implied scalar treatments, no matter the
# dimensions of T, and also that we warn when there are multiple treatments
if d_t > 1:
cm = self.assertWarns(Warning)
else:
cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager
with cm:
effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
eff = est.effect(X, T0='a', T1='b')
self.assertEqual(shape(eff), effect_shape2)

def test_can_use_vectors(self):
"""
Expand Down

0 comments on commit f8ac4fb

Please sign in to comment.