diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py
index 2fde969d3..07597d98a 100644
--- a/econml/_ortho_learner.py
+++ b/econml/_ortho_learner.py
@@ -449,6 +449,7 @@ def __init__(self, model_nuisance, model_final, *,
         self._n_splits = n_splits
         self._discrete_treatment = discrete_treatment
         self._discrete_instrument = discrete_instrument
+        self._init_random_state = random_state
         self._random_state = check_random_state(random_state)
         if discrete_treatment:
             if categories != 'auto':
@@ -535,6 +536,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=No
         -------
         self : _OrthoLearner instance
         """
+        self._random_state = check_random_state(self._init_random_state)
         Y, T, X, W, Z, sample_weight, sample_var, groups = check_input_arrays(
             Y, T, X, W, Z, sample_weight, sample_var, groups)
         self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var, groups)
@@ -651,7 +653,7 @@ def effect_inference(self, X=None, *, T0=0, T1=1):
         return super().effect_inference(X, T0=T0, T1=T1)
     effect_inference.__doc__ = LinearCateEstimator.effect_inference.__doc__

-    def score(self, Y, T, X=None, W=None, Z=None):
+    def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
         """
         Score the fitted CATE model on a new data set. Generates nuisance parameters
         for the new data set based on the fitted nuisance models created at fit time.
@@ -673,6 +675,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
             Controls for each sample
         Z: optional (n, d_z) matrix or None (Default=None)
             Instruments for each sample
+        sample_weight: optional (n,) vector or None (Default=None)
+            Weights for each sample

         Returns
         -------
@@ -703,7 +707,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
         for it in range(len(nuisances)):
             nuisances[it] = np.mean(nuisances[it], axis=0)

-        return self._model_final.score(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances))
+        return self._model_final.score(Y, T, nuisances=nuisances,
+                                       **filter_none_kwargs(X=X, W=W, Z=Z, sample_weight=sample_weight))

     @property
     def model_final(self):
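Note (not part of the patch): the `_init_random_state` bookkeeping works because `check_random_state` returns a freshly seeded `RandomState` when given an int or None, while an existing `RandomState` object is mutated as it is consumed. Re-materializing the generator at the top of `fit` therefore replays the same random stream on every refit. A minimal standalone sketch:

    from sklearn.utils import check_random_state

    rs = check_random_state(123)
    a = rs.randint(100, size=3)
    b = rs.randint(100, size=3)   # differs from a: the state advanced in place
    # Re-seeding from the stored initial value restores an identical stream:
    c = check_random_state(123).randint(100, size=3)
    d = check_random_state(123).randint(100, size=3)
    assert (c == d).all() and (a == c).all()

One caveat: when the caller passes an already-instantiated `RandomState` object, `check_random_state` returns it as-is, so repeated fits still consume the shared stream; only int or None seeds yield bitwise-identical refits.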
diff --git a/econml/dml.py b/econml/dml.py
index 796c33a26..a4db014c9 100644
--- a/econml/dml.py
+++ b/econml/dml.py
@@ -362,9 +362,10 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn

     Parameters
    ----------
-    model_y: estimator
+    model_y: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the response to the features. Must implement
-        `fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.
+        `fit` and `predict` methods.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     model_t: estimator or 'auto' (default is 'auto')
         The estimator for fitting the treatment to the features.
@@ -435,11 +436,14 @@ def __init__(self,
         # TODO: consider whether we need more care around stateful featurizers,
         #       since we clone it and fit separate copies
+        if model_y == 'auto':
+            model_y = WeightedLassoCVWrapper(random_state=random_state)
         if model_t == 'auto':
             if discrete_treatment:
-                model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
+                model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=random_state),
+                                               random_state=random_state)
             else:
-                model_t = WeightedLassoCVWrapper()
+                model_t = WeightedLassoCVWrapper(random_state=random_state)
         self.bias_part_of_coef = fit_cate_intercept
         self.fit_cate_intercept = fit_cate_intercept
         super().__init__(model_y=_FirstStageWrapper(model_y, True,
@@ -490,9 +494,10 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML):

     Parameters
     ----------
-    model_y: estimator, optional (default is :class:`.WeightedLassoCVWrapper`)
+    model_y: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     model_t: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the treatment to the features.
@@ -545,7 +550,7 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML):
     """

     def __init__(self,
-                 model_y=WeightedLassoCVWrapper(), model_t='auto',
+                 model_y='auto', model_t='auto',
                  featurizer=None,
                  fit_cate_intercept=True,
                  linear_first_stages=True,
@@ -615,10 +620,10 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):

     Parameters
     ----------
-    model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
-        `)
+    model_y: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     model_t: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the treatment to the features.
@@ -686,7 +691,7 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
     """

     def __init__(self,
-                 model_y=WeightedLassoCVWrapper(), model_t='auto',
+                 model_y='auto', model_t='auto',
                  alpha='auto',
                  max_iter=1000,
                  tol=1e-4,
@@ -701,7 +706,8 @@ def __init__(self,
                                      alpha=alpha,
                                      fit_intercept=False,
                                      max_iter=max_iter,
-                                     tol=tol)
+                                     tol=tol,
+                                     random_state=random_state)
         super().__init__(model_y=model_y, model_t=model_t,
                          model_final=model_final,
@@ -765,11 +771,12 @@ class _RandomFeatures(TransformerMixin):
     def __init__(self, dim, bw, random_state):
         self._dim = dim
         self._bw = bw
-        self._random_state = check_random_state(random_state)
+        self._random_state = random_state

     def fit(self, X):
-        self.omegas = self._random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim))
-        self.biases = self._random_state.uniform(0, 2 * np.pi, size=(1, self._dim))
+        random_state = check_random_state(self._random_state)
+        self.omegas = random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim))
+        self.biases = random_state.uniform(0, 2 * np.pi, size=(1, self._dim))
         return self

     def transform(self, X):
@@ -782,9 +789,10 @@ class KernelDML(DML):

     Parameters
     ----------
-    model_y: estimator, optional (default is :class:``)
+    model_y: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     model_t: estimator or 'auto', optional (default is 'auto')
         The estimator for fitting the treatment to the features.
@@ -834,10 +842,10 @@ class KernelDML(DML):
         by :mod:`np.random`.
     """

-    def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto', fit_cate_intercept=True,
+    def __init__(self, model_y='auto', model_t='auto', fit_cate_intercept=True,
                  dim=20, bw=1.0, discrete_treatment=False, categories='auto', n_splits=2, random_state=None):
         super().__init__(model_y=model_y, model_t=model_t,
-                         model_final=ElasticNetCV(fit_intercept=False),
+                         model_final=ElasticNetCV(fit_intercept=False, random_state=random_state),
                          featurizer=_RandomFeatures(dim, bw, random_state),
                          fit_cate_intercept=fit_cate_intercept,
                          discrete_treatment=discrete_treatment,
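Note (not part of the patch): `_RandomFeatures` now stores the raw seed and defers `check_random_state` to `fit`, so each refit redraws the same random Fourier features. A standalone sketch of the deferred-seeding pattern; the `transform` body follows the standard random-Fourier-features formula and is only assumed to match econml's:

    import numpy as np
    from sklearn.utils import check_random_state

    class DeferredSeedRandomFeatures:
        def __init__(self, dim, bw, random_state=None):
            self.dim, self.bw, self.random_state = dim, bw, random_state

        def fit(self, X):
            rs = check_random_state(self.random_state)  # fresh stream on every fit
            self.omegas_ = rs.normal(0, 1 / self.bw, size=(X.shape[1], self.dim))
            self.biases_ = rs.uniform(0, 2 * np.pi, size=(1, self.dim))
            return self

        def transform(self, X):
            return np.sqrt(2 / self.dim) * np.cos(X @ self.omegas_ + self.biases_)

Seeding in `__init__` (the old behavior) would make the second `fit` draw different omegas, because the generator's state advances during the first fit.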
diff --git a/econml/drlearner.py b/econml/drlearner.py
index 90cdf9ab9..a1c4ce727 100644
--- a/econml/drlearner.py
+++ b/econml/drlearner.py
@@ -200,16 +200,18 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc

     Parameters
     ----------
-    model_propensity : scikit-learn classifier
+    model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
         Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
         Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
         where T is a shape (n, ) array.
+        If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

-    model_regression : scikit-learn regressor
+    model_regression : scikit-learn regressor or 'auto', optional (default='auto')
         Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
         concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
         `predict` methods. If different models per treatment arm are desired, see the
         :class:`.MultiModelWrapper` helper class.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     model_final : estimator for the final cate model. Trained on regressing the doubly robust potential outcomes
@@ -358,8 +360,8 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
     """

-    def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
-                 model_regression=WeightedLassoCVWrapper(cv=3),
+    def __init__(self, model_propensity='auto',
+                 model_regression='auto',
                  model_final=StatsModelsLinearRegression(),
                  multitask_model_final=False,
                  featurizer=None,
@@ -367,6 +369,11 @@ def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', m
                  categories='auto',
                  n_splits=2, random_state=None):
+        if model_propensity == 'auto':
+            model_propensity = LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto',
+                                                    random_state=random_state)
+        if model_regression == 'auto':
+            model_regression = WeightedLassoCVWrapper(cv=3, random_state=random_state)
         self._multitask_model_final = multitask_model_final
         super().__init__(_ModelNuisance(model_propensity, model_regression, min_propensity),
                          _ModelFinal(model_final, featurizer, multitask_model_final),
@@ -585,16 +592,18 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):

     Parameters
     ----------
-    model_propensity : scikit-learn classifier
+    model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
         Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
         Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
         where T is a shape (n, ) array.
+        If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

-    model_regression : scikit-learn regressor
+    model_regression : scikit-learn regressor or 'auto', optional (default='auto')
         Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
         concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
         `predict` methods. If different models per treatment arm are desired, see the
         :class:`.MultiModelWrapper` helper class.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     featurizer : :term:`transformer`, optional, default None
         Must support fit_transform and transform. Used to create composite features in the final CATE regression.
@@ -678,8 +687,8 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
     """

     def __init__(self,
-                 model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
-                 model_regression=WeightedLassoCVWrapper(cv=3),
+                 model_propensity='auto',
+                 model_regression='auto',
                  featurizer=None,
                  fit_cate_intercept=True,
                  min_propensity=1e-6,
@@ -781,16 +790,18 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):

     Parameters
     ----------
-    model_propensity : scikit-learn classifier
+    model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
         Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
         Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
         where T is a shape (n, ) array.
+        If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

-    model_regression : scikit-learn regressor
+    model_regression : scikit-learn regressor or 'auto', optional (default='auto')
         Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
         concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
         `predict` methods. If different models per treatment arm are desired, see the
         :class:`.MultiModelWrapper` helper class.
+        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

     featurizer : :term:`transformer`, optional, default None
         Must support fit_transform and transform. Used to create composite features in the final CATE regression.
@@ -887,8 +898,8 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
     """

     def __init__(self,
-                 model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
-                 model_regression=WeightedLassoCVWrapper(cv=3),
+                 model_propensity='auto',
+                 model_regression='auto',
                  featurizer=None,
                  fit_cate_intercept=True,
                  alpha='auto',
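Note (not part of the patch): with the `'auto'` defaults the seed is threaded into the CV first stages, which the old class-level defaults could not do. Assuming the patched package, the two constructions below should behave identically (import paths are assumed from the diff's context):

    from sklearn.linear_model import LogisticRegressionCV
    from econml.drlearner import LinearDRLearner
    from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper

    est_auto = LinearDRLearner(random_state=123)  # first stages default to 'auto'
    est_explicit = LinearDRLearner(
        model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto',
                                              random_state=123),
        model_regression=WeightedLassoCVWrapper(cv=3, random_state=123),
        random_state=123)

A further benefit of `'auto'` over mutable default arguments: every instance gets its own first-stage estimator, rather than all instances of the class sharing one fitted object.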
diff --git a/econml/ortho_iv.py b/econml/ortho_iv.py
index f189795bf..bab8d6a76 100644
--- a/econml/ortho_iv.py
+++ b/econml/ortho_iv.py
@@ -24,7 +24,7 @@
 from .dml import _FinalWrapper
 from .inference import StatsModelsInference
 from .sklearn_extensions.linear_model import StatsModelsLinearRegression
-from .utilities import (_deprecate_positional, add_intercept, fit_with_groups,
+from .utilities import (_deprecate_positional, add_intercept, fit_with_groups, filter_none_kwargs,
                         hstack, inverse_onehot)
@@ -752,6 +752,12 @@ class DMLIV(_BaseDMLIV):
     categories: 'auto' or list, default 'auto'
         The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
         The first category will be treated as the control treatment.
+
+    random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
+        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
+        by :mod:`np.random`.
     """

     def __init__(self, model_Y_X, model_T_X, model_T_XZ, model_final, featurizer=None,
@@ -842,11 +848,18 @@ class NonParamDMLIV(_BaseDMLIV):
         The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
         The first category will be treated as the control treatment.

+    random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
+        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
+        by :mod:`np.random`.
+
     """

     def __init__(self, model_Y_X, model_T_X, model_T_XZ, model_final,
                  featurizer=None, fit_cate_intercept=True,
-                 n_splits=2, discrete_instrument=False, discrete_treatment=False, categories='auto'):
+                 n_splits=2, discrete_instrument=False, discrete_treatment=False, categories='auto',
+                 random_state=None):
         super().__init__(_FirstStageWrapper(model_Y_X, False),
                          _FirstStageWrapper(model_T_X, discrete_treatment),
                          _FirstStageWrapper(model_T_XZ, discrete_treatment),
@@ -857,7 +870,8 @@ def __init__(self, model_Y_X, model_T_X, model_T_XZ, model_final,
                          n_splits=n_splits,
                          discrete_instrument=discrete_instrument,
                          discrete_treatment=discrete_treatment,
-                         categories=categories)
+                         categories=categories,
+                         random_state=random_state)


 class _BaseDRIVModelFinal:
@@ -933,12 +947,12 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None,
         if (X is not None) and (self._featurizer is not None):
             X = self._featurizer.fit_transform(X)

-        # TODO: how do we incorporate the sample_weight and sample_var passed into this method
-        # as arguments?
-        if self._opt_reweighted:
-            self._model_final.fit(X, theta_dr, sample_weight=clipped_cov.ravel()**2)
-        else:
-            self._model_final.fit(X, theta_dr)
+        if self._opt_reweighted and (sample_weight is not None):
+            sample_weight = sample_weight * clipped_cov.ravel()**2
+        elif self._opt_reweighted:
+            sample_weight = clipped_cov.ravel()**2
+        self._model_final.fit(X, theta_dr, **filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var))
+
         return self

     def predict(self, X=None):
@@ -947,22 +961,18 @@ def predict(self, X=None):
         return self._model_final.predict(X).reshape((-1,) + self.d_y + self.d_t)

     def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None):
-        # TODO: is there a good way to incorporate the other nuisance terms in the score?
-        _, T_res, Y_res, _, _ = nuisances
+        theta_dr, clipped_cov = self._effect_estimate(nuisances)

-        if Y_res.ndim == 1:
-            Y_res = Y_res.reshape((-1, 1))
-        if T_res.ndim == 1:
-            T_res = T_res.reshape((-1, 1))
         if (X is not None) and (self._featurizer is not None):
             X = self._featurizer.transform(X)
-        effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1]))
-        Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape)
-        if sample_weight is not None:
-            return np.mean(np.average((Y_res - Y_res_pred)**2, weights=sample_weight, axis=0))
-        else:
-            return np.mean((Y_res - Y_res_pred) ** 2)
+        if self._opt_reweighted and (sample_weight is not None):
+            sample_weight = sample_weight * clipped_cov.ravel()**2
+        elif self._opt_reweighted:
+            sample_weight = clipped_cov.ravel()**2
+
+        return np.average((theta_dr.ravel() - self._model_final.predict(X).ravel())**2,
+                          weights=sample_weight, axis=0)


 class _BaseDRIV(_OrthoLearner):
@@ -1094,7 +1104,7 @@ def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, g
                         sample_weight=sample_weight, sample_var=sample_var, groups=groups,
                         inference=inference)

-    def score(self, Y, T, Z, X=None, W=None):
+    def score(self, Y, T, Z, X=None, W=None, sample_weight=None):
         """
         Score the fitted CATE model on a new data set. Generates nuisance parameters
         for the new data set based on the fitted nuisance models created at fit time.
@@ -1116,6 +1126,8 @@ def score(self, Y, T, Z, X=None, W=None):
             Features for each sample
         W: optional(n, d_w) matrix or None (Default=None)
             Controls for each sample
+        sample_weight: optional(n,) vector or None (Default=None)
+            Weights for each sample

         Returns
         -------
         score: float or (array of float)
             The score of the final CATE model on the new data. Generally, the score is a measure of how well
             the model fits the residualized data, and its exact interpretation depends on the
             type of the model_final.score method.
         """
         # Replacing score from _OrthoLearner, to reorder arguments and improve the docstring
-        return super().score(Y, T, X=X, W=W, Z=Z)
+        return super().score(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight)

     @property
     def original_featurizer(self):
@@ -1182,7 +1194,8 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
         self._model_T_XZ.fit(X=X, W=W, Z=Z, Target=T, sample_weight=sample_weight, groups=groups)
         # we need to undo the one-hot encoding for calling effect,
         # since it expects raw values
-        self._prel_model_effect.fit(Y, inverse_onehot(T), inverse_onehot(Z), X=X, groups=groups)
+        self._prel_model_effect.fit(Y, inverse_onehot(T), Z=inverse_onehot(Z), X=X, W=W,
+                                    sample_weight=sample_weight, groups=groups)
         return self

     def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
@@ -1197,7 +1210,8 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
         if hasattr(self._prel_model_effect, 'score'):
             # we need to undo the one-hot encoding for calling effect,
             # since it expects raw values
-            effect_score = self._prel_model_effect.score(Y, inverse_onehot(T), inverse_onehot(Z), X=X)
+            effect_score = self._prel_model_effect.score(Y, inverse_onehot(T),
+                                                         Z=inverse_onehot(Z), X=X, W=W,
+                                                         sample_weight=sample_weight)
         else:
             effect_score = None

@@ -1231,7 +1245,8 @@ def __init__(self, model_Y_X, model_T_XZ,
                  cov_clip=.1,
                  n_splits=3,
                  opt_reweighted=False,
-                 categories='auto'):
+                 categories='auto',
+                 random_state=None):
         """
         """
@@ -1244,7 +1259,8 @@ def __init__(self, model_Y_X, model_T_XZ,
                          n_splits=n_splits, discrete_instrument=True,
                          discrete_treatment=True,
                          categories=categories,
-                         opt_reweighted=opt_reweighted)
+                         opt_reweighted=opt_reweighted,
+                         random_state=random_state)


 class _DummyCATE:
@@ -1255,7 +1271,7 @@ class _DummyCATE:
     def __init__(self):
         return

-    def fit(self, y, T, Z, X, groups=None):
+    def fit(self, y, T, *, Z, X, W=None, sample_weight=None, groups=None):
         return self

     def effect(self, X):
@@ -1321,6 +1337,12 @@ class IntentToTreatDRIV(_IntentToTreatDRIV):
     categories: 'auto' or list, default 'auto'
         The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
         The first category will be treated as the control treatment.
+
+    random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
+        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
+        by :mod:`np.random`.
     """

     def __init__(self, model_Y_X, model_T_XZ,
@@ -1331,14 +1353,17 @@ def __init__(self, model_Y_X, model_T_XZ,
                  cov_clip=.1,
                  n_splits=3,
                  opt_reweighted=False,
-                 categories='auto'):
+                 categories='auto',
+                 random_state=None):
         model_Y_X = _FirstStageWrapper(model_Y_X, discrete_target=False)
         model_T_XZ = _FirstStageWrapper(model_T_XZ, discrete_target=True)
         prel_model_effect = _IntentToTreatDRIV(model_Y_X,
                                                model_T_XZ,
                                                _DummyCATE(),
                                                flexible_model_effect,
-                                               cov_clip=1e-7, n_splits=1, opt_reweighted=True)
+                                               cov_clip=1e-7, n_splits=1,
+                                               opt_reweighted=True,
+                                               random_state=random_state)
         if final_model_effect is None:
             final_model_effect = flexible_model_effect
         super().__init__(model_Y_X, model_T_XZ, prel_model_effect,
@@ -1348,7 +1373,8 @@ def __init__(self, model_Y_X, model_T_XZ,
                          cov_clip=cov_clip,
                          n_splits=n_splits,
                          opt_reweighted=opt_reweighted,
-                         categories=categories)
+                         categories=categories,
+                         random_state=random_state)

     @property
     def models_Y_X(self):
@@ -1418,6 +1444,11 @@ class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV):
         The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
         The first category will be treated as the control treatment.

+    random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
+        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
+        by :mod:`np.random`.
     """

     def __init__(self, model_Y_X, model_T_XZ,
@@ -1426,14 +1457,15 @@ def __init__(self, model_Y_X, model_T_XZ,
                  fit_cate_intercept=True,
                  cov_clip=.1,
                  n_splits=3,
-                 categories='auto'):
+                 categories='auto',
+                 random_state=None):
         super().__init__(model_Y_X, model_T_XZ,
                          flexible_model_effect=flexible_model_effect,
                          featurizer=featurizer,
                          fit_cate_intercept=fit_cate_intercept,
                          final_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                          cov_clip=cov_clip, n_splits=n_splits, opt_reweighted=False,
-                         categories=categories)
+                         categories=categories, random_state=random_state)

     # override only so that we can update the docstring to indicate support for `StatsModelsInference`
     @_deprecate_positional("X, W, and Z should be passed by keyword only. In a future release "
diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py
index 018364ff0..ce78343de 100644
--- a/econml/tests/test_dml.py
+++ b/econml/tests/test_dml.py
@@ -19,10 +19,12 @@
 from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsRLM
 from econml.tests.test_statsmodels import _summarize
 import econml.tests.utilities  # bugfix for assertWarns
-
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

 # all solutions to underdetermined (or exactly determined) Ax=b are given by A⁺b+(I-A⁺A)w for some arbitrary w
 # note that if Ax=b is overdetermined, this will raise an assertion error
+
+
 def rand_sol(A, b):
     """Generate a random solution to the equation Ax=b."""
     assert np.linalg.matrix_rank(A) <= len(b)
diff --git a/econml/tests/test_random_state.py b/econml/tests/test_random_state.py
new file mode 100644
index 000000000..696f85cdd
--- /dev/null
+++ b/econml/tests/test_random_state.py
@@ -0,0 +1,131 @@
+import unittest
+import pytest
+import pickle
+from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures
+from sklearn.model_selection import KFold, GroupKFold
+from econml.dml import DML, LinearDML, SparseLinearDML, KernelDML
+from econml.dml import NonParamDML, ForestDML
+from econml.drlearner import DRLearner, SparseLinearDRLearner, LinearDRLearner, ForestDRLearner
+from econml.ortho_iv import DMLATEIV, ProjectedDMLATEIV, DMLIV, NonParamDMLIV,\
+    IntentToTreatDRIV, LinearIntentToTreatDRIV
+import numpy as np
+from econml.utilities import shape, hstack, vstack, reshape, cross_product
+from econml.inference import BootstrapInference
+from contextlib import ExitStack
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+import itertools
+from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsRLM
+from econml.tests.test_statsmodels import _summarize
+import econml.tests.utilities  # bugfix for assertWarns
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+
+
+class TestRandomState(unittest.TestCase):
+
+    @staticmethod
+    def _make_data(n, p):
+        np.random.seed(1283)
+        X = np.random.uniform(-1, 1, size=(n, p))
+        W = np.random.uniform(-1, 1, size=(n, p))
+
+        def true_propensity(x):
+            return .4 + .1 * (x[:, 0] > 0)
+
+        def true_effect(x):
+            return .4 + .2 * x[:, 0]
+
+        def true_conf(x):
+            return x[:, 1]
+
+        T = np.random.binomial(1, true_propensity(X))
+        Y = true_effect(X) * T + true_conf(X) + np.random.normal(size=(n,))
+        X_test = np.zeros((100, p))
+        X_test[:, 0] = np.linspace(-1, 1, 100)
+        return Y, T, X, W, X_test
+
+    @staticmethod
+    def _test_random_state(est, X_test, Y, T, **kwargs):
+        est.fit(Y, T, **kwargs)
+        te1 = est.effect(X_test)
+        est.fit(Y, T, **kwargs)
+        te2 = est.effect(X_test)
+        est.fit(Y, T, **kwargs)
+        te3 = est.effect(X_test)
+        np.testing.assert_array_equal(te1, te2, err_msg='random state fixing does not work')
+        np.testing.assert_array_equal(te1, te3, err_msg='random state fixing does not work')
+
+    def test_dml_random_state(self):
+        Y, T, X, W, X_test = TestRandomState._make_data(500, 2)
+        for est in [
+                NonParamDML(model_y=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                            model_t=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                            model_final=RandomForestRegressor(max_depth=3, n_estimators=10, min_samples_leaf=100,
+                                                              bootstrap=True, random_state=123),
+                            discrete_treatment=True, n_splits=2, random_state=123),
+                ForestDML(model_y=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                          model_t=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                          n_estimators=10,
+                          discrete_treatment=True, n_crossfit_splits=2, random_state=123),
+                LinearDML(model_y=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                          model_t=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                          discrete_treatment=True, n_splits=2, random_state=123),
+                SparseLinearDML(discrete_treatment=True, n_splits=2, random_state=123),
+                KernelDML(discrete_treatment=True, n_splits=2, random_state=123)]:
+            TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
+
+    def test_dr_random_state(self):
+        Y, T, X, W, X_test = self._make_data(500, 2)
+        for est in [
+                DRLearner(model_final=RandomForestRegressor(max_depth=3, n_estimators=10, min_samples_leaf=100,
+                                                            bootstrap=True, random_state=123),
+                          n_splits=2, random_state=123),
+                LinearDRLearner(random_state=123),
+                SparseLinearDRLearner(n_splits=2, random_state=123),
+                ForestDRLearner(model_regression=RandomForestRegressor(n_estimators=10, max_depth=4,
+                                                                       random_state=123),
+                                model_propensity=RandomForestClassifier(
+                                    n_estimators=10, max_depth=4, random_state=123),
+                                n_crossfit_splits=2, random_state=123)]:
+            TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
+
+    def test_orthoiv_random_state(self):
+        Y, T, X, W, X_test = self._make_data(500, 2)
+        for est in [
+                DMLATEIV(model_Y_W=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                         model_T_W=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                         model_Z_W=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                         discrete_treatment=True, discrete_instrument=True, n_splits=2, random_state=123),
+                ProjectedDMLATEIV(model_Y_W=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                                  model_T_W=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                                  model_T_WZ=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                                  discrete_treatment=True, discrete_instrument=True, n_splits=2, random_state=123)]:
+            TestRandomState._test_random_state(est, None, Y, T, W=W, Z=T)
+        for est in [
+                DMLIV(model_Y_X=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                      model_T_X=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                      model_T_XZ=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                      model_final=LinearRegression(fit_intercept=False),
+                      discrete_treatment=True, discrete_instrument=True, n_splits=2, random_state=123),
+                NonParamDMLIV(model_Y_X=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                              model_T_X=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                              model_T_XZ=RandomForestClassifier(n_estimators=10, max_depth=4, random_state=123),
+                              model_final=LinearRegression(),
+                              discrete_treatment=True, discrete_instrument=True, n_splits=2, random_state=123)]:
+            TestRandomState._test_random_state(est, X_test, Y, T, X=X, Z=T)
+        for est in [IntentToTreatDRIV(model_Y_X=RandomForestRegressor(n_estimators=10, max_depth=4, random_state=123),
+                                      model_T_XZ=RandomForestClassifier(n_estimators=10,
+                                                                        max_depth=4, random_state=123),
+                                      flexible_model_effect=RandomForestRegressor(n_estimators=10,
+                                                                                  max_depth=4, random_state=123),
+                                      n_splits=2, random_state=123),
+                    LinearIntentToTreatDRIV(model_Y_X=RandomForestRegressor(n_estimators=10,
+                                                                            max_depth=4, random_state=123),
+                                            model_T_XZ=RandomForestClassifier(n_estimators=10,
+                                                                              max_depth=4, random_state=123),
+                                            flexible_model_effect=RandomForestRegressor(n_estimators=10,
+                                                                                        max_depth=4,
+                                                                                        random_state=123),
+                                            n_splits=2, random_state=123)]:
+            TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W, Z=T)
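Note (not part of the patch): the fit-three-times-and-compare pattern above generalizes to any estimator. A condensed helper for quick local checks, assuming the patched econml is importable and that `fit` returns `self` as its docstring states:

    import numpy as np

    def assert_deterministic(est, X_test, Y, T, n_fits=3, **fit_kwargs):
        effects = [est.fit(Y, T, **fit_kwargs).effect(X_test) for _ in range(n_fits)]
        for eff in effects[1:]:
            np.testing.assert_array_equal(effects[0], eff,
                                          err_msg='random state fixing does not work')

The new module can also be run in isolation with pytest (e.g. `pytest econml/tests/test_random_state.py`).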