diff --git a/econml/dml/causal_forest.py b/econml/dml/causal_forest.py index 4a1a536bc..e54c6a845 100644 --- a/econml/dml/causal_forest.py +++ b/econml/dml/causal_forest.py @@ -17,6 +17,8 @@ from .._cate_estimator import LinearCateEstimator from .._shap import _shap_explain_multitask_model_cate from .._ortho_learner import _OrthoLearner +from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values, + sensitivity_summary) class _CausalForestFinalWrapper: @@ -56,6 +58,11 @@ def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_v T_res = T_res.reshape((-1, 1)) if Y_res.ndim == 1: Y_res = Y_res.reshape((-1, 1)) + + # if binary/continuous treatment and single outcome, can calculate sensitivity params + if not ((self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1)): + self.sensitivity_params = dml_sensitivity_values(T_res, Y_res) + self._model.fit(fts, T_res, Y_res, sample_weight=sample_weight) # Fit a doubly robust average effect if self._discrete_treatment and self._drate: @@ -811,6 +818,113 @@ def tune(self, Y, T, *, X=None, W=None, return self + def sensitivity_summary(self, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3): + """ + Generate a summary of the sensitivity analysis for the ATE. + + Parameters + ---------- + null_hypothesis: float, default 0 + The null_hypothesis value for the ATE. + + alpha: float, default 0.05 + The significance level for the sensitivity interval. + + c_y: float, default 0.05 + The level of confounding in the outcome. Ranges from 0 to 1. + + c_d: float, default 0.05 + The level of confounding in the treatment. Ranges from 0 to 1. + + decimals: int, default 3 + Number of decimal places to round each column to. + + """ + if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1): + raise ValueError( + "Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.") + sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params + return sensitivity_summary(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, alpha=alpha, + c_y=c_y, c_t=c_t, rho=rho, decimals=decimals) + + def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'): + """ + Calculate the sensitivity interval for the ATE. + + The sensitivity interval is the range of values for the ATE that are + consistent with the observed data, given a specified level of confounding. + + Can only be calculated when Y and T are single arrays, and T is binary or continuous. + + Based on `Chernozhukov et al. (2022) `_ + + Parameters + ---------- + alpha: float, default 0.05 + The significance level for the sensitivity interval. + + c_y: float, default 0.05 + The level of confounding in the outcome. Ranges from 0 to 1. + + c_d: float, default 0.05 + The level of confounding in the treatment. Ranges from 0 to 1. + + interval_type: str, default 'ci' + The type of interval to return. 
Can be 'ci' or 'theta' + + Returns + ------- + (lb, ub): tuple of floats + sensitivity interval for the ATE + """ + if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1): + raise ValueError( + "Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.") + sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params + return sensitivity_interval(**sensitivity_params._asdict(), alpha=alpha, + c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type) + + def robustness_value(self, null_hypothesis=0, alpha=0.05, interval_type='ci'): + """ + Calculate the robustness value for the ATE. + + The robustness value is the level of confounding (between 0 and 1) in + *both* the treatment and outcome that would result in enough omitted variable bias such that + we can no longer reject the null hypothesis. When null_hypothesis is the default of 0, the robustness value + has the interpretation that it is the level of confounding that would make the + ATE statistically insignificant. + + A higher value indicates a more robust estimate. + + Returns 0 if the original interval already includes the null_hypothesis. + + Can only be calculated when Y and T are single arrays, and T is binary or continuous. + + Based on `Chernozhukov et al. (2022) `_ + + Parameters + ---------- + null_hypothesis: float, default 0 + The null_hypothesis value for the ATE. + + alpha: float, default 0.05 + The significance level for the robustness value. + + interval_type: str, default 'ci' + The type of interval to return. Can be 'ci' or 'theta' + + Returns + ------- + float + The robustness value + """ + if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1): + raise ValueError( + "Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.") + sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params + return RV(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, + alpha=alpha, interval_type=interval_type) + # override only so that we can update the docstring to indicate support for `blb` def fit(self, Y, T, *, X=None, W=None, sample_weight=None, groups=None, cache_values=False, inference='auto'): diff --git a/econml/dml/dml.py b/econml/dml/dml.py index b597e5681..01e184b74 100644 --- a/econml/dml/dml.py +++ b/econml/dml/dml.py @@ -25,6 +25,8 @@ shape, get_feature_names_or_default, filter_none_kwargs) from .._shap import _shap_explain_model_cate from ..sklearn_extensions.model_selection import get_selector, SingleModelSelector +from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values, + sensitivity_summary) def _combine(X, W, n_samples): @@ -105,10 +107,12 @@ def _make_first_stage_selector(model, is_discrete, random_state): class _FinalWrapper: - def __init__(self, model_final, fit_cate_intercept, featurizer, use_weight_trick): + def __init__(self, model_final, fit_cate_intercept, featurizer, + use_weight_trick, allow_sensitivity_analysis=False): self._model = clone(model_final, safe=False) self._use_weight_trick = use_weight_trick self._original_featurizer = clone(featurizer, safe=False) + self.allow_sensitivity_analysis = allow_sensitivity_analysis if self._use_weight_trick: self._fit_cate_intercept = False self._featurizer = self._original_featurizer @@ -147,6 +151,14 @@ def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_v self._d_t = shape(T_res)[1:] self._d_y = shape(Y_res)[1:] if not 
self._use_weight_trick: + + # if binary/continuous treatment and single outcome, can calculate sensitivity params + if self.allow_sensitivity_analysis and not ( + (self._d_t and self._d_t[0] > 1) or ( + self._d_y and self._d_y[0] > 1) + ): + self.sensitivity_params = dml_sensitivity_values(T_res, Y_res) + fts = self._combine(X, T_res) filtered_kwargs = filter_none_kwargs(sample_weight=sample_weight, freq_weight=freq_weight, sample_var=sample_var) @@ -535,7 +547,8 @@ def _gen_model_final(self): return clone(self.model_final, safe=False) def _gen_rlearner_model_final(self): - return _FinalWrapper(self._gen_model_final(), self.fit_cate_intercept, self._gen_featurizer(), False) + return _FinalWrapper(self._gen_model_final(), self.fit_cate_intercept, + self._gen_featurizer(), False, allow_sensitivity_analysis=True) # override only so that we can update the docstring to indicate support for `LinearModelFinalInference` def fit(self, Y, T, *, X=None, W=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None, @@ -594,6 +607,114 @@ def bias_part_of_coef(self): def fit_cate_intercept_(self): return self.rlearner_model_final_._fit_cate_intercept + def sensitivity_summary(self, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3): + """ + Generate a summary of the sensitivity analysis for the ATE. + + Parameters + ---------- + null_hypothesis: float, default 0 + The null_hypothesis value for the ATE. + + alpha: float, default 0.05 + The significance level for the sensitivity interval. + + c_y: float, default 0.05 + The level of confounding in the outcome. Ranges from 0 to 1. + + c_d: float, default 0.05 + The level of confounding in the treatment. Ranges from 0 to 1. + + decimals: int, default 3 + Number of decimal places to round each column to. + + """ + if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1): + raise ValueError( + "Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.") + sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params + return sensitivity_summary(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, alpha=alpha, + c_y=c_y, c_t=c_t, rho=rho, decimals=decimals) + + + def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'): + """ + Calculate the sensitivity interval for the ATE. + + The sensitivity interval is the range of values for the ATE that are + consistent with the observed data, given a specified level of confounding. + + Can only be calculated when Y and T are single arrays, and T is binary or continuous. + + Based on `Chernozhukov et al. (2022) `_ + + Parameters + ---------- + alpha: float, default 0.05 + The significance level for the sensitivity interval. + + c_y: float, default 0.05 + The level of confounding in the outcome. Ranges from 0 to 1. + + c_d: float, default 0.05 + The level of confounding in the treatment. Ranges from 0 to 1. + + interval_type: str, default 'ci' + The type of interval to return. 
Can be 'ci' or 'theta' + + Returns + ------- + (lb, ub): tuple of floats + sensitivity interval for the ATE + """ + if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1): + raise ValueError( + "Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.") + sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params + return sensitivity_interval(**sensitivity_params._asdict(), alpha=alpha, + c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type) + + def robustness_value(self, null_hypothesis=0, alpha=0.05, interval_type='ci'): + """ + Calculate the robustness value for the ATE. + + The robustness value is the level of confounding (between 0 and 1) in + *both* the treatment and outcome that would result in enough omitted variable bias such that + we can no longer reject the null hypothesis. When null_hypothesis is the default of 0, the robustness value + has the interpretation that it is the level of confounding that would make the + ATE statistically insignificant. + + A higher value indicates a more robust estimate. + + Returns 0 if the original interval already includes the null_hypothesis. + + Can only be calculated when Y and T are single arrays, and T is binary or continuous. + + Based on `Chernozhukov et al. (2022) `_ + + Parameters + ---------- + null_hypothesis: float, default 0 + The null_hypothesis value for the ATE. + + alpha: float, default 0.05 + The significance level for the robustness value. + + interval_type: str, default 'ci' + The type of interval to return. Can be 'ci' or 'theta' + + Returns + ------- + float + The robustness value + """ + if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1): + raise ValueError( + "Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.") + sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params + return RV(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, + alpha=alpha, interval_type=interval_type) + class LinearDML(StatsModelsCateEstimatorMixin, DML): """ diff --git a/econml/dr/_drlearner.py b/econml/dr/_drlearner.py index 4bdd1a352..3208c8064 100644 --- a/econml/dr/_drlearner.py +++ b/econml/dr/_drlearner.py @@ -52,6 +52,8 @@ from ..utilities import (check_high_dimensional, filter_none_kwargs, inverse_onehot, get_feature_names_or_default) from .._shap import _shap_explain_multitask_model_cate, _shap_explain_model_cate +from ..validate.sensitivity_analysis import (sensitivity_interval, RV, + sensitivity_summary, dr_sensitivity_values) class _ModelNuisance(ModelSelector): @@ -95,7 +97,7 @@ def score(self, Y, T, X=None, W=None, *, sample_weight=None, groups=None): def predict(self, Y, T, X=None, W=None, *, sample_weight=None, groups=None): XW = self._combine(X, W) - propensities = np.maximum(self._model_propensity.predict_proba(XW), self._min_propensity) + propensities = np.clip(self._model_propensity.predict_proba(XW), self._min_propensity, 1-self._min_propensity) n = T.shape[0] Y_pred = np.zeros((T.shape[0], T.shape[1] + 1)) T_counter = np.zeros(T.shape) @@ -118,9 +120,7 @@ def predict(self, Y, T, X=None, W=None, *, sample_weight=None, groups=None): else: Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n) Y_pred[:, t + 1] += (Y.reshape(n) - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1] - T_complete = np.hstack(((np.all(T == 0, axis=1) * 1).reshape(-1, 1), T)) - propensities_weight = np.sum(propensities * 
T_complete, axis=1) - return Y_pred.reshape(Y.shape + (T.shape[1] + 1,)), propensities_weight.reshape((n,)) + return Y_pred.reshape(Y.shape + (T.shape[1] + 1,)), propensities def _make_first_stage_selector(model, is_discrete, random_state): @@ -143,9 +143,15 @@ def __init__(self, model_final, featurizer, multitask_model_final): def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, freq_weight=None, sample_var=None, groups=None): - Y_pred, propensities = nuisances + Y_pred, T_pred = nuisances + self.sensitivity_params = dr_sensitivity_values(Y, T, Y_pred, T_pred) + + T_complete = np.hstack(((np.all(T == 0, axis=1) * 1).reshape(-1, 1), T)) + propensities = np.sum(T_pred * T_complete, axis=1).reshape((T.shape[0],)) + self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton) self.d_t = Y_pred.shape[-1] - 1 # track # of treatment (exclude baseline treatment) + if (X is not None) and (self._featurizer is not None): X = self._featurizer.fit_transform(X) @@ -183,6 +189,7 @@ def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, groups=N if (X is not None) and (self._featurizer is not None): X = self._featurizer.transform(X) Y_pred, _ = nuisances + if self._multitask_model_final: Y_pred_diff = Y_pred[..., 1:] - Y_pred[..., [0]] cate_pred = self.model_cate.predict(X).reshape((-1, self.d_t)) @@ -751,6 +758,127 @@ def shap_values(self, X, *, feature_names=None, treatment_names=None, output_nam background_samples=background_samples) shap_values.__doc__ = LinearCateEstimator.shap_values.__doc__ + def sensitivity_summary(self, T, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3): + """ + Generate a summary of the sensitivity analysis for the ATE for a given treatment. + + Parameters + ---------- + null_hypothesis: float, default 0 + The null_hypothesis value for the ATE. + + alpha: float, default 0.05 + The significance level for the sensitivity interval. + + c_y: float, default 0.05 + The level of confounding in the outcome. Ranges from 0 to 1. + + c_d: float, default 0.05 + The level of confounding in the treatment. Ranges from 0 to 1. + + decimals: int, default 3 + Number of decimal places to round each column to. + + """ + if T not in self.transformer.categories_[0]: + # raise own ValueError here because sometimes error from sklearn is not transparent + raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}") + _, T = self._expand_treatments(None, T) + T_ind = inverse_onehot(T).item() - 1 + assert T_ind >= 0, "No model was fitted for the control" + sensitivity_params = { + k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()} + return sensitivity_summary(**sensitivity_params, null_hypothesis=null_hypothesis, alpha=alpha, + c_y=c_y, c_t=c_t, rho=rho, decimals=decimals) + + def sensitivity_interval(self, T, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'): + """ + Calculate the sensitivity interval for the ATE for a given treatment category. + + The sensitivity interval is the range of values for the ATE that are + consistent with the observed data, given a specified level of confounding. + + Based on `Chernozhukov et al. (2022) `_ + + Parameters + ---------- + T: alphanumeric + The treatment with respect to calculate the sensitivity interval. + + alpha: float, default 0.05 + The significance level for the sensitivity interval. + + c_y: float, default 0.05 + The level of confounding in the outcome. Ranges from 0 to 1. 
+ + c_d: float, default 0.05 + The level of confounding in the treatment. Ranges from 0 to 1. + + interval_type: str, default 'ci' + The type of interval to return. Can be 'ci' or 'theta' + + Returns + ------- + (lb, ub): tuple of floats + sensitivity interval for the ATE for treatment T + """ + if T not in self.transformer.categories_[0]: + # raise own ValueError here because sometimes error from sklearn is not transparent + raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}") + _, T = self._expand_treatments(None, T) + T_ind = inverse_onehot(T).item() - 1 + assert T_ind >= 0, "No model was fitted for the control" + sensitivity_params = { + k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()} + return sensitivity_interval(**sensitivity_params, alpha=alpha, + c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type) + + + def robustness_value(self, T, null_hypothesis=0, alpha=0.05, interval_type='ci'): + """ + Calculate the robustness value for the ATE for a given treatment category. + + The robustness value is the level of confounding (between 0 and 1) in + *both* the treatment and outcome that would result in enough omitted variable bias such that + we can no longer reject the null hypothesis. When null_hypothesis is the default of 0, the robustness value + has the interpretation that it is the level of confounding that would make the + ATE statistically insignificant. + + A higher value indicates a more robust estimate. + + Returns 0 if the original interval already includes the null_hypothesis. + + Based on `Chernozhukov et al. (2022) `_ + + Parameters + ---------- + T: alphanumeric + The treatment with respect to calculate the robustness value. + + null_hypothesis: float, default 0 + The null_hypothesis value for the ATE. + + alpha: float, default 0.05 + The significance level for the robustness value. + + interval_type: str, default 'ci' + The type of interval to return. 
Can be 'ci' or 'theta' + + Returns + ------- + float + The robustness value + """ + if T not in self.transformer.categories_[0]: + # raise own ValueError here because sometimes error from sklearn is not transparent + raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}") + _, T = self._expand_treatments(None, T) + T_ind = inverse_onehot(T).item() - 1 + assert T_ind >= 0, "No model was fitted for the control" + sensitivity_params = { + k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()} + return RV(**sensitivity_params, null_hypothesis=null_hypothesis, alpha=alpha, interval_type=interval_type) + class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner): """ diff --git a/econml/policy/_drlearner.py b/econml/policy/_drlearner.py index e9263cce8..f7f328b65 100644 --- a/econml/policy/_drlearner.py +++ b/econml/policy/_drlearner.py @@ -19,6 +19,7 @@ def fit(self, Y, T, X=None, W=None, *, nuisances, warn('Parameter `sample_var` is ignored by the final estimator') sample_var = None Y_pred, _ = nuisances + self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton) if (X is not None) and (self._featurizer is not None): X = self._featurizer.fit_transform(X) diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py index 9114e46d7..5e52f4cda 100644 --- a/econml/tests/test_dml.py +++ b/econml/tests/test_dml.py @@ -247,6 +247,31 @@ def make_random(n, is_discrete, d): with pytest.raises(AttributeError): self.assertEqual(shape(est.intercept_), intercept_shape) + if d_y > 1 or is_discrete or d_t > 1: + # sensitivity interval should not calculate + # when d_y > 1 or t is multi category discrete / multi dim cont + with pytest.raises( + ValueError, + match='Sensitivity analysis for DML is not supported'): + est.sensitivity_interval() + + with pytest.raises( + ValueError, + match='Sensitivity analysis for DML is not supported'): + est.robustness_value() + + else: + + # make sure sensitivity methods can be called. + # allow for data-dependent error with negative sigma, nu + for method in [est.sensitivity_interval, + est.robustness_value, + est.sensitivity_summary]: + try: + method() + except ValueError as e: + assert 'sigma and nu must be non-negative' in str(e) + if inf is not None: const_marg_eff_int = est.const_marginal_effect_interval(X) marg_eff_int = est.marginal_effect_interval(T, X) diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py index 422c1048b..4bc508c7c 100644 --- a/econml/tests/test_drlearner.py +++ b/econml/tests/test_drlearner.py @@ -142,6 +142,38 @@ def make_random(is_discrete, d): # ensure that we can serialize fit estimator pickle.dumps(est) + # make sure sensitivity methods can be called. 
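+        # (these are the same entry points an end user would call on a fitted DRLearner,
+        #  e.g. est.sensitivity_summary(T='b') or est.robustness_value(T='b'); 'a' is the control)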
+ # allow for data-dependent error with negative sigma, nu + for T_val in ['b', 'c']: + for func in [est.sensitivity_interval, + est.robustness_value, + est.sensitivity_summary]: + try: + func(T=T_val) + except ValueError as e: + self.assertIn('sigma and nu must be non-negative', str(e)) + + # ensure sensitivity analysis fails on control + with pytest.raises(AssertionError): + est.sensitivity_interval(T='a') + + with pytest.raises(AssertionError): + est.robustness_value(T='a') + + with pytest.raises(AssertionError): + est.sensitivity_summary(T='a') + + # ensure failure on unknown treatment values + with pytest.raises(ValueError, match='not in the list of treatments'): + est.sensitivity_interval(T=1) + + with pytest.raises(ValueError, match='not in the list of treatments'): + est.robustness_value(T=1) + + with pytest.raises(ValueError, match='not in the list of treatments'): + est.sensitivity_summary(T=1) + + # make sure we can call the marginal_effect and effect methods const_marg_eff = est.const_marginal_effect( X) diff --git a/econml/tests/test_sensitivity_analysis.py b/econml/tests/test_sensitivity_analysis.py new file mode 100644 index 000000000..bf157dfd9 --- /dev/null +++ b/econml/tests/test_sensitivity_analysis.py @@ -0,0 +1,133 @@ +# Copyright (c) PyWhy contributors. All rights reserved. +# Licensed under the MIT License. + +import unittest +import pytest + +from econml.dml import LinearDML, CausalForestDML +from econml.dr import LinearDRLearner +from econml.validate.sensitivity_analysis import sensitivity_interval +from sklearn.linear_model import LinearRegression, LogisticRegression +import numpy as np + + + +class TestSensitivityAnalysis(unittest.TestCase): + + def test_params(self): + # test alpha when calling through lineardml + + n = 1000 + a = np.random.normal(size=5) + X = np.random.normal(size=(n,5), loc=[1,0.5,0,0,0])/3 # closest to center 1, then 2, then 3 + centers = np.array([[1,0,0,0,0], [0,1,0,0,0]]) # uncomment for binary treatment + + ds = X[:,None,:]-centers[None,:,:] + ds = np.einsum("nci,nci->nc", ds, ds) + + ps_r = np.exp(-ds) + ps = ps_r / np.sum(ps_r, axis=1, keepdims=True) + + T = np.random.default_rng().multinomial(1, ps) @ np.arange(len(centers)) + Y = np.random.normal(size=n) + 3*(T == 1)*X[:,1] - (T == 2) + 2 * X @ a + + ests = [ + LinearDML( + model_y=LinearRegression(), + model_t=LogisticRegression(), + # implicitly test that discrete binary treatment works for dml + # other permutations are tested in test_dml + discrete_treatment=True, + cv=3, + random_state=0, + ), + CausalForestDML( + model_y=LinearRegression(), + model_t=LogisticRegression(), + discrete_treatment=True, + cv=3, + random_state=0, + ), + LinearDRLearner( + model_regression=LinearRegression(), + model_propensity=LogisticRegression(), + cv=3, + random_state=0, + ) + ] + + for est in ests: + + est.fit(Y, T, X=X, W=None) + + T_arg = {} + if isinstance(est, LinearDRLearner): + T_arg = {'T': 1} + + # baseline sensitivity results + lb, ub = est.sensitivity_interval(**T_arg, alpha=0.05, c_y=0.05, c_t=0.05, rho=1) + rv = est.robustness_value(**T_arg, alpha=0.05) + + # check that alpha is passed through + lb2, ub2 = est.sensitivity_interval(**T_arg, alpha=0.5, c_y=0.05, c_t=0.05, rho=1) + self.assertTrue(lb < ub) + self.assertTrue(lb2 < ub2) + self.assertTrue(lb2 > lb) + self.assertTrue(ub2 < ub) + + rv2 = est.robustness_value(**T_arg, alpha=0.5) + self.assertTrue(rv2 > rv) + + # check that c_y, c_d are passed through + lb3, ub3 = est.sensitivity_interval(**T_arg, alpha=0.05, c_y=0.3, c_t=0.3, 
rho=1) + self.assertTrue(lb3 < lb) + self.assertTrue(ub3 > ub) + + # check that interval_type is passed through + lb4, ub4 = est.sensitivity_interval(**T_arg, alpha=0.05, + c_y=0.05, c_t=0.05, rho=1, interval_type='theta') + self.assertTrue(lb4 > lb) + self.assertTrue(ub4 < ub) + self.assertTrue(lb4 < ub4) + + rv4 = est.robustness_value(**T_arg, alpha=0.05, interval_type='theta') + self.assertTrue(rv4 > rv) + + # check that null_hypothesis is passed through + rv5 = est.robustness_value(**T_arg, alpha=0.05, null_hypothesis=10) + self.assertNotEqual(rv5, rv) + + + def test_invalid_params(self): + + theta = 0.5 + sigma = 0.5 + nu = 0.5 + cov = np.random.normal(size=(3, 3)) + + sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1) + + # check that c_y, c_y, rho are constrained + with pytest.raises(ValueError): + sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=-0.5, c_t=0.05, rho=1) + + with pytest.raises(ValueError): + sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=-0.5, rho=1) + + with pytest.raises(ValueError): + sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=1.5, c_t=0.05, rho=1) + + with pytest.raises(ValueError): + sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=-1.5) + + # ensure we raise an error on invalid sigma, nu + with pytest.raises(ValueError): + sensitivity_interval(theta, -1, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1) + + with pytest.raises(ValueError): + sensitivity_interval(theta, sigma,-1, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1) + + # ensure failure on invalid interval_type + with pytest.raises(ValueError): + sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1, interval_type='foo') + diff --git a/econml/validate/__init__.py b/econml/validate/__init__.py index a38ed78ad..f49df3a6d 100644 --- a/econml/validate/__init__.py +++ b/econml/validate/__init__.py @@ -6,6 +6,5 @@ from .drtester import DRTester from .results import BLPEvaluationResults, CalibrationEvaluationResults, UpliftEvaluationResults, EvaluationResults - __all__ = ['DRTester', 'BLPEvaluationResults', 'CalibrationEvaluationResults', 'UpliftEvaluationResults', 'EvaluationResults'] diff --git a/econml/validate/sensitivity_analysis.py b/econml/validate/sensitivity_analysis.py new file mode 100644 index 000000000..09c24d418 --- /dev/null +++ b/econml/validate/sensitivity_analysis.py @@ -0,0 +1,215 @@ +# Copyright (c) PyWhy contributors. All rights reserved. +# Licensed under the MIT License. 
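+#
+# Overview of the bound computed by ``sensitivity_interval`` below (the numbers in the
+# worked example are made up for illustration): for confounding strengths c_y, c_t and
+# correlation rho, the omitted-variable bias of the point estimate theta is bounded by
+#     |bias| <= |rho| * sqrt(c_y * c_t / (1 - c_t)) * sqrt(sigma * nu),
+# where, in the DML case, sigma = E[(Y_res - theta * T_res)^2] and nu = 1 / E[T_res^2].
+# For example, theta=2.0, sigma=1.5, nu=0.8 with c_y=c_t=0.05, rho=1 gives
+#     |bias| ~= sqrt(0.0025 / 0.95) * sqrt(1.2) ~= 0.056,
+# so the interval_type='theta' interval is roughly (1.944, 2.056); interval_type='ci'
+# further widens each endpoint using the estimated covariance ``cov`` of (theta, sigma, nu)
+# via ``_safe_norm_ppf``.
+#
+# Typical entry points are the corresponding estimator methods, e.g. (sketch):
+#     est = LinearDML(discrete_treatment=True).fit(Y, T, X=X)
+#     est.sensitivity_summary(c_y=0.05, c_t=0.05, rho=1.)
+#     lb, ub = est.sensitivity_interval(alpha=0.05)
+#     rv = est.robustness_value()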
+ +import numpy as np +from econml.utilities import _safe_norm_ppf, Summary +from collections import namedtuple + +SensitivityParams = namedtuple('SensitivityParams', ['theta', 'sigma', 'nu', 'cov']) + +def sensitivity_summary(theta, sigma, nu, cov, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3): + theta_lb, theta_ub = sensitivity_interval( + theta, sigma, nu, cov, alpha, c_y, c_t, rho, interval_type='theta') + + ci_lb, ci_ub = sensitivity_interval( + theta, sigma, nu, cov, alpha, c_y, c_t, rho, interval_type='ci') + + rv_theta = RV(theta, sigma, nu, cov, alpha, null_hypothesis=null_hypothesis, interval_type='theta') + rv_ci = RV(theta, sigma, nu, cov, alpha, null_hypothesis=null_hypothesis, interval_type='ci') + + + smry = Summary() + title = f'Sensitivity Analysis Summary for c_y={c_y}, c_t={c_t}, rho={rho}' + res = np.array([[ci_lb, theta_lb, theta, theta_ub, ci_ub]]) + res = np.round(res, decimals) + headers = ['CI Lower', 'Theta Lower', 'Theta', 'Theta Upper', 'CI Upper'] + + smry.add_table(res, headers, [], title=title) + + res_rv = [[rv_theta, rv_ci]] + res_rv = np.round(res_rv, decimals) + headers_rv = ['Robustness Value (Theta)', 'Robustness Value (CI)'] + title_rv = f'Robustness Values for null_hypothesis={null_hypothesis}' + smry.add_table(res_rv, headers_rv, [], title=title_rv) + + return smry + + +def sensitivity_interval(theta, sigma, nu, cov, alpha, c_y, c_t, rho, interval_type='ci'): + """Calculate the sensitivity interval.""" + if interval_type not in ['theta', 'ci']: + raise ValueError( + f"interval_type for sensitivity_interval must be 'theta' or 'ci'. Received {interval_type}") + + if not (c_y >= 0 and c_y < 1 and c_t >= 0 and c_t < 1): + raise ValueError( + "Invalid input: c_y and c_t must be between 0 and 1.") + + if rho < -1 or rho > 1: + raise ValueError( + "Invalid input: rho must be between -1 and 1.") + + if sigma < 0 or nu < 0: + raise ValueError( + "Invalid input: sigma and nu must be non-negative. " + "Negative values may indicate issues with the underlying nuisance model estimations.") + + C = np.abs(rho) * np.sqrt(c_y) * np.sqrt(c_t/(1-c_t))/2 + ests = np.array([theta, sigma, nu]) + + coefs_p = np.array([1, C*np.sqrt(nu/sigma), C*np.sqrt(sigma/nu)]) + coefs_n = np.array([1, -C*np.sqrt(nu/sigma), -C*np.sqrt(sigma/nu)]) + + lb = ests @ coefs_n + ub = ests @ coefs_p + + if interval_type == 'ci': + # One dimensional normal distribution: + sigma_p = coefs_p @ cov @ coefs_p + sigma_n = coefs_n @ cov @ coefs_n + + lb = _safe_norm_ppf(alpha / 2, loc=lb, scale=np.sqrt(sigma_n)) + ub = _safe_norm_ppf(1 - alpha / 2, loc=ub, scale=np.sqrt(sigma_p)) + + return (lb, ub) + + +def RV(theta, sigma, nu, cov, alpha, null_hypothesis=0, interval_type='ci'): + """ + Calculate the robustness value. + + The robustness value is the degree of confounding of *both* the + treatment and the outcome that still produces an interval + that excludes zero. + + When null_hypothesis is default of zero, we're looking for a value of r + such that the sensitivity bounds just touch zero. + + Returns + ------- + float + The robustness value - the level of confounding (between 0 and 1) that would make + the ATE reach the null_hypothesis. A higher value indicates + a more robust estimate. + Returns 0 if the original interval already includes the null_hypothesis. + + Notes + ----- + This function uses a binary search approach to find the value of r where the + sensitivity interval just touches zero. 
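+    Concretely, ``r`` is bracketed in [0, 1] and bisected with ``c_y = c_t = r`` and
+    ``rho = 1``, stopping once the relevant interval endpoint is within 1e-6 of
+    ``null_hypothesis`` or the bracket narrows below 1e-10.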
+ """ + if interval_type not in ['theta', 'ci']: + raise ValueError( + f"interval_type for sensitivity_interval must be 'theta' or 'ci'. Received {interval_type}") + + r = 0 + r_up = 1 + r_down = 0 + lb, ub = sensitivity_interval(theta, sigma, nu, cov, + alpha, 0, 0, 1, interval_type=interval_type) + if lb < null_hypothesis and ub > null_hypothesis: + return 0 + + else: + if lb > null_hypothesis: + target_ind = 0 + mult = 1 + d = lb - null_hypothesis + else: + target_ind = 1 + mult = -1 + d = ub - null_hypothesis + + while abs(d) > 1e-6 and r_up - r_down > 1e-10: + interval = sensitivity_interval( + theta, sigma, nu, cov, alpha, r, r, 1, interval_type=interval_type) + bound = interval[target_ind] + d = mult * (bound - null_hypothesis) + if d > 0: + r_down = r + else: + r_up = r + + r = (r_down + r_up) / 2 + + return r + + +def dml_sensitivity_values(t_res, y_res): + + t_res = t_res.reshape(-1, 1) + y_res = y_res.reshape(-1, 1) + + theta = np.mean(y_res*t_res) / np.mean(t_res**2) # Estimate the treatment effect + # Estimate the variance of the outcome residuals (after subtracting the treatment effect) + sigma2 = np.mean((y_res - theta*t_res)**2) + nu2 = 1/np.mean(t_res**2) # Estimate the variance of the treatment + + ls = np.concatenate([t_res**2, np.ones_like(t_res), t_res**2], axis=1) + + G = np.diag(np.mean(ls, axis=0)) # G matrix, diagonal with means of ls + G_inv = np.linalg.inv(G) # Inverse of G matrix, could just take reciprocals since it's diagonal + + residuals = np.concatenate([y_res*t_res-theta*t_res*t_res, + (y_res-theta*t_res) ** 2-sigma2, t_res**2*nu2-1], axis=1) # Combine residuals + # Estimate the covariance matrix of the residuals + Ω = residuals.T @ residuals / len(residuals) + # Estimate the variance of the parameters + cov = G_inv @ Ω @ G_inv / len(residuals) + + return SensitivityParams( + theta=theta, + sigma=sigma2, + nu=nu2, + cov=cov + ) + + +def dr_sensitivity_values(Y, T, y_pred, t_pred): + n_treatments = T.shape[1] + + # squeeze extra dimension if exists + y_pred = y_pred.squeeze(1) if np.ndim(y_pred) == 3 else y_pred + + T_complete = np.hstack(((np.all(T == 0, axis=1) * 1).reshape(-1, 1), T)) + Y = Y.squeeze() + alpha = np.zeros(shape=(T.shape[0], n_treatments)) + T = np.argmax(T_complete, axis=1) # Convert dummy encoding to a factor vector + + # undo doubly robust correction for y pred + # i.e. 
reconstruct original y_pred + y_pred = y_pred.copy() # perform operations on copy to avoid modifying original + for t in np.arange(T_complete.shape[1]): + npp = (T_complete[:, t] == 1) / t_pred[:, t] + y_pred[:, t] = (y_pred[:, t] - npp * Y) / (1 - npp) + + # one value of theta, sigma, nu for each non-control treatment + theta = np.zeros(n_treatments) + sigma = np.zeros(n_treatments) + nu = np.zeros(n_treatments) + + # one theta, sigma, nu covariance matrix for each non-control treatment + cov = np.zeros((n_treatments, 3, 3)) + + # ATE, sigma^2, and nu^2 + for i in range(n_treatments): + theta_score = y_pred[:, i+1] - y_pred[:, 0] + (Y-y_pred[:, i+1]) * ( + T_complete[:, i+1] == 1)/t_pred[:, i+1] - (Y-y_pred[:, 0]) * ((T_complete[:,0] == 1)/t_pred[:, 0]) + sigma_score = (Y-np.choose(T, y_pred.T))**2 + # exclude rows with other treatments + sigma_keep = np.isin(T, [0, i+1]) + sigma_score = np.where(sigma_keep, sigma_score, 0) + alpha[:,i] = (T_complete[:,i+1] == 1)/t_pred[:,i+1] - (T_complete[:,0] == 1)/t_pred[:,0] + nu_score = 2*(1/t_pred[:,i+1]+1/t_pred[:,0])-alpha[:,i]**2 + theta[i] = np.mean(theta_score) + sigma[i] = np.mean(sigma_score) + nu[i] = np.mean(nu_score) + scores = np.stack([theta_score-theta[i], sigma_score-sigma[i], nu_score-nu[i]], axis=1) + cov[i,:,:] = (scores.T @ scores / len(scores)) / len(scores) + + return SensitivityParams( + theta=theta, + sigma=sigma, + nu=nu, + cov=cov + ) diff --git a/prototypes/sensitivity_analysis/ovb_dml.ipynb b/prototypes/sensitivity_analysis/ovb_dml.ipynb new file mode 100644 index 000000000..21016628c --- /dev/null +++ b/prototypes/sensitivity_analysis/ovb_dml.ipynb @@ -0,0 +1,1429 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import KFold\n", + "from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression\n", + "from econml.utilities import _safe_norm_ppf\n", + "\n", + "\n", + "def dml_sim(model_t, model_y, n_folds, Y, T, X):\n", + " \"\"\"\n", + " DML for a single treatment and outcome.\n", + " \n", + " Parameters\n", + " ----------\n", + " model_t : object\n", + " A fitted model object for the treatment.\n", + " model_y : object\n", + " A fitted model object for the outcome.\n", + " n_folds : int\n", + " The number of folds to use in cross-validation.\n", + " Y : array-like\n", + " The outcome variable.\n", + " T : array-like\n", + " The treatment variable.\n", + " X : array-like\n", + " The covariates.\n", + " \n", + " Returns\n", + " -------\n", + " float\n", + " The estimated treatment effect.\n", + " \"\"\"\n", + " \n", + " # Initialize KFold cross-validation\n", + " kf = KFold(n_splits=n_folds, shuffle=True)\n", + " \n", + " # Initialize arrays to hold predictions\n", + " y_res = np.zeros_like(Y)\n", + " t_res = np.zeros_like(T)\n", + " \n", + " # Cross-validation loop\n", + " for train_index, test_index in kf.split(X):\n", + " X_train, X_test = X[train_index], X[test_index]\n", + " Y_train, Y_test = Y[train_index], Y[test_index]\n", + " T_train, T_test = T[train_index], T[test_index]\n", + " \n", + " # Fit the treatment model\n", + " t_res[test_index] = T_test-model_t.fit(X_train, T_train).predict(X_test).reshape(T_test.shape)\n", + " \n", + " # Fit the outcome model\n", + " y_res[test_index] = Y_test-model_y.fit(X_train, Y_train).predict(X_test).reshape(Y_test.shape)\n", + "\n", + " # for testing.. 
use econml nuisances\n", + " # y_res = econ_y_res.reshape(-1, 1) # Reshape y_res to be a column vector\n", + " # t_res = econ_t_res.reshape(-1, 1) # Reshape t_res to be a column vector\n", + " \n", + " # ATE, sigma^2, and nu^2\n", + " \n", + "\n", + " # print('rmse t', np.mean(t_res**2)**0.5)\n", + " # print('rmse y:', np.mean(y_res**2)**0.5)\n", + "\n", + " # smlr = StatsModelsLinearRegression(fit_intercept=False).fit(t_res, y_res)\n", + " # theta_smlr = smlr.coef_[0]\n", + " # var_smlr = smlr._var[0][0]\n", + " # print('theta_smlr:', theta_smlr)\n", + " # print('var_smlr:', var_smlr)\n", + " # print('se_smlr:', np.sqrt(var_smlr))\n", + " \n", + " theta = np.mean(y_res*t_res) / np.mean(t_res**2) # Estimate the treatment effect\n", + " # print('theta:', theta)\n", + " sigma2 = np.mean((y_res - theta*t_res)**2) # Estimate the variance of the outcome residuals (after subtracting the treatment effect)\n", + " nu2 = 1/np.mean(t_res**2) # Estimate the variance of the treatment\n", + " ests = np.array([theta, sigma2, nu2]) # Estimated parameters\n", + " \n", + " ls = np.concatenate([t_res**2, np.ones_like(t_res), t_res**2], axis=1)\n", + " \n", + " G = np.diag(np.mean(ls, axis=0)) # G matrix, diagonal with means of ls\n", + " G_inv = np.linalg.inv(G) # Inverse of G matrix, could just take reciprocals since it's diagonal\n", + " \n", + " \n", + " residuals = np.concatenate([y_res*t_res-theta*t_res*t_res, (y_res-theta*t_res)**2-sigma2, t_res**2*nu2-1], axis=1) # Combine residuals\n", + " Ω = residuals.T @ residuals / len(residuals) # Estimate the covariance matrix of the residuals\n", + " cov = G_inv @ Ω @ G_inv / len(residuals) # Estimate the variance of the parameters\n", + " \n", + " return theta, sigma2, nu2, cov\n", + "\n", + "def sensitivity_interval(theta, sigma, nu, cov, alpha, c_y, c_t, rho):\n", + " # [theta, sigma, nu] = ests\n", + " C = np.abs(rho) * np.sqrt(c_y) * np.sqrt(c_t/(1-c_t)) / 2\n", + " ests = np.array([theta, sigma, nu])\n", + " coefs_p = np.array([1, C*np.sqrt(nu/sigma), C*np.sqrt(sigma/nu)])\n", + " coefs_n = np.array([1, -C*np.sqrt(nu/sigma), -C*np.sqrt(sigma/nu)])\n", + " # One dimensional normal distribution:\n", + " sigma_p = coefs_p @ cov @ coefs_p\n", + " sigma_n = coefs_n @ cov @ coefs_n\n", + "\n", + " lb = _safe_norm_ppf(alpha / 2, loc=ests @ coefs_n, scale=np.sqrt(sigma_n))\n", + " ub = _safe_norm_ppf(1 - alpha / 2, loc=ests @ coefs_p, scale=np.sqrt(sigma_p))\n", + "\n", + " return (lb, ub)\n", + "\n", + "def RV(theta, sigma, nu, cov, alpha):\n", + " # The robustness value is the degree of confounding of *both* the treatment and the outcome that still produces an interval\n", + " # that excludes zero.\n", + "\n", + " # We're looking for a value of r such that the sensitivity bounds just touch zero\n", + "\n", + " r = 0\n", + " r_up = 1\n", + " r_down = 0\n", + " lb, ub = sensitivity_interval(theta, sigma, nu, cov, alpha, 0, 0, 1)\n", + " if lb < 0 and ub > 0:\n", + " return 0\n", + " \n", + " else:\n", + " if lb > 0:\n", + " target = 0\n", + " mult = 1\n", + " d = lb\n", + " else:\n", + " target = 1\n", + " mult = -1\n", + " d = ub\n", + "\n", + " while abs(d) > 1e-6:\n", + " d = mult * sensitivity_interval(theta, sigma, nu, cov, alpha, r, r, 1)[target]\n", + " if d > 0:\n", + " r_down = r\n", + " else:\n", + " r_up = r\n", + "\n", + " r = (r_down + r_up) / 2\n", + " \n", + " return r\n", + "\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simulate data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + 
"metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "\n", + "n = 1000\n", + "\n", + "alpha = np.random.normal(size=30)\n", + "beta = np.random.normal(size=30)\n", + "\n", + "X = np.random.normal(size=(n,30))\n", + "\n", + "t = np.random.normal(size=n) + 2 * X @ alpha\n", + "y = np.random.normal(size=n) + 3*t + 50* X @ beta\n", + "\n", + "T = t.reshape(-1, 1)\n", + "Y = y.reshape(-1, 1)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run simple dml and calculate intermediate values" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sig_level = 0.1\n", + "\n", + "theta, sigma, nu, sig = dml_sim(LinearRegression(), LinearRegression(), 2, Y, T, X)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculate a \"sensitivity interval\". \n", + "\n", + "Need to supply \"strength of latent confounder\" as argument." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.9713167396783304, 3.992013510060846)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sensitivity_interval(theta, sigma, nu, sig, sig_level, 0.6, 0.6, 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculate Robustness Value. \n", + "\n", + "The required strength of a latent confounder in order for the confidence interval to include 0." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9007053971290588" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RV(theta, sigma, nu, sig, sig_level)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ablations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'i': 30,\n", + " 'alpha': 0.1,\n", + " 'sensitivity_interval': (2.006244806224368, 4.0212629919887455),\n", + " 'RV': 0.9028782248497009}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level = 0.1\n", + "\n", + "results = []\n", + "for i in [1, 5, 15, 30]:\n", + " theta, sigma, nu, sig = dml_sim(LinearRegression(), LinearRegression(), 2, Y, T, X[:,:i])\n", + " result_dict = {\n", + " 'i': i,\n", + " 'alpha': sig_level,\n", + " 'sensitivity_interval': sensitivity_interval(theta, sigma, nu, sig, sig_level, 0.6, 0.6, 1),\n", + " 'RV': RV(theta, sigma, nu, sig, sig_level)\n", + " }\n", + " results.append(result_dict)\n", + " \n", + "results[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + " i alpha sensitivity_interval RV\n", + "0 1 0.1 (-5.76103393577295, 30.16877869667943) 0.463505\n", + "1 5 0.1 (-4.254207344618898, 31.474674283379382) 0.503265\n", + "2 15 0.1 (2.926606519518738, 25.91115187623303) 0.679077\n", + "3 30 0.1 (2.006244806224368, 4.0212629919887455) 0.902878" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " pd.DataFrame(results)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DoubleML" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import doubleml as dml\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "

5 rows × 32 columns

\n", + "
" + ], + "text/plain": [ + " Y T X0 X1 X2 X3 X4 \\\n", + "0 169.161860 7.570734 -1.720637 -0.480190 1.793066 -0.909049 -0.491242 \n", + "1 -368.478684 -10.259294 1.502222 -0.351112 0.326909 0.383989 1.019918 \n", + "2 595.208775 3.414269 -0.584506 -0.410095 -0.085977 0.515477 1.374335 \n", + "3 265.170499 21.740763 0.285862 -0.802048 0.211192 -0.799453 0.643080 \n", + "4 66.916752 12.252024 -1.129366 -1.768697 -1.095474 0.536305 1.300771 \n", + "\n", + " X5 X6 X7 ... X20 X21 X22 X23 \\\n", + "0 0.407515 -2.719292 0.214566 ... -1.236440 0.460231 -0.266955 0.185816 \n", + "1 0.102063 -1.172437 2.210359 ... -1.044021 -0.816131 0.288329 -0.428007 \n", + "2 1.912329 -0.645654 -0.413862 ... -0.029845 -1.035931 -0.146995 -1.224990 \n", + "3 -0.379773 0.055227 1.142309 ... -0.290620 -1.175747 0.404689 -1.427746 \n", + "4 0.316005 0.877389 -0.117935 ... -0.062981 0.789482 1.940387 1.153639 \n", + "\n", + " X24 X25 X26 X27 X28 X29 \n", + "0 0.729663 1.324584 -0.091459 1.387470 -0.982113 -2.001484 \n", + "1 1.047723 -0.058769 -0.664951 -0.111616 1.111973 0.214340 \n", + "2 -2.704679 1.110162 -0.191411 0.852031 -0.457792 -2.563874 \n", + "3 0.069783 0.237225 -0.983619 0.459482 -0.306016 1.853582 \n", + "4 0.741752 -0.210623 0.046603 1.389971 -0.172416 0.265279 \n", + "\n", + "[5 rows x 32 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([\n", + " pd.DataFrame(Y).squeeze().to_frame('Y'),\n", + " pd.DataFrame(T).squeeze().to_frame('T'),\n", + " pd.DataFrame(X).add_prefix('X'),\n", + "], axis=1)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dml_data = dml.DoubleMLData(df, 'Y', 'T')\n", + "dml_data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLPLR Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: Y\n", + "Treatment variable(s): ['T']\n", + "Covariates: ['X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20', 'X21', 'X22', 'X23', 'X24', 'X25', 'X26', 'X27', 'X28', 'X29']\n", + "Instrument variable(s): None\n", + "No. Observations: 1000\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: partialling out\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_l: LinearRegression()\n", + "Learner ml_m: LinearRegression()\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_l RMSE: [[3.19708721]]\n", + "Learner ml_m RMSE: [[1.01678607]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 2\n", + "No. 
repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "T 2.992753 0.032487 92.121737 0.0 2.929079 3.056426\n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLPLR(dml_data,\n", + " ml_l=LinearRegression(),\n", + " ml_m=LinearRegression(),\n", + " n_folds=2,\n", + " score='partialling out',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.03; cf_d=0.03, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 2.909926 2.963376 2.992753 3.022129 3.075592\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 91.336809 90.546872\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " cf_y cf_d rho delta_theta\n", + "T 1.0 0.13301 -1.0 -7.112185\n" + ] + } + ], + "source": [ + "sens_benchmark = dml_obj.sensitivity_benchmark(benchmarking_set=[\"X6\"])\n", + "print(sens_benchmark)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# New datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### DoubleML confounded synthetic data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from doubleml.datasets import make_confounded_plr_data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "cf_y = 0.1\n", + "cf_d = 0.1\n", + "theta = 5.0\n", + "dpg_dict = make_confounded_plr_data(n_obs=10000, cf_y=cf_y, cf_d=cf_d, theta=theta)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "x_cols = [f'X{i + 1}' for i in np.arange(dpg_dict['x'].shape[1])]\n", + "df = pd.DataFrame(np.column_stack((dpg_dict['x'], dpg_dict['y'], dpg_dict['d'])), columns=x_cols + ['y', 'd'])\n", + "dml_data = dml.DoubleMLData(df, 'y', 'd')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLPLR Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: y\n", + "Treatment variable(s): ['d']\n", + "Covariates: ['X1', 'X2', 'X3', 'X4']\n", + "Instrument variable(s): None\n", + "No. 
Observations: 10000\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: partialling out\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_l: RandomForestRegressor()\n", + "Learner ml_m: RandomForestRegressor()\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_l RMSE: [[8.43266256]]\n", + "Learner ml_m RMSE: [[1.13959992]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 2\n", + "No. repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "d 4.386734 0.076845 57.085535 0.0 4.236121 4.537348\n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLPLR(dml_data,\n", + " ml_l=RandomForestRegressor(),\n", + " ml_m=RandomForestRegressor(),\n", + " n_folds=2,\n", + " score='partialling out',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.1; cf_d=0.1, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 3.652547 3.758583 4.386734 5.014886 5.170425\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 51.346635 49.604976\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10000,)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dpg_dict['y'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4.84204509497674, 5.158534499404629)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sensitivity_interval(theta, sigma, nu, sig, sig_level, 0.1, 0.1, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'sig': array([[ 5.97407504e-03, -1.67034912e-01, -5.43651824e-05],\n", + " [-1.67034912e-01, 1.20203175e+01, 2.52109790e-03],\n", + " [-5.43651824e-05, 2.52109790e-03, 1.18426109e-04]]),\n", + " 'sensitivity_interval': (-1.6122852078135446, 10.442290925119165),\n", + " 'RV': 0.4936022162437439}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level=0.05\n", + "\n", + "theta, sigma, nu, sig = dml_sim(\n", + " RandomForestRegressor(), \n", + " RandomForestRegressor(), \n", + " 2, \n", + " dpg_dict['y'].reshape(-1, 1),\n", + " dpg_dict['d'].reshape(-1, 1),\n", + " dpg_dict['x']\n", + ")\n", + "\n", + "result_dict = {\n", + " 'alpha': sig_level,\n", + " 'sig': sig,\n", + " 'sensitivity_interval': sensitivity_interval(theta, sigma, nu, sig, sig_level, 0.6, 0.6, 1),\n", + " 'RV': RV(theta, sigma, nu, sig, sig_level)\n", + "}\n", + "result_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.4936022162437439" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dict['RV']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 401k data" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "dml_data = dml.datasets.fetch_401K()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLPLR Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: net_tfa\n", + "Treatment variable(s): ['e401']\n", + "Covariates: ['age', 'inc', 'educ', 'fsize', 'marr', 'twoearn', 'db', 'pira', 'hown']\n", + "Instrument variable(s): None\n", + "No. Observations: 9915\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: partialling out\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_l: RandomForestRegressor()\n", + "Learner ml_m: RandomForestRegressor()\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_l RMSE: [[56554.88810261]]\n", + "Learner ml_m RMSE: [[0.46880446]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 5\n", + "No. 
repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % \\\n", + "e401 9545.723974 1260.808673 7.571112 3.700422e-14 7074.584382 \n", + "\n", + " 97.5 % \n", + "e401 12016.863565 \n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLPLR(dml_data,\n", + " ml_l=RandomForestRegressor(),\n", + " ml_m=RandomForestRegressor(),\n", + " n_folds=5,\n", + " score='partialling out',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.04; cf_d=0.04, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 2409.550717 4636.205456 9545.723974 14455.242491 16511.651393\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 7.628916 5.816637\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.04, cf_d=0.04, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression, LassoCV" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'sensitivity_interval': (-3259.2844095062455, 7370.312469068845),\n", + " 'RV': 0}" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level=0.05\n", + "\n", + "theta, sigma, nu, sig = dml_sim(\n", + " RandomForestRegressor(), \n", + " RandomForestRegressor(), \n", + " 5, \n", + " dml_data.y.reshape(-1, 1),\n", + " dml_data.d.reshape(-1, 1),\n", + " dml_data.x\n", + ")\n", + "\n", + "# seem to get weird results here but not when I pass econml nuisances directly inside dml_sim func.\n", + "result_dict = {\n", + " 'alpha': sig_level,\n", + " 'sensitivity_interval': sensitivity_interval(theta, sigma, nu, sig, sig_level, 0.00001, 0.0001, 1),\n", + " 'RV': RV(theta, sigma, nu, sig, sig_level)\n", + "}\n", + "result_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Coefficient Results: X is None, please call intercept_inference to learn the constant!\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " CATE Intercept Results \n", + "=====================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "---------------------------------------------------------------------\n", + "cate_intercept 8923.022 1328.008 6.719 0.0 6320.175 11525.87\n", + "---------------------------------------------------------------------\n", + "\n", + "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", + "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", + "where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:\n", + "$\\Theta_{ij}(X) = X' coef_{ij} + cate\\_intercept_{ij}$\n", + "Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.\n", + "\"\"\"" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from econml.dml import LinearDML\n", + "\n", + "est = LinearDML(model_y=RandomForestRegressor(), model_t=RandomForestRegressor()).fit(\n", + " Y=dml_data.y, T=dml_data.d, W=dml_data.x, cache_values=True\n", + ")\n", + "est.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "store econml nuisances to use inside dml_sim func for debugging" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "econ_y_res, econ_t_res = est._cached_values.nuisances" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### bonus data" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "dml_data = dml.datasets.fetch_bonus()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLPLR Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: inuidur1\n", + "Treatment variable(s): ['tg']\n", + "Covariates: ['female', 'black', 'othrace', 'dep1', 'dep2', 'q2', 'q3', 'q4', 'q5', 'q6', 'agelt35', 'agegt54', 'durable', 'lusd', 'husd']\n", + "Instrument variable(s): None\n", + "No. Observations: 5099\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: partialling out\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_l: RandomForestRegressor()\n", + "Learner ml_m: RandomForestRegressor()\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_l RMSE: [[1.28984655]]\n", + "Learner ml_m RMSE: [[0.5034491]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 2\n", + "No. 
repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "tg -0.079585 0.035971 -2.212475 0.026934 -0.150086 -0.009083\n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLPLR(dml_data,\n", + " ml_l=RandomForestRegressor(),\n", + " ml_m=RandomForestRegressor(),\n", + " n_folds=2,\n", + " score='partialling out',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.04; cf_d=0.04, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 -0.243371 -0.184128 -0.079585 0.024959 0.084101\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 3.059967 0.794663\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.04, cf_d=0.04, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\sklearn\\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'sensitivity_interval': (-1.9404570165795167, 0.9777681976085163),\n", + " 'RV': 0}" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level=0.05\n", + "\n", + "theta, sigma, nu, sig = dml_sim(\n", + " RandomForestRegressor(), \n", + " RandomForestRegressor(), \n", + " 5, \n", + " dml_data.y.reshape(-1, 1),\n", + " dml_data.d.reshape(-1, 1),\n", + " dml_data.x\n", + ")\n", + "\n", + "result_dict = {\n", + " 'alpha': sig_level,\n", + " 'sensitivity_interval': sensitivity_interval(theta, sigma, nu, sig, sig_level, 0.05, 0.05, 1),\n", + " 'RV': RV(theta, sigma, nu, sig, sig_level)\n", + "}\n", + "result_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev_env2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/prototypes/sensitivity_analysis/ovb_dr.ipynb b/prototypes/sensitivity_analysis/ovb_dr.ipynb new file mode 100644 index 000000000..c2f402349 --- /dev/null +++ b/prototypes/sensitivity_analysis/ovb_dr.ipynb @@ -0,0 +1,1312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import KFold, StratifiedKFold\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.linear_model import LogisticRegression, LinearRegression\n", + "\n", + "from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression\n", + "from econml.utilities import _safe_norm_ppf\n", + "\n", + "\n", + "def dr_sim(model_t, model_y, n_folds, Y, T, X, min_t=0.01):\n", + " 
\"\"\"\n", + " Doubly-robust learner\n", + "\n", + " Parameters\n", + " ----------\n", + " model_t : object\n", + " A classifier for the treatment.\n", + " model_y : object\n", + " A regressor for the outcome.\n", + " n_folds : int\n", + " The number of folds to use in cross-validation.\n", + " Y : array-like\n", + " The outcome variable.\n", + " T : array-like\n", + " The treatment variable.\n", + " X : array-like\n", + " The covariates.\n", + "\n", + " Returns\n", + " -------\n", + " theta : array-like\n", + " The estimated treatment effect per treatment.\n", + " sigma : array-like\n", + " The estimated variance of the outcome residuals per treatment.\n", + " nu : array-like\n", + " The estimated variance of the treatment per treatment.\n", + " cov : array-like\n", + " The covariance matrix of the estimated parameters, of shape (n_treatments, 3, 3).\n", + " \"\"\"\n", + "\n", + " # Initialize KFold cross-validation\n", + " kf = StratifiedKFold(n_splits=n_folds, shuffle=True)\n", + " T_ohe = OneHotEncoder(sparse_output=False, drop='first').fit_transform(T.reshape(-1, 1))\n", + "\n", + " # Initialize arrays to hold predictions\n", + " y_pred = np.zeros((Y.shape[0], T_ohe.shape[1]+1)) # include prediction for T=0\n", + " t_pred = np.zeros((T_ohe.shape[0], T_ohe.shape[1]+1)) # include prediction for T=0\n", + " alpha = np.zeros_like(T_ohe)\n", + "\n", + " # one value of theta, sigma, nu for each non-control treatment\n", + " theta = np.zeros(T_ohe.shape[1])\n", + " sigma = np.zeros(T_ohe.shape[1])\n", + " nu = np.zeros(T_ohe.shape[1])\n", + "\n", + " # one theta, sigma, nu covariance matrix for each non-control treatment\n", + " cov = np.zeros((T_ohe.shape[1], 3, 3))\n", + "\n", + " # Cross-validation loop\n", + " for train_index, test_index in kf.split(X,T):\n", + " X_train, X_test = X[train_index], X[test_index]\n", + " Y_train, Y_test = Y[train_index], Y[test_index]\n", + " T_train = T[train_index]\n", + " To_train, To_test = T_ohe[train_index], T_ohe[test_index]\n", + " \n", + " # Fit the treatment model\n", + " t_pred[test_index] = model_t.fit(X_train, T_train).predict_proba(X_test)\n", + "\n", + " t_pred[test_index] = np.clip(t_pred[test_index], min_t, 1-min_t) # avoid division by zero\n", + " t_pred[test_index] = t_pred[test_index] / np.sum(t_pred[test_index], axis=1, keepdims=True) # normalize to sum to 1\n", + " \n", + " # Fit the outcome model\n", + " model_y.fit(np.hstack([X_train, To_train]), Y_train)\n", + " T_hypo = np.zeros_like(To_test)\n", + " y_pred[test_index,0] += model_y.predict(np.hstack([X_test, T_hypo]))\n", + " for i in range(To_train.shape[1]):\n", + " T_hypo = np.zeros_like(To_test)\n", + " T_hypo[:,i] = 1\n", + " y_pred[test_index,i+1] += model_y.predict(np.hstack([X_test, T_hypo]))\n", + "\n", + " # ATE, sigma^2, and nu^2\n", + " for i in range(T_ohe.shape[1]):\n", + " theta_score = y_pred[:,i+1] - y_pred[:,0] + (Y-y_pred[:,i+1]) * (T_ohe[:,i] == 1)/t_pred[:,i+1] - (Y-y_pred[:,0]) * (np.all(T_ohe==0, axis=1)/t_pred[:,0])\n", + " sigma_score = (Y-np.choose(T, y_pred.T))**2 # exclude rows with other treatments\n", + " alpha[:,i] = (T_ohe[:,i] == 1)/t_pred[:,i+1] - (np.all(T_ohe==0, axis=1))/t_pred[:,0]\n", + " nu_score = 2*(1/t_pred[:,i+1]+1/t_pred[:,0])-alpha[:,i]**2\n", + " theta[i] = np.mean(theta_score)\n", + " sigma[i] = np.mean(sigma_score)\n", + " nu[i] = np.mean(nu_score)\n", + " scores = np.stack([theta_score-theta[i], sigma_score-sigma[i], nu_score-nu[i]], axis=1)\n", + " cov[i,:,:] = (scores.T @ scores / len(scores)) / len(scores)\n", + "\n", + " return 
theta, sigma, nu, cov\n", + "\n", + "def sensitivity_interval(theta, sigma, nu, cov, alpha, c_y, c_t, rho):\n", + " \"\"\"\n", + " Calculate the sensitivity interval for a doubly-robust learner.\n", + " \"\"\"\n", + " C = np.abs(rho) * np.sqrt(c_y) * np.sqrt(c_t/(1-c_t))/2\n", + " ests = np.array([theta, sigma, nu])\n", + " \n", + " coefs_p = np.array([1, C*np.sqrt(nu/sigma), C*np.sqrt(sigma/nu)])\n", + " coefs_n = np.array([1, -C*np.sqrt(nu/sigma), -C*np.sqrt(sigma/nu)])\n", + " # One dimensional normal distribution:\n", + " sigma_p = coefs_p @ cov @ coefs_p\n", + " sigma_n = coefs_n @ cov @ coefs_n\n", + "\n", + " # print(f\"theta bounds: {ests @ coefs_n}, {ests @ coefs_p}\")\n", + " # print(f\"sigma bounds: {sigma_n}, {sigma_p}\")\n", + "\n", + " lb = _safe_norm_ppf(alpha / 2, loc=ests @ coefs_n, scale=np.sqrt(sigma_n))\n", + " ub = _safe_norm_ppf(1 - alpha / 2, loc=ests @ coefs_p, scale=np.sqrt(sigma_p))\n", + "\n", + " return (lb, ub)\n", + "\n", + "\n", + "def RV(theta, sigma, nu, cov, alpha):\n", + " # The robustness value is the degree of confounding of *both* the treatment and the outcome that still produces an interval\n", + " # that excludes zero.\n", + "\n", + " # We're looking for a value of r such that the sensitivity bounds just touch zero\n", + "\n", + " r = 0\n", + " r_up = 1\n", + " r_down = 0\n", + " lb, ub = sensitivity_interval(theta, sigma, nu, cov, alpha, 0, 0, 1)\n", + " if lb < 0 and ub > 0:\n", + " return 0\n", + " \n", + " else:\n", + " if lb > 0:\n", + " target = 0\n", + " mult = 1\n", + " d = lb\n", + " else:\n", + " target = 1\n", + " mult = -1\n", + " d = ub\n", + "\n", + " while abs(d) > 1e-6:\n", + " d = mult * sensitivity_interval(theta, sigma, nu, cov, alpha, r, r, 1)[target]\n", + " if d > 0:\n", + " r_down = r\n", + " else:\n", + " r_up = r\n", + "\n", + " r = (r_down + r_up) / 2\n", + " \n", + " return r\n", + "\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simulate data" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(3)\n", + "n = 10000\n", + "alpha = np.random.normal(size=5)\n", + "X = np.random.normal(size=(n,5), loc=[1,0.5,0,0,0])/3 # closest to center 1, then 2, then 3\n", + "centers = np.array([[1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0]]) # trinary treatment\n", + "# centers = np.array([[1,0,0,0,0], [0,1,0,0,0]]) # uncomment for binary treatment\n", + "\n", + "ds = X[:,None,:]-centers[None,:,:]\n", + "ds = np.einsum(\"nci,nci->nc\", ds, ds)\n", + "\n", + "ps_r = np.exp(-ds)\n", + "ps = ps_r / np.sum(ps_r, axis=1, keepdims=True)\n", + "\n", + "T = np.random.default_rng().multinomial(1, ps) @ np.arange(len(centers))\n", + "\n", + "Y = np.random.normal(size=n) + 3*(T == 1)*X[:,1] - (T == 2) + 2 * X @ alpha\n", + "\n", + "true_theta = np.mean(3*X[:,1]), -1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run simple dml and calculate intermediate values" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "sig_level = 0.1\n", + "\n", + "theta, sigma, nu, sig = dr_sim(LogisticRegression(), LinearRegression(), 2, Y, T, X)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculate a \"sensitivity interval\". \n", + "\n", + "Need to supply \"strength of latent confounder\" as argument." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'i': 0, 'lb': -2.4222316375247765, 'ub': 3.4062936431220736}\n", + "{'i': 1, 'lb': -4.248998723655173, 'ub': 2.1903767876822107}\n" + ] + } + ], + "source": [ + "for i in range(theta.shape[0]):\n", + " lb, ub = sensitivity_interval(theta[i], sigma[i], nu[i], sig[i], sig_level, 0.6, 0.6, 1)\n", + " print({'i': i, 'lb': lb, 'ub': ub})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculate Robustness Value. \n", + "\n", + "The required strength of a latent confounder in order for the confidence interval to include 0." + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'i': 0, 'RV': 0.1342604160308838}\n", + "{'i': 1, 'RV': 0.25589704513549805}\n" + ] + } + ], + "source": [ + "for i in range(theta.shape[0]):\n", + " rv = RV(theta[i], sigma[i], nu[i], sig[i], sig_level)\n", + " print({'i': i, 'RV': rv})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ablations" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'i': 1,\n", + " 'alpha': 0.1,\n", + " 't_ind': 1,\n", + " 'sensitivity_interval': (-5.595590315127781, 3.648832519620443),\n", + " 'RV': 0.17194628715515137},\n", + " {'i': 5,\n", + " 'alpha': 0.1,\n", + " 't_ind': 1,\n", + " 'sensitivity_interval': (-4.242302203189104, 2.1724266319271592),\n", + " 'RV': 0.2583136558532715},\n", + " {'i': 15,\n", + " 'alpha': 0.1,\n", + " 't_ind': 1,\n", + " 'sensitivity_interval': (-4.252962106477528, 2.1979664010821613),\n", + " 'RV': 0.2550685405731201},\n", + " {'i': 30,\n", + " 'alpha': 0.1,\n", + " 't_ind': 1,\n", + " 'sensitivity_interval': (-4.247614757215935, 2.1916647643905867),\n", + " 'RV': 0.2557647228240967}]" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level = 0.1\n", + "t_ind = 1\n", + "\n", + "results = []\n", + "for i in [1, 5, 15, 30]:\n", + " theta, sigma, nu, sig = dr_sim(LogisticRegression(), LinearRegression(), 2, Y, T, X[:,:i])\n", + " result_dict = {\n", + " 'i': i,\n", + " 'alpha': sig_level,\n", + " 't_ind': t_ind,\n", + " 'sensitivity_interval': sensitivity_interval(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level, 0.6, 0.6, 1),\n", + " 'RV': RV(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level)\n", + " }\n", + " results.append(result_dict)\n", + " \n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ialphat_indsensitivity_intervalRV
010.11(-5.595590315127781, 3.648832519620443)0.171946
150.11(-4.242302203189104, 2.1724266319271592)0.258314
2150.11(-4.252962106477528, 2.1979664010821613)0.255069
3300.11(-4.247614757215935, 2.1916647643905867)0.255765
\n", + "
" + ], + "text/plain": [ + " i alpha t_ind sensitivity_interval RV\n", + "0 1 0.1 1 (-5.595590315127781, 3.648832519620443) 0.171946\n", + "1 5 0.1 1 (-4.242302203189104, 2.1724266319271592) 0.258314\n", + "2 15 0.1 1 (-4.252962106477528, 2.1979664010821613) 0.255069\n", + "3 30 0.1 1 (-4.247614757215935, 2.1916647643905867) 0.255765" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " pd.DataFrame(results)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DoubleML" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "import doubleml as dml\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YTX0X1X2X3X4
01.02516210.2150800.139086-0.209000-0.014606-0.159073
10.5761811-0.1046220.4615410.2937730.5698580.016678
2-1.67653110.198441-0.015120-0.5154920.327456-0.367023
30.0591011-0.0616820.0981170.4953830.078905-0.341262
42.76424810.0956690.375082-0.053504-0.256279-0.076677
\n", + "
" + ], + "text/plain": [ + " Y T X0 X1 X2 X3 X4\n", + "0 1.025162 1 0.215080 0.139086 -0.209000 -0.014606 -0.159073\n", + "1 0.576181 1 -0.104622 0.461541 0.293773 0.569858 0.016678\n", + "2 -1.676531 1 0.198441 -0.015120 -0.515492 0.327456 -0.367023\n", + "3 0.059101 1 -0.061682 0.098117 0.495383 0.078905 -0.341262\n", + "4 2.764248 1 0.095669 0.375082 -0.053504 -0.256279 -0.076677" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([\n", + " pd.DataFrame(Y).squeeze().to_frame('Y'),\n", + " pd.DataFrame(T).squeeze().to_frame('T'),\n", + " pd.DataFrame(X).add_prefix('X'),\n", + "], axis=1)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dml_data = dml.DoubleMLData(df, 'Y', 'T')\n", + "dml_data" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Incompatible data. To fit an IRM model with DML exactly one binary variable with values 0 and 1 needs to be specified as treatment variable.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[96], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m dml_obj \u001b[38;5;241m=\u001b[39m \u001b[43mdml\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDoubleMLIRM\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdml_data\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mml_g\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mLinearRegression\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mml_m\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mLogisticRegression\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_folds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mscore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpartialling out\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 6\u001b[0m dml_obj\u001b[38;5;241m.\u001b[39mfit()\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(dml_obj)\n", + "File \u001b[1;32mc:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\irm\\irm.py:134\u001b[0m, in \u001b[0;36mDoubleMLIRM.__init__\u001b[1;34m(self, obj_dml_data, ml_g, ml_m, n_folds, n_rep, score, weights, normalize_ipw, trimming_rule, trimming_threshold, draw_sample_splitting)\u001b[0m\n\u001b[0;32m 116\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 117\u001b[0m obj_dml_data,\n\u001b[0;32m 118\u001b[0m ml_g,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 126\u001b[0m trimming_threshold\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1e-2\u001b[39m,\n\u001b[0;32m 127\u001b[0m draw_sample_splitting\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m 128\u001b[0m 
\u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(obj_dml_data,\n\u001b[0;32m 129\u001b[0m n_folds,\n\u001b[0;32m 130\u001b[0m n_rep,\n\u001b[0;32m 131\u001b[0m score,\n\u001b[0;32m 132\u001b[0m draw_sample_splitting)\n\u001b[1;32m--> 134\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dml_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 135\u001b[0m valid_scores \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mATE\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mATTE\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 136\u001b[0m _check_score(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscore, valid_scores, allow_callable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[1;32mc:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\irm\\irm.py:247\u001b[0m, in \u001b[0;36mDoubleMLIRM._check_data\u001b[1;34m(self, obj_dml_data)\u001b[0m\n\u001b[0;32m 245\u001b[0m zero_one_treat \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mall((np\u001b[38;5;241m.\u001b[39mpower(obj_dml_data\u001b[38;5;241m.\u001b[39md, \u001b[38;5;241m2\u001b[39m) \u001b[38;5;241m-\u001b[39m obj_dml_data\u001b[38;5;241m.\u001b[39md) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m 246\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (one_treat \u001b[38;5;241m&\u001b[39m binary_treat \u001b[38;5;241m&\u001b[39m zero_one_treat):\n\u001b[1;32m--> 247\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mIncompatible data. \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 248\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTo fit an IRM model with DML \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 249\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mexactly one binary variable with values 0 and 1 \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 250\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mneeds to be specified as treatment variable.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 251\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n", + "\u001b[1;31mValueError\u001b[0m: Incompatible data. To fit an IRM model with DML exactly one binary variable with values 0 and 1 needs to be specified as treatment variable." 
+ ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLIRM(dml_data,\n", + " ml_g=LinearRegression(),\n", + " ml_m=LogisticRegression(),\n", + " n_folds=2,\n", + " score='partialling out',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.03; cf_d=0.03, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 -0.471507 -0.446 -0.397119 -0.348237 -0.322857\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 21.873191 20.629383\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " cf_y cf_d rho delta_theta\n", + "T 0.019825 0.000095 -1.0 -0.0022\n" + ] + } + ], + "source": [ + "sens_benchmark = dml_obj.sensitivity_benchmark(benchmarking_set=[\"X4\"])\n", + "print(sens_benchmark)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# New datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### DoubleML confounded synthetic data" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from doubleml.datasets import make_confounded_plr_data, make_confounded_irm_data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\datasets.py:1050: UserWarning: Propensity score is close to 0 or 1. Trimming is at 0.01 and 0.99 is applied\n", + " warnings.warn(f'Propensity score is close to 0 or 1. '\n" + ] + } + ], + "source": [ + "cf_y = 0.1\n", + "cf_d = 0.1\n", + "theta = 5.0\n", + "dpg_dict = make_confounded_irm_data(n_obs=10000, cf_y=cf_y, cf_d=cf_d, theta=theta)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "x_cols = [f'X{i + 1}' for i in np.arange(dpg_dict['x'].shape[1])]\n", + "df = pd.DataFrame(np.column_stack((dpg_dict['x'], dpg_dict['y'], dpg_dict['d'])), columns=x_cols + ['y', 'd'])\n", + "dml_data = dml.DoubleMLData(df, 'y', 'd')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLIRM Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: y\n", + "Treatment variable(s): ['d']\n", + "Covariates: ['X1', 'X2', 'X3', 'X4', 'X5']\n", + "Instrument variable(s): None\n", + "No. 
Observations: 10000\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: ATE\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_g: RandomForestRegressor()\n", + "Learner ml_m: RandomForestClassifier()\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_g0 RMSE: [[1.08528722]]\n", + "Learner ml_g1 RMSE: [[1.14348731]]\n", + "Classification:\n", + "Learner ml_m Log Loss: [[0.67196054]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 2\n", + "No. repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "d 5.145625 0.062208 82.716619 0.0 5.0237 5.26755\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\utils\\_checks.py:205: UserWarning: Propensity predictions from learner RandomForestClassifier() for ml_m are close to zero or one (eps=1e-12).\n", + " warnings.warn(f'Propensity predictions from learner {str(learner)} for'\n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLIRM(dml_data,\n", + " ml_g=RandomForestRegressor(),\n", + " ml_m=RandomForestClassifier(),\n", + " n_folds=2,\n", + " score='ATE',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.1; cf_d=0.1, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 4.858139 4.961768 5.145625 5.329482 5.466168\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 90.573845 84.951031\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'sensitivity_interval': (2.567683475272766, 7.648542718465433),\n", + " 'RV': 0.8172124624252319}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level=0.05\n", + "t_ind = 0\n", + "\n", + "theta, sigma, nu, sig = dr_sim(\n", + " RandomForestClassifier(), \n", + " RandomForestRegressor(), \n", + " 2, \n", + " dpg_dict['y'],\n", + " dpg_dict['d'].astype(int),\n", + " dpg_dict['x']\n", + ")\n", + "\n", + "result_dict = {\n", + " 'alpha': sig_level,\n", + " 'sensitivity_interval': sensitivity_interval(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level, 0.6, 0.6, 1),\n", + " 'RV': RV(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level)\n", + "}\n", + "result_dict" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 401k data" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "dml_data = dml.datasets.fetch_401K()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "model selection for propensity model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RandomForestClassifier selected with score: 0.6435\n", + "401k data treatment model selection:\n", + "Best classifier: RandomForestClassifier(max_depth=10, n_estimators=200, random_state=42)\n", + "Best score: 0.6435\n", + "best rf score: 0.6435\n", + "best lr score: 0.6578\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "# Define parameter grid for model selection\n", + "# Define parameter grid for model selection\n", + "param_grid = {\n", + " 'C': [0.1, 1, 10, 100] # For LogisticRegression\n", + "}\n", + "\n", + "# First find the best LogisticRegression model\n", + "lr_grid_search = GridSearchCV(\n", + " estimator=LogisticRegression(max_iter=1000),\n", + " param_grid=param_grid,\n", + " cv=3,\n", + " scoring='accuracy'\n", + ")\n", + "lr_grid_search.fit(dml_data.x, dml_data.d.astype(int))\n", + "best_lr = lr_grid_search.best_estimator_\n", + "best_lr_score = lr_grid_search.best_score_\n", + "\n", + "# Find the best RandomForestClassifier\n", + "rf_param_grid = {\n", + " 'n_estimators': [50, 100, 200],\n", + " 'max_depth': [None, 10, 20]\n", + "}\n", + "rf_grid_search = GridSearchCV(\n", + " estimator=RandomForestClassifier(random_state=42),\n", + " param_grid=rf_param_grid,\n", + " cv=3,\n", + " scoring='accuracy'\n", + ")\n", + "rf_grid_search.fit(dml_data.x, dml_data.d.astype(int))\n", + "best_rf = rf_grid_search.best_estimator_\n", + "best_rf_score = rf_grid_search.best_score_\n", + "\n", + "# Choose the overall best model\n", + "if best_lr_score >= best_rf_score:\n", + " grid_search = lr_grid_search\n", + " print(f\"LogisticRegression selected with score: {best_lr_score:.4f}\")\n", + "else:\n", + " grid_search = rf_grid_search\n", + "print(f\"RandomForestClassifier selected with score: {best_rf_score:.4f}\")\n", + "\n", + "# Use the best estimator from grid search\n", + "best_classifier = grid_search.best_estimator_\n", + "print('401k data treatment model selection:')\n", + "print(f\"Best classifier: {best_classifier}\")\n", + "print(f\"Best score: {grid_search.best_score_:.4f}\")\n", + "print(f\"best rf score: {best_rf_score:.4f}\")\n", + "print(f\"best lr score: {best_lr_score:.4f}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLIRM Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: inuidur1\n", + "Treatment variable(s): ['tg']\n", + "Covariates: ['female', 'black', 'othrace', 'dep1', 'dep2', 'q2', 'q3', 'q4', 'q5', 'q6', 'agelt35', 'agegt54', 'durable', 'lusd', 'husd']\n", + "Instrument variable(s): None\n", + "No. Observations: 5099\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: ATE\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_g: RandomForestRegressor()\n", + "Learner ml_m: RandomForestClassifier()\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_g0 RMSE: [[1.2687313]]\n", + "Learner ml_g1 RMSE: [[1.30580821]]\n", + "Classification:\n", + "Learner ml_m Log Loss: [[0.73043429]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 5\n", + "No. 
repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "tg -0.080843 0.215406 -0.375307 0.707432 -0.503031 0.341344\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Propensity predictions from learner RandomForestClassifier() for ml_m are close to zero or one (eps=1e-12).\n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLIRM(dml_data,\n", + " ml_g=RandomForestRegressor(),\n", + " ml_m=RandomForestClassifier(),\n", + " # ml_m=best_classifier,\n", + " n_folds=5,\n", + " score='ATE')\n", + "dml_obj.fit()\n", + "print(dml_obj)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "sensitivity_elements sigma2 and nu2 have to be positive. Got sigma2 [[[1.64234643]]] and nu2 [[[-103.64744455]]]. Most likely this is due to low quality learners (especially propensity scores).", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[82], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mdml_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msensitivity_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcf_y\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.04\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcf_d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.04\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrho\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1.\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(dml_obj\u001b[38;5;241m.\u001b[39msensitivity_summary)\n", + "File \u001b[1;32mc:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\double_ml.py:1419\u001b[0m, in \u001b[0;36mDoubleML.sensitivity_analysis\u001b[1;34m(self, cf_y, cf_d, rho, level, null_hypothesis)\u001b[0m\n\u001b[0;32m 1417\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_framework \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 1418\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mApply fit() before sensitivity_analysis().\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m-> 1419\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_framework\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msensitivity_analysis\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1420\u001b[0m \u001b[43m \u001b[49m\u001b[43mcf_y\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcf_y\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1421\u001b[0m \u001b[43m \u001b[49m\u001b[43mcf_d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcf_d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1422\u001b[0m \u001b[43m \u001b[49m\u001b[43mrho\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrho\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1423\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1424\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mnull_hypothesis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnull_hypothesis\u001b[49m\n\u001b[0;32m 1425\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1427\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[1;32mc:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\double_ml_framework.py:613\u001b[0m, in \u001b[0;36mDoubleMLFramework.sensitivity_analysis\u001b[1;34m(self, cf_y, cf_d, rho, level, null_hypothesis)\u001b[0m\n\u001b[0;32m 609\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnull_hypothesis has to be of type float or np.ndarry. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 610\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(null_hypothesis)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mtype\u001b[39m(null_hypothesis))\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m was passed.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 612\u001b[0m \u001b[38;5;66;03m# compute sensitivity analysis\u001b[39;00m\n\u001b[1;32m--> 613\u001b[0m sensitivity_dict \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calc_sensitivity_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcf_y\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcf_y\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcf_d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcf_d\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrho\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrho\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 615\u001b[0m \u001b[38;5;66;03m# compute robustess values with respect to null_hypothesis\u001b[39;00m\n\u001b[0;32m 616\u001b[0m rv \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mfull(shape\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_n_thetas, fill_value\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mnan)\n", + "File \u001b[1;32mc:\\Users\\fabiovera\\AppData\\Local\\anaconda3\\envs\\dev_env2\\lib\\site-packages\\doubleml\\double_ml_framework.py:462\u001b[0m, in \u001b[0;36mDoubleMLFramework._calc_sensitivity_analysis\u001b[1;34m(self, cf_y, cf_d, rho, level)\u001b[0m\n\u001b[0;32m 459\u001b[0m psi_scaled \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_scaled_psi\n\u001b[0;32m 461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (np\u001b[38;5;241m.\u001b[39many(sigma2 \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m)) \u001b[38;5;241m|\u001b[39m (np\u001b[38;5;241m.\u001b[39many(nu2 \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m)):\n\u001b[1;32m--> 462\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msensitivity_elements sigma2 and nu2 have to be positive. 
\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 463\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGot sigma2 \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(sigma2)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and nu2 \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(nu2)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMost likely this is due to low quality learners (especially propensity scores).\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 466\u001b[0m \u001b[38;5;66;03m# elementwise operations\u001b[39;00m\n\u001b[0;32m 467\u001b[0m confounding_strength \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mmultiply(np\u001b[38;5;241m.\u001b[39mabs(rho), np\u001b[38;5;241m.\u001b[39msqrt(np\u001b[38;5;241m.\u001b[39mmultiply(cf_y, np\u001b[38;5;241m.\u001b[39mdivide(cf_d, \u001b[38;5;241m1.0\u001b[39m\u001b[38;5;241m-\u001b[39mcf_d))))\n", + "\u001b[1;31mValueError\u001b[0m: sensitivity_elements sigma2 and nu2 have to be positive. Got sigma2 [[[1.64234643]]] and nu2 [[[-103.64744455]]]. Most likely this is due to low quality learners (especially propensity scores)." + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.04, cf_d=0.04, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'sensitivity_interval': (-0.15765123322817087, 0.01212238369880464),\n", + " 'RV': 0}" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level=0.05\n", + "t_ind = 0\n", + "\n", + "theta, sigma, nu, sig = dr_sim(\n", + " best_classifier, \n", + " RandomForestRegressor(), \n", + " 5, \n", + " dml_data.y,\n", + " dml_data.d.astype(int),\n", + " dml_data.x\n", + ")\n", + "\n", + "result_dict = {\n", + " 'alpha': sig_level,\n", + " 'sensitivity_interval': sensitivity_interval(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level, 0.00001, 0.0001, 1),\n", + " 'RV': RV(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level)\n", + "}\n", + "result_dict" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### bonus data" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "dml_data = dml.datasets.fetch_bonus()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "model selection for propensity model" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LogisticRegression selected with score: -0.6442\n", + "RandomForestClassifier selected with score: -0.6624\n", + "bonus data treatment model selection:\n", + "Best classifier: LogisticRegression(C=0.1, max_iter=1000)\n", + "Best score: -0.6442\n", + "best rf score: -0.6624\n", + "best lr score: -0.6442\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "# Define parameter grid for model selection\n", + "# Define parameter grid for model selection\n", + "param_grid = {\n", + " 'C': [0.1, 1, 10, 100] # For LogisticRegression\n", + "}\n", + "\n", + "# First find the best LogisticRegression model\n", + "lr_grid_search = GridSearchCV(\n", + " 
estimator=LogisticRegression(max_iter=1000),\n", + " param_grid=param_grid,\n", + " cv=3,\n", + " scoring='neg_log_loss'\n", + ")\n", + "lr_grid_search.fit(dml_data.x, dml_data.d.astype(int))\n", + "best_lr = lr_grid_search.best_estimator_\n", + "best_lr_score = lr_grid_search.best_score_\n", + "\n", + "# Find the best RandomForestClassifier\n", + "rf_param_grid = {\n", + " 'n_estimators': [50, 100, 200],\n", + " 'max_depth': [None, 10, 20]\n", + "}\n", + "rf_grid_search = GridSearchCV(\n", + " estimator=RandomForestClassifier(random_state=42),\n", + " param_grid=rf_param_grid,\n", + " cv=3,\n", + " scoring='neg_log_loss'\n", + ")\n", + "rf_grid_search.fit(dml_data.x, dml_data.d.astype(int))\n", + "best_rf = rf_grid_search.best_estimator_\n", + "best_rf_score = rf_grid_search.best_score_\n", + "\n", + "# Choose the overall best model\n", + "if best_lr_score >= best_rf_score:\n", + " grid_search = lr_grid_search\n", + " print(f\"LogisticRegression selected with score: {best_lr_score:.4f}\")\n", + "else:\n", + " grid_search = rf_grid_search\n", + " print(f\"RandomForestClassifier selected with score: {best_rf_score:.4f}\")\n", + "\n", + "# Use the best estimator from grid search\n", + "best_classifier = grid_search.best_estimator_\n", + "print('bonus data treatment model selection:')\n", + "print(f\"Best classifier: {best_classifier}\")\n", + "print(f\"Best score: {grid_search.best_score_:.4f}\")\n", + "print(f\"best rf score: {best_rf_score:.4f}\")\n", + "print(f\"best lr score: {best_lr_score:.4f}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLIRM Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: inuidur1\n", + "Treatment variable(s): ['tg']\n", + "Covariates: ['female', 'black', 'othrace', 'dep1', 'dep2', 'q2', 'q3', 'q4', 'q5', 'q6', 'agelt35', 'agegt54', 'durable', 'lusd', 'husd']\n", + "Instrument variable(s): None\n", + "No. Observations: 5099\n", + "\n", + "------------------ Score & algorithm ------------------\n", + "Score function: ATE\n", + "\n", + "------------------ Machine learner ------------------\n", + "Learner ml_g: RandomForestRegressor()\n", + "Learner ml_m: LogisticRegression(C=0.1, max_iter=1000)\n", + "Out-of-sample Performance:\n", + "Regression:\n", + "Learner ml_g0 RMSE: [[1.29747417]]\n", + "Learner ml_g1 RMSE: [[1.33043293]]\n", + "Classification:\n", + "Learner ml_m Log Loss: [[0.64522511]]\n", + "\n", + "------------------ Resampling ------------------\n", + "No. folds: 2\n", + "No. 
repeated sample splits: 1\n", + "\n", + "------------------ Fit summary ------------------\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "tg -0.059058 0.038039 -1.552562 0.120528 -0.133613 0.015497\n" + ] + } + ], + "source": [ + "dml_obj = dml.DoubleMLIRM(dml_data,\n", + " ml_g=RandomForestRegressor(),\n", + " ml_m=best_classifier,\n", + " n_folds=2,\n", + " score='ATE',)\n", + "dml_obj.fit()\n", + "print(dml_obj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== Sensitivity Analysis ==================\n", + "\n", + "------------------ Scenario ------------------\n", + "Significance Level: level=0.95\n", + "Sensitivity parameters: cf_y=0.0; cf_d=0.0, rho=1.0\n", + "\n", + "------------------ Bounds with CI ------------------\n", + " CI lower theta lower theta theta upper CI upper\n", + "0 -0.121627 -0.059058 -0.059058 -0.059058 0.003511\n", + "\n", + "------------------ Robustness Values ------------------\n", + " H_0 RV (%) RVa (%)\n", + "0 0.0 2.128758 0.000434\n" + ] + } + ], + "source": [ + "dml_obj.sensitivity_analysis(cf_y=0.04, cf_d=0.04, rho=1.)\n", + "print(dml_obj.sensitivity_summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'sensitivity_interval': (-0.29151616548820364, 0.1365988298703495),\n", + " 'RV': 0}" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_level=0.05\n", + "t_ind = 0\n", + "\n", + "theta, sigma, nu, sig = dr_sim(\n", + " best_classifier, \n", + " RandomForestRegressor(), \n", + " 5, \n", + " dml_data.y,\n", + " dml_data.d.astype(int),\n", + " dml_data.x\n", + ")\n", + "\n", + "result_dict = {\n", + " 'alpha': sig_level,\n", + " 'sensitivity_interval': sensitivity_interval(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level, 0.05, 0.05, 1),\n", + " 'RV': RV(theta[t_ind], sigma[t_ind], nu[t_ind], sig[t_ind], sig_level)\n", + "}\n", + "result_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev_env2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}