54 changes: 46 additions & 8 deletions econml/dml/causal_forest.py
@@ -17,7 +17,8 @@
from .._cate_estimator import LinearCateEstimator
from .._shap import _shap_explain_multitask_model_cate
from .._ortho_learner import _OrthoLearner
from ..validate import sensitivity_interval, RV, dml_sensitivity_values
from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values,
sensitivity_summary)


class _CausalForestFinalWrapper:
@@ -817,6 +818,35 @@ def tune(self, Y, T, *, X=None, W=None,

return self

def sensitivity_summary(self, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3):
"""
Generate a summary of the sensitivity analysis for the ATE.

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

decimals: int, default 3
Number of decimal places to round each column to.

"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_summary(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, decimals=decimals)

def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE.
@@ -851,25 +881,32 @@ def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_interval(**sensitivity_params, alpha=alpha,
return sensitivity_interval(**sensitivity_params._asdict(), alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, alpha=0.05, interval_type='ci'):
def robustness_value(self, null_hypothesis=0, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE.

The robustness value is the level of confounding (between 0 and 1) in
*both* the treatment and outcome that would make
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.
*both* the treatment and outcome that would produce enough omitted variable bias that
we could no longer reject the null hypothesis. With the default null_hypothesis of 0,
the robustness value is the level of confounding that would make the
ATE statistically insignificant.

A higher value indicates a more robust estimate.

Returns 0 if the original interval already includes the null_hypothesis.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the robustness value.

@@ -885,7 +922,8 @@ def robustness_value(self, alpha=0.05, interval_type='ci'):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)
return RV(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis,
alpha=alpha, interval_type=interval_type)

# override only so that we can update the docstring to indicate support for `blb`
def fit(self, Y, T, *, X=None, W=None, sample_weight=None, groups=None,
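A usage sketch of the sensitivity API added above (not part of this diff): the data-generating process and estimator settings are illustrative assumptions, with scalar outcome and treatment as the checks above require.

```python
# Illustrative sketch only: data and settings below are assumptions, not from this PR.
import numpy as np
from econml.dml import CausalForestDML

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
T = X[:, 0] + rng.normal(size=1000)            # scalar continuous treatment
Y = 2.0 * T + X[:, 1] + rng.normal(size=1000)  # scalar outcome

est = CausalForestDML(random_state=0)
est.fit(Y, T, X=X)

# Formatted summary of the sensitivity analysis at the assumed confounding levels
print(est.sensitivity_summary(null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05))

# Bounds on the ATE under that confounding; raises ValueError for
# multi-dimensional Y or T, per the checks above
print(est.sensitivity_interval(alpha=0.05, c_y=0.05, c_t=0.05, rho=1.))

# Level of confounding in *both* T and Y needed to overturn significance
print(est.robustness_value(null_hypothesis=0, alpha=0.05))
```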
55 changes: 47 additions & 8 deletions econml/dml/dml.py
@@ -25,7 +25,8 @@
shape, get_feature_names_or_default, filter_none_kwargs)
from .._shap import _shap_explain_model_cate
from ..sklearn_extensions.model_selection import get_selector, SingleModelSelector
from ..validate import sensitivity_interval, RV, dml_sensitivity_values
from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values,
sensitivity_summary)


def _combine(X, W, n_samples):
@@ -606,6 +607,36 @@ def bias_part_of_coef(self):
def fit_cate_intercept_(self):
return self.rlearner_model_final_._fit_cate_intercept

def sensitivity_summary(self, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3):
"""
Generate a summary of the sensitivity analysis for the ATE.

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

decimals: int, default 3
Number of decimal places to round each column to.

"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_summary(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, decimals=decimals)


def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE.
@@ -640,25 +671,32 @@ def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_interval(**sensitivity_params, alpha=alpha,
return sensitivity_interval(**sensitivity_params._asdict(), alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, alpha=0.05, interval_type='ci'):
def robustness_value(self, null_hypothesis=0, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE.

The robustness value is the level of confounding (between 0 and 1) in
*both* the treatment and outcome that would make
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.
*both* the treatment and outcome that would produce enough omitted variable bias that
we could no longer reject the null hypothesis. With the default null_hypothesis of 0,
the robustness value is the level of confounding that would make the
ATE statistically insignificant.

A higher value indicates a more robust estimate.

Returns 0 if the original interval already includes the null_hypothesis.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the robustness value.

@@ -674,7 +712,8 @@ def robustness_value(self, alpha=0.05, interval_type='ci'):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)
return RV(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis,
alpha=alpha, interval_type=interval_type)


class LinearDML(StatsModelsCateEstimatorMixin, DML):
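The same workflow applies to the DML estimators changed here; a minimal sketch with LinearDML (the data and nuisance models are illustrative assumptions, not from this diff):

```python
# Illustrative sketch only: data and nuisance models are assumptions, not from this PR.
import numpy as np
from econml.dml import LinearDML
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(1)
W = rng.normal(size=(500, 3))
T = W[:, 0] + rng.normal(size=500)
Y = 1.5 * T + W[:, 1] + rng.normal(size=500)

est = LinearDML(model_y=LinearRegression(), model_t=LinearRegression())
est.fit(Y, T, W=W)

est.sensitivity_summary(c_y=0.05, c_t=0.05)   # formatted sensitivity summary
est.sensitivity_interval(c_y=0.05, c_t=0.05)  # ATE bounds under assumed confounding
est.robustness_value(null_hypothesis=0)       # confounding level that overturns significance
```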
61 changes: 52 additions & 9 deletions econml/dr/_drlearner.py
@@ -52,7 +52,8 @@
from ..utilities import (check_high_dimensional,
filter_none_kwargs, inverse_onehot, get_feature_names_or_default)
from .._shap import _shap_explain_multitask_model_cate, _shap_explain_model_cate
from ..validate import sensitivity_interval, RV, dr_sensitivity_values
from ..validate.sensitivity_analysis import (sensitivity_interval, RV,
sensitivity_summary, dr_sensitivity_values)


class _ModelNuisance(ModelSelector):
@@ -756,6 +757,39 @@ def shap_values(self, X, *, feature_names=None, treatment_names=None, output_nam
background_samples=background_samples)
shap_values.__doc__ = LinearCateEstimator.shap_values.__doc__

def sensitivity_summary(self, T, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3):
"""
Generate a summary of the sensitivity analysis for the ATE for a given treatment.

Parameters
----------
T: alphanumeric
The treatment with respect to which to calculate the sensitivity summary.

null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

decimals: int, default 3
Number of decimal places to round each column to.

"""
if T not in self.transformer.categories_[0]:
# raise own ValueError here because sometimes error from sklearn is not transparent
raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}")
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {
k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()}
return sensitivity_summary(**sensitivity_params, null_hypothesis=null_hypothesis, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, decimals=decimals)

def sensitivity_interval(self, T, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE for a given treatment category.
@@ -793,20 +827,25 @@ def sensitivity_interval(self, T, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interv
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params.items()}
sensitivity_params = {
k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()}
return sensitivity_interval(**sensitivity_params, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)


def robustness_value(self, T, alpha=0.05, interval_type='ci'):
def robustness_value(self, T, null_hypothesis=0, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE for a given treatment category.

The robustness value is the level of confounding (between 0 and 1) in
*both* the treatment and outcome that would make
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.
*both* the treatment and outcome that would produce enough omitted variable bias that
we could no longer reject the null hypothesis. With the default null_hypothesis of 0,
the robustness value is the level of confounding that would make the
ATE statistically insignificant.

A higher value indicates a more robust estimate.

Returns 0 if the original interval already includes the null_hypothesis.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

@@ -815,6 +854,9 @@ def robustness_value(self, T, alpha=0.05, interval_type='ci'):
T: alphanumeric
The treatment with respect to which to calculate the robustness value.

null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the robustness value.

@@ -832,8 +874,9 @@ def robustness_value(self, T, alpha=0.05, interval_type='ci'):
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params.items()}
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)
sensitivity_params = {
k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()}
return RV(**sensitivity_params, null_hypothesis=null_hypothesis, alpha=alpha, interval_type=interval_type)


class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
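For the DR learner, the methods are per-treatment and take the treatment category as T; a minimal sketch with LinearDRLearner (data, models, and the 'a'/'b'/'c' categories are illustrative assumptions):

```python
# Illustrative sketch only: data, models, and categories are assumptions, not from this PR.
import numpy as np
from econml.dr import LinearDRLearner
from sklearn.linear_model import LinearRegression, LogisticRegression

rng = np.random.default_rng(2)
W = rng.normal(size=(500, 3))
T = rng.choice(['a', 'b', 'c'], size=500)              # 'a' acts as the control
Y = 1.0 * (T == 'b') + W[:, 0] + rng.normal(size=500)

est = LinearDRLearner(model_regression=LinearRegression(),
                      model_propensity=LogisticRegression())
est.fit(Y, T, W=W)

est.sensitivity_summary(T='b')                  # summary for 'b' vs. the control
est.sensitivity_interval(T='c', alpha=0.05)     # bounds for 'c' vs. the control
est.robustness_value(T='b', null_hypothesis=0)
# est.sensitivity_summary(T='a')  # fails: no model is fitted for the control
```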
1 change: 1 addition & 0 deletions econml/tests/test_dml.py
@@ -259,6 +259,7 @@ def make_random(n, is_discrete, d):
else:
est.sensitivity_interval()
est.robustness_value()
est.sensitivity_summary()

if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
8 changes: 8 additions & 0 deletions econml/tests/test_drlearner.py
@@ -148,20 +148,28 @@ def make_random(is_discrete, d):
est.sensitivity_interval(T='c')
est.robustness_value(T='c')

est.sensitivity_summary(T='b')
est.sensitivity_summary(T='c')

# ensure sensitivity analysis fails on control
with pytest.raises(AssertionError):
est.sensitivity_interval(T='a')

with pytest.raises(AssertionError):
est.robustness_value(T='a')

with pytest.raises(AssertionError):
est.sensitivity_summary(T='a')

# ensure failure on unknown treatment values
with pytest.raises(ValueError):
est.sensitivity_interval(T=1)

with pytest.raises(ValueError):
est.robustness_value(T=1)

with pytest.raises(ValueError):
est.sensitivity_summary(T=1)


# make sure we can call the marginal_effect and effect methods
43 changes: 39 additions & 4 deletions econml/tests/test_sensitivity_analysis.py
@@ -6,6 +6,7 @@

from econml.dml import LinearDML, CausalForestDML
from econml.dr import LinearDRLearner
from econml.validate.sensitivity_analysis import sensitivity_interval
from sklearn.linear_model import LinearRegression, LogisticRegression
import numpy as np

@@ -92,7 +93,41 @@ def test_params(self):
rv4 = est.robustness_value(**T_arg, alpha=0.05, interval_type='theta')
self.assertTrue(rv4 > rv)

# ensure failure on invalid interval_type
with pytest.raises(ValueError):
est.sensitivity_interval(**T_arg, alpha=0.05,
c_y=0.05, c_t=0.05, rho=1, interval_type='foo')
# check that null_hypothesis is passed through
rv5 = est.robustness_value(**T_arg, alpha=0.05, null_hypothesis=10)
self.assertNotEqual(rv5, rv)


def test_invalid_params(self):

theta = 0.5
sigma = 0.5
nu = 0.5
cov = np.random.normal(size=(3, 3))

sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1)

# check that c_y, c_t, rho are constrained
with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=-0.5, c_t=0.05, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=-0.5, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=1.5, c_t=0.05, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=-1.5)

# ensure we raise an error on invalid sigma, nu
with pytest.raises(ValueError):
sensitivity_interval(theta, -1, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, -1, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1)

# ensure failure on invalid interval_type
with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1, interval_type='foo')
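For reference, a standalone sketch of the direct call these tests exercise; theta (the point estimate), sigma and nu (scale terms), and cov (their estimated covariance) are placeholder values, not quantities from a real fit:

```python
# Placeholder inputs only; the constraints below mirror the tests above.
import numpy as np
from econml.validate.sensitivity_analysis import sensitivity_interval

theta, sigma, nu = 0.5, 0.5, 0.5
cov = 0.01 * np.eye(3)

# Per the tests: c_y and c_t must lie in [0, 1], rho in [-1, 1],
# sigma and nu must be non-negative, and interval_type is 'ci' or 'theta'
interval = sensitivity_interval(theta, sigma, nu, cov, alpha=0.05,
                                c_y=0.05, c_t=0.05, rho=1, interval_type='ci')
```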

4 changes: 1 addition & 3 deletions econml/validate/__init__.py
@@ -5,8 +5,6 @@

from .drtester import DRTester
from .results import BLPEvaluationResults, CalibrationEvaluationResults, UpliftEvaluationResults, EvaluationResults
from .sensitivity_analysis import sensitivity_interval, RV, dml_sensitivity_values, dr_sensitivity_values

__all__ = ['DRTester',
'BLPEvaluationResults', 'CalibrationEvaluationResults', 'UpliftEvaluationResults', 'EvaluationResults',
'sensitivity_interval', 'RV', 'dml_sensitivity_values', 'dr_sensitivity_values']
'BLPEvaluationResults', 'CalibrationEvaluationResults', 'UpliftEvaluationResults', 'EvaluationResults']
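Since the sensitivity helpers are no longer re-exported from econml.validate, downstream code imports them from the submodule instead, matching the imports updated earlier in this diff:

```python
# New import path after this change
from econml.validate.sensitivity_analysis import (
    sensitivity_interval, sensitivity_summary, RV,
    dml_sensitivity_values, dr_sensitivity_values,
)
```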