54 changes: 46 additions & 8 deletions econml/dml/causal_forest.py
@@ -17,7 +17,8 @@
from .._cate_estimator import LinearCateEstimator
from .._shap import _shap_explain_multitask_model_cate
from .._ortho_learner import _OrthoLearner
from ..validate import sensitivity_interval, RV, dml_sensitivity_values
from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values,
sensitivity_summary)


class _CausalForestFinalWrapper:
@@ -817,6 +818,35 @@ def tune(self, Y, T, *, X=None, W=None,

return self

def sensitivity_summary(self, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3):
"""
Generate a summary of the sensitivity analysis for the ATE.

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

decimals: int, default 3
Number of decimal places to round each column to.

"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_summary(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, decimals=decimals)

def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE.
@@ -851,25 +881,32 @@ def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_interval(**sensitivity_params, alpha=alpha,
return sensitivity_interval(**sensitivity_params._asdict(), alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, alpha=0.05, interval_type='ci'):
def robustness_value(self, null_hypothesis=0, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE.

The robustness value is the level of confounding (between 0 and 1) in
*both* the treatment and outcome that would make
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.
*both* the treatment and outcome that would produce enough omitted variable bias that
we could no longer reject the null hypothesis. With the default null_hypothesis of 0,
the robustness value is the level of confounding that would make the
ATE statistically insignificant.

A higher value indicates a more robust estimate.

Returns 0 if the original interval already includes the null_hypothesis.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the robustness value.

@@ -885,7 +922,8 @@ def robustness_value(self, alpha=0.05, interval_type='ci'):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)
return RV(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis,
alpha=alpha, interval_type=interval_type)

# override only so that we can update the docstring to indicate support for `blb`
def fit(self, Y, T, *, X=None, W=None, sample_weight=None, groups=None,
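A usage sketch of the sensitivity API added above (not part of this diff): the data-generating process and estimator settings are illustrative assumptions, with scalar outcome and treatment as the checks above require.

```python
# Illustrative sketch only: data and settings below are assumptions, not from this PR.
import numpy as np
from econml.dml import CausalForestDML

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
T = X[:, 0] + rng.normal(size=1000)            # scalar continuous treatment
Y = 2.0 * T + X[:, 1] + rng.normal(size=1000)  # scalar outcome

est = CausalForestDML(random_state=0)
est.fit(Y, T, X=X)

# Formatted summary of the sensitivity analysis at the assumed confounding levels
print(est.sensitivity_summary(null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05))

# Bounds on the ATE under that confounding; raises ValueError for
# multi-dimensional Y or T, per the checks above
print(est.sensitivity_interval(alpha=0.05, c_y=0.05, c_t=0.05, rho=1.))

# Level of confounding in *both* T and Y needed to overturn significance
print(est.robustness_value(null_hypothesis=0, alpha=0.05))
```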
55 changes: 47 additions & 8 deletions econml/dml/dml.py
@@ -25,7 +25,8 @@
shape, get_feature_names_or_default, filter_none_kwargs)
from .._shap import _shap_explain_model_cate
from ..sklearn_extensions.model_selection import get_selector, SingleModelSelector
from ..validate import sensitivity_interval, RV, dml_sensitivity_values
from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values,
sensitivity_summary)


def _combine(X, W, n_samples):
@@ -606,6 +607,36 @@ def bias_part_of_coef(self):
def fit_cate_intercept_(self):
return self.rlearner_model_final_._fit_cate_intercept

def sensitivity_summary(self, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3):
"""
Generate a summary of the sensitivity analysis for the ATE.

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

decimals: int, default 3
Number of decimal places to round each column to.

"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_summary(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, decimals=decimals)


def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE.
@@ -640,25 +671,32 @@ def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_interval(**sensitivity_params, alpha=alpha,
return sensitivity_interval(**sensitivity_params._asdict(), alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, alpha=0.05, interval_type='ci'):
def robustness_value(self, null_hypothesis=0, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE.

The robustness value is the level of confounding (between 0 and 1) in
*both* the treatment and outcome that would make
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.
*both* the treatment and outcome that would produce enough omitted variable bias that
we could no longer reject the null hypothesis. With the default null_hypothesis of 0,
the robustness value is the level of confounding that would make the
ATE statistically insignificant.

A higher value indicates a more robust estimate.

Returns 0 if the original interval already includes the null_hypothesis.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the robustness value.

@@ -674,7 +712,8 @@ def robustness_value(self, alpha=0.05, interval_type='ci'):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)
return RV(**sensitivity_params._asdict(), null_hypothesis=null_hypothesis,
alpha=alpha, interval_type=interval_type)


class LinearDML(StatsModelsCateEstimatorMixin, DML):
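The same workflow applies to the DML estimators changed here; a minimal sketch with LinearDML (the data and nuisance models are illustrative assumptions, not from this diff):

```python
# Illustrative sketch only: data and nuisance models are assumptions, not from this PR.
import numpy as np
from econml.dml import LinearDML
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(1)
W = rng.normal(size=(500, 3))
T = W[:, 0] + rng.normal(size=500)
Y = 1.5 * T + W[:, 1] + rng.normal(size=500)

est = LinearDML(model_y=LinearRegression(), model_t=LinearRegression())
est.fit(Y, T, W=W)

est.sensitivity_summary(c_y=0.05, c_t=0.05)   # formatted sensitivity summary
est.sensitivity_interval(c_y=0.05, c_t=0.05)  # ATE bounds under assumed confounding
est.robustness_value(null_hypothesis=0)       # confounding level that overturns significance
```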
61 changes: 52 additions & 9 deletions econml/dr/_drlearner.py
@@ -52,7 +52,8 @@
from ..utilities import (check_high_dimensional,
filter_none_kwargs, inverse_onehot, get_feature_names_or_default)
from .._shap import _shap_explain_multitask_model_cate, _shap_explain_model_cate
from ..validate import sensitivity_interval, RV, dr_sensitivity_values
from ..validate.sensitivity_analysis import (sensitivity_interval, RV,
sensitivity_summary, dr_sensitivity_values)


class _ModelNuisance(ModelSelector):
@@ -756,6 +757,39 @@ def shap_values(self, X, *, feature_names=None, treatment_names=None, output_nam
background_samples=background_samples)
shap_values.__doc__ = LinearCateEstimator.shap_values.__doc__

def sensitivity_summary(self, T, null_hypothesis=0, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., decimals=3):
"""
Generate a summary of the sensitivity analysis for the ATE for a given treatment.

Parameters
----------
T: alphanumeric
The treatment with respect to which to calculate the sensitivity summary.

null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

decimals: int, default 3
Number of decimal places to round each column to.

"""
if T not in self.transformer.categories_[0]:
# raise own ValueError here because sometimes error from sklearn is not transparent
raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}")
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {
k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()}
return sensitivity_summary(**sensitivity_params, null_hypothesis=null_hypothesis, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, decimals=decimals)

def sensitivity_interval(self, T, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE for a given treatment category.
@@ -793,20 +827,25 @@ def sensitivity_interval(self, T, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interv
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params.items()}
sensitivity_params = {
k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()}
return sensitivity_interval(**sensitivity_params, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)


def robustness_value(self, T, alpha=0.05, interval_type='ci'):
def robustness_value(self, T, null_hypothesis=0, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE for a given treatment category.

The robustness value is the level of confounding (between 0 and 1) in
*both* the treatment and outcome that would make
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.
*both* the treatment and outcome that would produce enough omitted variable bias that
we could no longer reject the null hypothesis. With the default null_hypothesis of 0,
the robustness value is the level of confounding that would make the
ATE statistically insignificant.

A higher value indicates a more robust estimate.

Returns 0 if the original interval already includes the null_hypothesis.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

@@ -815,6 +854,9 @@ def robustness_value(self, T, alpha=0.05, interval_type='ci'):
T: alphanumeric
The treatment with respect to which to calculate the robustness value.

null_hypothesis: float, default 0
The null_hypothesis value for the ATE.

alpha: float, default 0.05
The significance level for the robustness value.

@@ -832,8 +874,9 @@ def robustness_value(self, T, alpha=0.05, interval_type='ci'):
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params.items()}
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)
sensitivity_params = {
k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params._asdict().items()}
return RV(**sensitivity_params, null_hypothesis=null_hypothesis, alpha=alpha, interval_type=interval_type)


class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
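For the DR learner, the methods are per-treatment and take the treatment category as T; a minimal sketch with LinearDRLearner (data, models, and the 'a'/'b'/'c' categories are illustrative assumptions):

```python
# Illustrative sketch only: data, models, and categories are assumptions, not from this PR.
import numpy as np
from econml.dr import LinearDRLearner
from sklearn.linear_model import LinearRegression, LogisticRegression

rng = np.random.default_rng(2)
W = rng.normal(size=(500, 3))
T = rng.choice(['a', 'b', 'c'], size=500)              # 'a' acts as the control
Y = 1.0 * (T == 'b') + W[:, 0] + rng.normal(size=500)

est = LinearDRLearner(model_regression=LinearRegression(),
                      model_propensity=LogisticRegression())
est.fit(Y, T, W=W)

est.sensitivity_summary(T='b')                  # summary for 'b' vs. the control
est.sensitivity_interval(T='c', alpha=0.05)     # bounds for 'c' vs. the control
est.robustness_value(T='b', null_hypothesis=0)
# est.sensitivity_summary(T='a')  # fails: no model is fitted for the control
```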
1 change: 1 addition & 0 deletions econml/tests/test_dml.py
@@ -259,6 +259,7 @@ def make_random(n, is_discrete, d):
else:
est.sensitivity_interval()
est.robustness_value()
est.sensitivity_summary()

if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
8 changes: 8 additions & 0 deletions econml/tests/test_drlearner.py
@@ -148,20 +148,28 @@ def make_random(is_discrete, d):
est.sensitivity_interval(T='c')
est.robustness_value(T='c')

est.sensitivity_summary(T='b')
est.sensitivity_summary(T='c')

# ensure sensitivity analysis fails on control
with pytest.raises(AssertionError):
est.sensitivity_interval(T='a')

with pytest.raises(AssertionError):
est.robustness_value(T='a')

with pytest.raises(AssertionError):
est.sensitivity_summary(T='a')

# ensure failure on unknown treatment values
with pytest.raises(ValueError):
est.sensitivity_interval(T=1)

with pytest.raises(ValueError):
est.robustness_value(T=1)

with pytest.raises(ValueError):
est.sensitivity_summary(T=1)


# make sure we can call the marginal_effect and effect methods
43 changes: 39 additions & 4 deletions econml/tests/test_sensitivity_analysis.py
@@ -6,6 +6,7 @@

from econml.dml import LinearDML, CausalForestDML
from econml.dr import LinearDRLearner
from econml.validate.sensitivity_analysis import sensitivity_interval
from sklearn.linear_model import LinearRegression, LogisticRegression
import numpy as np

@@ -92,7 +93,41 @@ def test_params(self):
rv4 = est.robustness_value(**T_arg, alpha=0.05, interval_type='theta')
self.assertTrue(rv4 > rv)

# ensure failure on invalid interval_type
with pytest.raises(ValueError):
est.sensitivity_interval(**T_arg, alpha=0.05,
c_y=0.05, c_t=0.05, rho=1, interval_type='foo')
# check that null_hypothesis is passed through
rv5 = est.robustness_value(**T_arg, alpha=0.05, null_hypothesis=10)
self.assertNotEqual(rv5, rv)


def test_invalid_params(self):

theta = 0.5
sigma = 0.5
nu = 0.5
cov = np.random.normal(size=(3, 3))

sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1)

# check that c_y, c_t, rho are constrained
with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=-0.5, c_t=0.05, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=-0.5, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=1.5, c_t=0.05, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=-1.5)

# ensure we raise an error on invalid sigma, nu
with pytest.raises(ValueError):
sensitivity_interval(theta, -1, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1)

with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, -1, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1)

# ensure failure on invalid interval_type
with pytest.raises(ValueError):
sensitivity_interval(theta, sigma, nu, cov, alpha=0.05, c_y=0.05, c_t=0.05, rho=1, interval_type='foo')
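For reference, a standalone sketch of the direct call these tests exercise; theta (the point estimate), sigma and nu (scale terms), and cov (their estimated covariance) are placeholder values, not quantities from a real fit:

```python
# Placeholder inputs only; the constraints below mirror the tests above.
import numpy as np
from econml.validate.sensitivity_analysis import sensitivity_interval

theta, sigma, nu = 0.5, 0.5, 0.5
cov = 0.01 * np.eye(3)

# Per the tests: c_y and c_t must lie in [0, 1], rho in [-1, 1],
# sigma and nu must be non-negative, and interval_type is 'ci' or 'theta'
interval = sensitivity_interval(theta, sigma, nu, cov, alpha=0.05,
                                c_y=0.05, c_t=0.05, rho=1, interval_type='ci')
```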

4 changes: 1 addition & 3 deletions econml/validate/__init__.py
@@ -5,8 +5,6 @@

from .drtester import DRTester
from .results import BLPEvaluationResults, CalibrationEvaluationResults, UpliftEvaluationResults, EvaluationResults
from .sensitivity_analysis import sensitivity_interval, RV, dml_sensitivity_values, dr_sensitivity_values

__all__ = ['DRTester',
'BLPEvaluationResults', 'CalibrationEvaluationResults', 'UpliftEvaluationResults', 'EvaluationResults',
'sensitivity_interval', 'RV', 'dml_sensitivity_values', 'dr_sensitivity_values']
'BLPEvaluationResults', 'CalibrationEvaluationResults', 'UpliftEvaluationResults', 'EvaluationResults']
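Since the sensitivity helpers are no longer re-exported from econml.validate, downstream code imports them from the submodule instead, matching the imports updated earlier in this diff:

```python
# New import path after this change
from econml.validate.sensitivity_analysis import (
    sensitivity_interval, sensitivity_summary, RV,
    dml_sensitivity_values, dr_sensitivity_values,
)
```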