1 change: 1 addition & 0 deletions econml/__init__.py
@@ -21,6 +21,7 @@
 'dowhy',
 'utilities',
 'federated_learning',
+'validate',
 '__version__']

 from ._version import __version__
6 changes: 3 additions & 3 deletions econml/iv/dr/_dr.py
@@ -2186,10 +2186,10 @@ def true_heterogeneity_function(X):
 est.fit(Y=y, T=T, Z=Z, X=X)

 >>> est.effect(X[:3])
-array([-1.74672..., 1.57225..., -1.58916...])
+array([-1.74672..., 1.57..., -1.58916...])
 >>> est.effect_interval(X[:3])
-(array([-7.05230..., -6.78656..., -5.11344...]),
-array([3.55885..., 9.93108..., 1.93512...]))
+(array([-7.05230..., -6..., -5.11344...]),
+array([3.55885..., 9.9..., 1.93512...]))
 """

 def __init__(self, *,
263 changes: 263 additions & 0 deletions econml/tests/test_drtester.py
@@ -0,0 +1,263 @@
import unittest

import numpy as np
import pandas as pd
import scipy.stats as st
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor

from econml.validate.drtester import DRtester
from econml.dml import DML


class TestDRTester(unittest.TestCase):

    @staticmethod
    def _get_data(num_treatments=1):
        np.random.seed(576)

        N = 20000  # number of units
        K = 5  # number of covariates

        # Generate random Xs
        X_mu = np.zeros(5)  # Means of Xs
        # Random covariance matrix of Xs
        X_sig = np.diag(np.random.rand(5))
        X = st.multivariate_normal(X_mu, X_sig).rvs(N)

        # Effect of Xs on outcome
        X_beta = np.random.uniform(0, 5, K)
        # Effect of treatment on outcomes
        D_beta = np.arange(num_treatments + 1)
        # Effect of treatment on outcome conditional on X1
        DX1_beta = np.array([0] * num_treatments + [3])

        # Generate treatments based on X and random noise
        beta_treat = np.random.uniform(-1, 1, (num_treatments + 1, K))
        D1 = np.zeros((N, num_treatments + 1))
        for k in range(num_treatments + 1):
            D1[:, k] = X @ beta_treat[k, :] + np.random.gumbel(0, 1, N)
        D = np.array([np.where(D1[i, :] == np.max(D1[i, :]))[0][0] for i in range(N)])
        D_dum = pd.get_dummies(D)

        # Generate Y (based on X, D, and random noise)
        Y_sig = 1  # Variance of random outcome noise
        Y = X @ X_beta + (D_dum @ D_beta) + X[:, 1] * (D_dum @ DX1_beta) + np.random.normal(0, Y_sig, N)
        Y = Y.to_numpy()

        train_prop = .5
        train_N = np.ceil(train_prop * N)
        ind = np.array(range(N))
        train_ind = np.random.choice(N, int(train_N), replace=False)
        val_ind = ind[~np.isin(ind, train_ind)]

        Xtrain, Dtrain, Ytrain = X[train_ind], D[train_ind], Y[train_ind]
        Xval, Dval, Yval = X[val_ind], D[val_ind], Y[val_ind]

        return Xtrain, Dtrain, Ytrain, Xval, Dval, Yval

    def test_multi(self):
        Xtrain, Dtrain, Ytrain, Xval, Dval, Yval = self._get_data(num_treatments=2)

        # Simple classifier and regressor for propensity, outcome, and cate
        reg_t = RandomForestClassifier(random_state=0)
        reg_y = GradientBoostingRegressor(random_state=0)

        cate = DML(
            model_y=reg_y,
            model_t=reg_t,
            model_final=reg_y
        ).fit(Y=Ytrain, T=Dtrain, X=Xtrain)

        # test the DR outcome difference
        my_dr_tester = DRtester(
            model_regression=reg_y,
            model_propensity=reg_t,
            cate=cate
        ).fit_nuisance(
            Xval, Dval, Yval, Xtrain, Dtrain, Ytrain
        )
        dr_outcomes = my_dr_tester.dr_val_

        ates = dr_outcomes.mean(axis=0)
        for k in range(dr_outcomes.shape[1]):
            ate_errs = np.sqrt(((dr_outcomes[:, k] - ates[k]) ** 2).sum() /
                               (dr_outcomes.shape[0] * (dr_outcomes.shape[0] - 1)))

            self.assertLess(abs(ates[k] - (k + 1)), 2 * ate_errs)

        res = my_dr_tester.evaluate_all(Xval, Xtrain)
        res_df = res.summary()

        for k in range(3):
            if k == 0:
                with self.assertRaises(Exception) as exc:
                    res.plot_cal(k)
                self.assertTrue(str(exc.exception) == 'Plotting only supported for treated units (not controls)')
            else:
                self.assertTrue(res.plot_cal(k) is not None)

        self.assertGreater(res_df.blp_pval.values[0], 0.1)  # no heterogeneity
        self.assertLess(res_df.blp_pval.values[1], 0.05)  # heterogeneity

        self.assertLess(res_df.cal_r_squared.values[0], 0)  # poor R2
        self.assertGreater(res_df.cal_r_squared.values[1], 0)  # good R2

        self.assertLess(res_df.qini_pval.values[1], res_df.qini_pval.values[0])

    def test_binary(self):
        Xtrain, Dtrain, Ytrain, Xval, Dval, Yval = self._get_data(num_treatments=1)

        # Simple classifier and regressor for propensity, outcome, and cate
        reg_t = RandomForestClassifier(random_state=0)
        reg_y = GradientBoostingRegressor(random_state=0)

        cate = DML(
            model_y=reg_y,
            model_t=reg_t,
            model_final=reg_y
        ).fit(Y=Ytrain, T=Dtrain, X=Xtrain)

        # test the DR outcome difference
        my_dr_tester = DRtester(
            model_regression=reg_y,
            model_propensity=reg_t,
            cate=cate
        ).fit_nuisance(
            Xval, Dval, Yval, Xtrain, Dtrain, Ytrain
        )
        dr_outcomes = my_dr_tester.dr_val_

        ate = dr_outcomes.mean(axis=0)
        ate_err = np.sqrt(((dr_outcomes - ate) ** 2).sum() /
                          (dr_outcomes.shape[0] * (dr_outcomes.shape[0] - 1)))
        truth = 1
        self.assertLess(abs(ate - truth), 2 * ate_err)

        res = my_dr_tester.evaluate_all(Xval, Xtrain)
        res_df = res.summary()

        for k in range(2):
            if k == 0:
                with self.assertRaises(Exception) as exc:
                    res.plot_cal(k)
                self.assertTrue(str(exc.exception) == 'Plotting only supported for treated units (not controls)')
            else:
                self.assertTrue(res.plot_cal(k) is not None)

        self.assertLess(res_df.blp_pval.values[0], 0.05)  # heterogeneity
        self.assertGreater(res_df.cal_r_squared.values[0], 0)  # good R2
        self.assertLess(res_df.qini_pval.values[0], 0.05)  # heterogeneity

    def test_nuisance_val_fit(self):
        Xtrain, Dtrain, Ytrain, Xval, Dval, Yval = self._get_data(num_treatments=1)

        # Simple classifier and regressor for propensity, outcome, and cate
        reg_t = RandomForestClassifier(random_state=0)
        reg_y = GradientBoostingRegressor(random_state=0)

        cate = DML(
            model_y=reg_y,
            model_t=reg_t,
            model_final=reg_y
        ).fit(Y=Ytrain, T=Dtrain, X=Xtrain)

        # test the DR outcome difference
        my_dr_tester = DRtester(
            model_regression=reg_y,
            model_propensity=reg_t,
            cate=cate
        ).fit_nuisance(Xval, Dval, Yval)

        dr_outcomes = my_dr_tester.dr_val_

        ate = dr_outcomes.mean(axis=0)
        ate_err = np.sqrt(((dr_outcomes - ate) ** 2).sum() /
                          (dr_outcomes.shape[0] * (dr_outcomes.shape[0] - 1)))
        truth = 1
        self.assertLess(abs(ate - truth), 2 * ate_err)

        # use evaluate_blp to fit on validation only
        blp_res = my_dr_tester.evaluate_blp(Xval)

        self.assertLess(blp_res.pvals[0], 0.05)  # heterogeneity

        for kwargs in [{}, {'Xval': Xval}]:
            with self.assertRaises(Exception) as exc:
                my_dr_tester.evaluate_cal(**kwargs)
            self.assertTrue(
                str(exc.exception) == "Must fit nuisance models on training sample data to use calibration test"
            )

    def test_exceptions(self):
        Xtrain, Dtrain, Ytrain, Xval, Dval, Yval = self._get_data(num_treatments=1)

        # Simple classifier and regressor for propensity, outcome, and cate
        reg_t = RandomForestClassifier(random_state=0)
        reg_y = GradientBoostingRegressor(random_state=0)

        cate = DML(
            model_y=reg_y,
            model_t=reg_t,
            model_final=reg_y
        ).fit(Y=Ytrain, T=Dtrain, X=Xtrain)

        # test the DR outcome difference
        my_dr_tester = DRtester(
            model_regression=reg_y,
            model_propensity=reg_t,
            cate=cate
        )

        # fit nothing
        for func in [my_dr_tester.evaluate_blp, my_dr_tester.evaluate_cal, my_dr_tester.evaluate_qini]:
            with self.assertRaises(Exception) as exc:
                func()
            if func.__name__ == 'evaluate_cal':
                self.assertTrue(
                    str(exc.exception) == "Must fit nuisance models on training sample data to use calibration test"
                )
            else:
                self.assertTrue(str(exc.exception) == "Must fit nuisances before evaluating")

        my_dr_tester = my_dr_tester.fit_nuisance(
            Xval, Dval, Yval, Xtrain, Dtrain, Ytrain
        )

        for func in [
            my_dr_tester.evaluate_blp,
            my_dr_tester.evaluate_cal,
            my_dr_tester.evaluate_qini,
            my_dr_tester.evaluate_all
        ]:
            with self.assertRaises(Exception) as exc:
                func()
            if func.__name__ == 'evaluate_blp':
                self.assertTrue(
                    str(exc.exception) == "CATE predictions not yet calculated - must provide Xval"
                )
            else:
                self.assertTrue(str(exc.exception) ==
                                "CATE predictions not yet calculated - must provide both Xval, Xtrain")

        for func in [
            my_dr_tester.evaluate_cal,
            my_dr_tester.evaluate_qini,
            my_dr_tester.evaluate_all
        ]:
            with self.assertRaises(Exception) as exc:
                func(Xval=Xval)
            self.assertTrue(
                str(exc.exception) == "CATE predictions not yet calculated - must provide both Xval, Xtrain")

        cal_res = my_dr_tester.evaluate_cal(Xval, Xtrain)
        self.assertGreater(cal_res.cal_r_squared[0], 0)  # good R2

        my_dr_tester = DRtester(
            model_regression=reg_y,
            model_propensity=reg_t,
            cate=cate
        ).fit_nuisance(
            Xval, Dval, Yval, Xtrain, Dtrain, Ytrain
        )
        qini_res = my_dr_tester.evaluate_qini(Xval, Xtrain)
        self.assertLess(qini_res.pvals[0], 0.05)
11 changes: 11 additions & 0 deletions econml/validate/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) PyWhy contributors. All rights reserved.
# Licensed under the MIT License.

"""
A suite of validation methods for CATE models.
"""

from .drtester import DRtester


__all__ = ['DRtester']
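
Usage note: a minimal sketch of how the new validation suite is exercised, assembled from the calls in test_drtester.py above. It assumes Xtrain, Dtrain, Ytrain and Xval, Dval, Yval are training/validation covariates, treatments, and outcomes; argument names and result attributes mirror the tests, so treat this as a sketch rather than the definitive API:

from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier

from econml.dml import DML
from econml.validate import DRtester

# Fit a CATE estimator on the training sample (DML is what the tests use)
cate = DML(
    model_y=GradientBoostingRegressor(random_state=0),
    model_t=RandomForestClassifier(random_state=0),
    model_final=GradientBoostingRegressor(random_state=0)
).fit(Y=Ytrain, T=Dtrain, X=Xtrain)

# Fit the doubly robust nuisance models on the validation and training samples,
# then run the BLP, calibration, and QINI evaluations in one call
tester = DRtester(
    model_regression=GradientBoostingRegressor(random_state=0),
    model_propensity=RandomForestClassifier(random_state=0),
    cate=cate
).fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)

res = tester.evaluate_all(Xval, Xtrain)
res_df = res.summary()  # per the tests: exposes blp_pval, cal_r_squared, qini_pval
res.plot_cal(1)         # calibration plot for treatment level 1 (not supported for controls)

Per the tests, evaluate_cal and evaluate_qini require nuisance models fit on the training sample as well, while evaluate_blp also works when fit_nuisance is called with validation data only.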