EconML
EconML copied to clipboard
Error with method name for DMLIV
Hi all, I am trying to fit a DMLIV model with a binary treatment and estimate the CATE. However, I get an error when specifying the method_name. Additionally, is there a demo for DMLIV that would help me understand the method?
Here is my dataset: Data_Nino.csv
and here is my code:
`
importing required libraries
import os, warnings, random import dowhy import econml from dowhy import CausalModel import pandas as pd import numpy as np import econml from econml.iv.dml import NonParamDMLIV from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LassoCV from sklearn.ensemble import GradientBoostingRegressor from econml.inference import BootstrapInference import numpy as np, scipy.stats as st import arviz as az
#El Nino vs Neutral data_nino = pd.read_csv("Data_Nino.csv", encoding='latin-1') data_nino = data_nino.dropna()
data_leish_nino = data_nino.drop(['Code.DANE.period'], axis=1) data_leish_nino.head() data_leish_nino = data_leish_nino.astype({"Treatment":'bool'}, copy=False)
###colombia Colombia_nino = data_leish_nino
#Step 1: Modeling the causal mechanism model_leish=CausalModel( data = Colombia_nino, treatment='Treatment', outcome='incidence100k', intrumental_variables='darwin', effect_modifiers='Forest', graph="digraph {darwin->Treatment;Treatment->incidence100k;Forest->incidence100k;}")
#view model model_leish.view_model()
#Step 2: Identifying effects identified_estimand_nino = model_leish.identify_effect(proceed_when_unidentifiable=True) print(identified_estimand_nino)
#Step 3: Estimation of the effect #with DML dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = lambda Colombia_nino: Colombia_nino["Forest"]>1, # condition used for CATE
#HERE I HAVE THE ERROR WITH THE METHOD NAME
method_name="iv.econml.dml.DMLIV",
method_params={
'init_params': {'model_y_xw':GradientBoostingRegressor(),
'model_t_xw': GradientBoostingRegressor(),
'model_t_xwz': GradientBoostingRegressor(),
'featurizer': PolynomialFeatures(degree=1, include_bias=False),
'model_final': LassoCV(fit_intercept=False),
'discrete_treatment': True,
'cv': 5,
'random_state': 123},
'fit_params': {}
})
print(dml_estimate_nino)
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = 1, # condition used for CATE method_name="iv.econml.dml.DMLIV", method_params={ 'init_params': {'model_y_xw':GradientBoostingRegressor(), 'model_t_xw': GradientBoostingRegressor(), 'model_t_xwz': GradientBoostingRegressor(), 'featurizer': PolynomialFeatures(degree=1, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)#los ITE estan en estimator/outcome
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, target_units = "ate", # condition used for CATE #test_significance=True, #confidence_intervals=True, method_name="iv.econml.dml.DMLIV", method_params={ 'init_params': {'model_y_xw':GradientBoostingRegressor(), 'model_t_xw': GradientBoostingRegressor(), 'model_t_xwz': GradientBoostingRegressor(), 'featurizer': PolynomialFeatures(degree=1, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {'inference': BootstrapInference(n_bootstrap_samples=25, n_jobs=-1), } }) print(dml_estimate_nino) `
It looks like perhaps the method name is incorrect; instead of 'iv.econml.dml.DMLIV', try 'econml.iv.dml.dmliv'.
@kbattocchi I changed the line to: method_name="econml.iv.dml.dmliv", but I get this error message
ImportError: iv is not an existing causal estimator.
Sorry, it looks like you need to keep the estimation method ('iv') at the front even though it is also in the class's path. Try 'iv.econml.iv.dml.DMLIV'.
@kbattocchi Thanks a lot for the suggestion. However, I get a couple of errors when I try to refute the estimate. When adding a placebo, I get this error: "UnboundLocalError: local variable 'refuter_class' referenced before assignment". With an unobserved common cause, the first time that I run the line I get this error message: "ValueError: n_splits=5 cannot be greater than the number of members in each class", and on subsequent runs of the same line I get this error: "MemoryError: Unable to allocate 26.8 GiB for an array with shape (60024, 60023) and data type float64"
This is the data Data_Nino.csv
This is my code
`
importing required libraries
import os, warnings, random import dowhy import econml from dowhy import CausalModel import pandas as pd import numpy as np import econml from econml.iv.dml import NonParamDMLIV from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LassoCV from sklearn.ensemble import GradientBoostingRegressor from econml.inference import BootstrapInference import numpy as np, scipy.stats as st import arviz as az import lightgbm as lgb import scipy.stats as stats
#####El Nino vs Neutral data_nino = pd.read_csv("Data_Nino.csv", encoding='latin-1') data_nino = data_nino.dropna()
data_leish_nino = data_nino.drop(['Code.DANE.period'], axis=1) data_leish_nino.head() data_leish_nino = data_leish_nino.astype({"Treatment":'bool'}, copy=False)
###colombia Colombia_nino = data_leish_nino
transform variables to sd units
Colombia_nino.darwin = stats.zscore(Colombia_nino.darwin)
Colombia_nino.wpac850 = stats.zscore(Colombia_nino.wpac850)
Colombia_nino.Forest = stats.zscore(Colombia_nino.Forest)
#Step 1: Modeling the causal mechanism model_leish=CausalModel( data = Colombia_nino, treatment='Treatment', outcome='incidence100k', intrumental_variables=['darwin', 'wpac850'], effect_modifiers='Forest', graph="digraph {darwin->Treatment;wpac850->Treatment;Treatment->incidence100k;Forest->incidence100k;}")
#view model model_leish.view_model()
#Step 2: Identifying effects identified_estimand_nino = model_leish.identify_effect(proceed_when_unidentifiable=True) print(identified_estimand_nino)
#Step 3: Estimation of the effect #with DML dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = lambda Colombia_nino: Colombia_nino["Forest"]>1, # condition used for CATE method_name="iv.econml.iv.dml.DMLIV", method_params={ 'init_params': {'model_y_xw': lgb.LGBMRegressor(), 'model_t_xw': lgb.LGBMClassifier(), 'model_t_xwz': lgb.LGBMClassifier(), 'featurizer': PolynomialFeatures(degree=3, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = 1, # condition used for CATE method_name="iv.econml.iv.dml.DMLIV", method_params={ 'init_params': {'model_y_xw': lgb.LGBMRegressor(), 'model_t_xw': lgb.LGBMClassifier(), 'model_t_xwz': lgb.LGBMClassifier(), 'featurizer': PolynomialFeatures(degree=3, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)#los ITE estan en estimator/outcome
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, target_units = "ate", # condition used for CATE #test_significance=True, #confidence_intervals=True, method_name="iv.econml.iv.dml.DMLIV", method_params={ 'init_params': {'model_y_xw': lgb.LGBMRegressor(), 'model_t_xw': lgb.LGBMClassifier(), 'model_t_xwz': lgb.LGBMClassifier(), 'featurizer': PolynomialFeatures(degree=3, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {'inference': BootstrapInference(n_bootstrap_samples=25, n_jobs=-1), } }) print(dml_estimate_nino)
#ATE El Nino ate_Colombia_nino = dml_estimate_nino.value print(ate_Colombia_nino)
#p-value El NIno p_value_nino = dml_estimate_nino.test_stat_significance(method="bootstrap") print(p_value_nino)
##confidence interval with boostrap El Nino ci_Colombia_boost_nino = dml_estimate_nino.get_confidence_intervals(method="bootstrap", confidence_level=0.95, num_simulations=10, sample_size_fraction=0.7) print(ci_Colombia_boost_nino)
#cate El Nino cate_Colombia_nino = dml_estimate_nino.cate_estimates Q1Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q1' mean_Q1Forest_Colombia_nino = Colombia_nino.loc[Q1Forest_Colombia, 'CATE'].mean() Q2Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q2' mean_Q2Forest_Colombia_nino = Colombia_nino.loc[Q2Forest_Colombia, 'CATE'].mean() Q3Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q3' mean_Q3Forest_Colombia_nino = Colombia_nino.loc[Q3Forest_Colombia, 'CATE'].mean() Q4Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q4' mean_Q4Forest_Colombia_nino = Colombia_nino.loc[Q4Forest_Colombia, 'CATE'].mean() print(mean_Q1Forest_Colombia_nino, mean_Q2Forest_Colombia_nino, mean_Q3Forest_Colombia_nino, mean_Q4Forest_Colombia_nino)
#Step 4: Refutations
#HERE IS THE PROBLEM WITH UNOBSERVED COMMON CAUSE #with add unobserved common cause #How sensitive is the effect estimate when we add an additional common cause (confounder) to the dataset that is correlated with the treatment and the outcome? (Hint: It should not be too sensitive) nino_unobserved_dml = model_leish.refute_estimate(identified_estimand_nino, dml_estimate_nino, method_name="add_unobserved_common_cause", confounders_effect_on_treatment="linear", confounders_effect_on_outcome="linear", effect_strength_on_treatment=0.05, effect_strength_on_outcome=0.5, random_state=123) print(nino_unobserved_dml)
#HERE IS THE PROBLEM WITH PLACEBO #with placebo #What happens to the estimated causal effect when we replace the true treatment variable with an independent random variable? (Hint: the effect should go to zero) nino_placebo_dml = model_leish.refute_estimate(identified_estimand_nino, dml_estimate_nino, random_state=123, #placebo_type="permute", #method_name="placebo_treatment_refuter", num_simulations=10, placebo_type="permute") print(nino_placebo_dml) `