EconML icon indicating copy to clipboard operation
EconML copied to clipboard

Error with method name for DMLIV

Open juandavidgutier opened this issue 3 years ago • 4 comments

Hi all, I am trying to fit a DMLIV model with a binary treatment and CATE estimation. However, I have a problem with the value to pass as method_name. Additionally, is there any demo for DMLIV that would help me understand the method?

Here is my dataset: Data_Nino.csv

and here is my code:

`

# importing required libraries

import os, warnings, random import dowhy import econml from dowhy import CausalModel import pandas as pd import numpy as np import econml from econml.iv.dml import NonParamDMLIV from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LassoCV from sklearn.ensemble import GradientBoostingRegressor from econml.inference import BootstrapInference import numpy as np, scipy.stats as st import arviz as az

#El Nino vs Neutral data_nino = pd.read_csv("Data_Nino.csv", encoding='latin-1') data_nino = data_nino.dropna()

data_leish_nino = data_nino.drop(['Code.DANE.period'], axis=1) data_leish_nino.head() data_leish_nino = data_leish_nino.astype({"Treatment":'bool'}, copy=False)

###colombia Colombia_nino = data_leish_nino

#Step 1: Modeling the causal mechanism model_leish=CausalModel( data = Colombia_nino, treatment='Treatment', outcome='incidence100k', intrumental_variables='darwin', effect_modifiers='Forest', graph="digraph {darwin->Treatment;Treatment->incidence100k;Forest->incidence100k;}")

#view model model_leish.view_model()

#Step 2: Identifying effects identified_estimand_nino = model_leish.identify_effect(proceed_when_unidentifiable=True) print(identified_estimand_nino)

#Step 3: Estimation of the effect #with DML dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = lambda Colombia_nino: Colombia_nino["Forest"]>1, # condition used for CATE

#HERE I HAVE THE ERROR WITH THE METHOD NAME

                                method_name="iv.econml.dml.DMLIV",
                                method_params={
                                    'init_params': {'model_y_xw':GradientBoostingRegressor(),
                                                    'model_t_xw': GradientBoostingRegressor(),
                                                    'model_t_xwz': GradientBoostingRegressor(),
                                                    'featurizer': PolynomialFeatures(degree=1, include_bias=False),
                                                    'model_final': LassoCV(fit_intercept=False),
                                                    'discrete_treatment': True,
                                                    'cv': 5,
                                                    'random_state': 123},
                                    'fit_params': {}
                                 })

print(dml_estimate_nino)

dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = 1, # condition used for CATE method_name="iv.econml.dml.DMLIV", method_params={ 'init_params': {'model_y_xw':GradientBoostingRegressor(), 'model_t_xw': GradientBoostingRegressor(), 'model_t_xwz': GradientBoostingRegressor(), 'featurizer': PolynomialFeatures(degree=1, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)#los ITE estan en estimator/outcome

dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, target_units = "ate", # condition used for CATE #test_significance=True, #confidence_intervals=True, method_name="iv.econml.dml.DMLIV", method_params={ 'init_params': {'model_y_xw':GradientBoostingRegressor(), 'model_t_xw': GradientBoostingRegressor(), 'model_t_xwz': GradientBoostingRegressor(), 'featurizer': PolynomialFeatures(degree=1, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {'inference': BootstrapInference(n_bootstrap_samples=25, n_jobs=-1), } }) print(dml_estimate_nino) `

juandavidgutier avatar Nov 05 '21 18:11 juandavidgutier

It looks like perhaps the method name is incorrect; instead of 'iv.econml.dml.DMLIV', try 'econml.iv.dml.dmliv'.

kbattocchi avatar Nov 10 '21 16:11 kbattocchi

@kbattocchi I changed the line to: method_name="econml.iv.dml.dmliv", but I get this error message:

ImportError: iv is not an existing causal estimator.

juandavidgutier avatar Nov 11 '21 13:11 juandavidgutier

Sorry, looks like you need to keep the estimation method ('iv') at the front even though it is also in the class's path. Try 'iv.econml.iv.dml.DMLIV'.

kbattocchi avatar Nov 11 '21 17:11 kbattocchi

@kbattocchi Thanks a lot for the suggestion. However, I get a couple of errors when I try to refute the estimate. When adding a placebo, I get this error: "UnboundLocalError: local variable 'refuter_class' referenced before assignment". With an unobserved common cause, the first time I run the line I get this error message: "ValueError: n_splits=5 cannot be greater than the number of members in each class", and on subsequent runs of the same line I get this error: "MemoryError: Unable to allocate 26.8 GiB for an array with shape (60024, 60023) and data type float64".

This is the data: Data_Nino.csv

This is my code:

`

# importing required libraries

import os, warnings, random import dowhy import econml from dowhy import CausalModel import pandas as pd import numpy as np import econml from econml.iv.dml import NonParamDMLIV from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LassoCV from sklearn.ensemble import GradientBoostingRegressor from econml.inference import BootstrapInference import numpy as np, scipy.stats as st import arviz as az import lightgbm as lgb import scipy.stats as stats

#####El Nino vs Neutral data_nino = pd.read_csv("Data_Nino.csv", encoding='latin-1') data_nino = data_nino.dropna()

data_leish_nino = data_nino.drop(['Code.DANE.period'], axis=1) data_leish_nino.head() data_leish_nino = data_leish_nino.astype({"Treatment":'bool'}, copy=False)

###colombia Colombia_nino = data_leish_nino

# transform variables to SD units

Colombia_nino.darwin = stats.zscore(Colombia_nino.darwin) Colombia_nino.wpac850 = stats.zscore(Colombia_nino.wpac850)
Colombia_nino.Forest = stats.zscore(Colombia_nino.Forest)

#Step 1: Modeling the causal mechanism model_leish=CausalModel( data = Colombia_nino, treatment='Treatment', outcome='incidence100k', intrumental_variables=['darwin', 'wpac850'], effect_modifiers='Forest', graph="digraph {darwin->Treatment;wpac850->Treatment;Treatment->incidence100k;Forest->incidence100k;}")

#view model model_leish.view_model()

#Step 2: Identifying effects identified_estimand_nino = model_leish.identify_effect(proceed_when_unidentifiable=True) print(identified_estimand_nino)

#Step 3: Estimation of the effect #with DML dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = lambda Colombia_nino: Colombia_nino["Forest"]>1, # condition used for CATE method_name="iv.econml.iv.dml.DMLIV", method_params={ 'init_params': {'model_y_xw': lgb.LGBMRegressor(), 'model_t_xw': lgb.LGBMClassifier(), 'model_t_xwz': lgb.LGBMClassifier(), 'featurizer': PolynomialFeatures(degree=3, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)

dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = 1, # condition used for CATE method_name="iv.econml.iv.dml.DMLIV", method_params={ 'init_params': {'model_y_xw': lgb.LGBMRegressor(), 'model_t_xw': lgb.LGBMClassifier(), 'model_t_xwz': lgb.LGBMClassifier(), 'featurizer': PolynomialFeatures(degree=3, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)#los ITE estan en estimator/outcome

dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, target_units = "ate", # condition used for CATE #test_significance=True, #confidence_intervals=True, method_name="iv.econml.iv.dml.DMLIV", method_params={ 'init_params': {'model_y_xw': lgb.LGBMRegressor(), 'model_t_xw': lgb.LGBMClassifier(), 'model_t_xwz': lgb.LGBMClassifier(), 'featurizer': PolynomialFeatures(degree=3, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {'inference': BootstrapInference(n_bootstrap_samples=25, n_jobs=-1), } }) print(dml_estimate_nino)

#ATE El Nino ate_Colombia_nino = dml_estimate_nino.value print(ate_Colombia_nino)

#p-value El NIno p_value_nino = dml_estimate_nino.test_stat_significance(method="bootstrap") print(p_value_nino)

##confidence interval with boostrap El Nino ci_Colombia_boost_nino = dml_estimate_nino.get_confidence_intervals(method="bootstrap", confidence_level=0.95, num_simulations=10, sample_size_fraction=0.7) print(ci_Colombia_boost_nino)

#cate El Nino cate_Colombia_nino = dml_estimate_nino.cate_estimates Q1Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q1' mean_Q1Forest_Colombia_nino = Colombia_nino.loc[Q1Forest_Colombia, 'CATE'].mean() Q2Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q2' mean_Q2Forest_Colombia_nino = Colombia_nino.loc[Q2Forest_Colombia, 'CATE'].mean() Q3Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q3' mean_Q3Forest_Colombia_nino = Colombia_nino.loc[Q3Forest_Colombia, 'CATE'].mean() Q4Forest_Colombia = Colombia_nino['Quartile.Forest'] == 'Q4' mean_Q4Forest_Colombia_nino = Colombia_nino.loc[Q4Forest_Colombia, 'CATE'].mean() print(mean_Q1Forest_Colombia_nino, mean_Q2Forest_Colombia_nino, mean_Q3Forest_Colombia_nino, mean_Q4Forest_Colombia_nino)

#Step 4: Refutations

#HERE IS THE PROBLEM WITH UNOBSERVED COMMON CAUSE #with add unobserved common cause #How sensitive is the effect estimate when we add an additional common cause (confounder) to the dataset that is correlated with the treatment and the outcome? (Hint: It should not be too sensitive) nino_unobserved_dml = model_leish.refute_estimate(identified_estimand_nino, dml_estimate_nino, method_name="add_unobserved_common_cause", confounders_effect_on_treatment="linear", confounders_effect_on_outcome="linear", effect_strength_on_treatment=0.05, effect_strength_on_outcome=0.5, random_state=123) print(nino_unobserved_dml)

#HERE IS THE PROBLEM WITH PLACEBO #with placebo #What happens to the estimated causal effect when we replace the true treatment variable with an independent random variable? (Hint: the effect should go to zero) nino_placebo_dml = model_leish.refute_estimate(identified_estimand_nino, dml_estimate_nino, random_state=123, #placebo_type="permute", #method_name="placebo_treatment_refuter", num_simulations=10, placebo_type="permute") print(nino_placebo_dml) `

juandavidgutier avatar Nov 13 '21 23:11 juandavidgutier