dowhy
dowhy copied to clipboard
Estimate refutation throws error: PicklingError: logger cannot be pickled
Estimate refutation throws the following error:
PicklingError: logger cannot be pickled
# Minimal reproduction: refuting a GLM-based estimate with the
# "random_common_cause" refuter raises
# `PicklingError: logger cannot be pickled` (reported against DoWhy 0.9.1).
import networkx as nx
import numpy as np
import pandas as pd
# Import the `api` submodule explicitly: a bare `import statsmodels` does not
# guarantee that `statsmodels.api` is loaded.
import statsmodels.api as sm
from dowhy.causal_model import CausalModel
# `to_pydot` was used without being imported in the original snippet
# (it requires the `pydot` package to be installed).
from networkx.drawing.nx_pydot import to_pydot

# Causal graph: a -> b -> d and a -> c -> d.
edge_list = [
    ("a", "b"),
    ("a", "c"),
    ("c", "d"),
    ("b", "d"),
]
expert_G = nx.DiGraph()
expert_G.add_edges_from(edge_list)
dot_graph = to_pydot(expert_G)

# 100 rows of synthetic binary data; each of the four columns is drawn with
# its own success probability, itself sampled uniformly from [0, 1).
data = pd.DataFrame(
    np.random.binomial(1, np.random.uniform(0, 1, size=4), size=(100, 4)),
    columns=list("abcd"),
)

model = CausalModel(
    data=data,
    treatment="b",
    outcome="d",
    graph=str(dot_graph),
)

identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
# NOTE(review): presumably an attempted workaround for the pickling error;
# the failure below is reported with this line in place — confirm whether it
# is still needed.
identified_estimand.identifier.logger = None

estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.generalized_linear_model",
    method_params={
        "glm_family": sm.families.Binomial(),
    },
    target_units="ate",
)
print(estimate.interpret("textual_effect_interpreter"))

# This is the call that raises `PicklingError: logger cannot be pickled`.
refutel = model.refute_estimate(identified_estimand, estimate, "random_common_cause")
This can also include a verbatim copy of outputs, or screenshots.
Expected behavior A clear and concise description of what you expected to happen.
Version information:
- DoWhy version 0.9.1
Full Traceback
---------------------------------------------------------------------------
PicklingError Traceback (most recent call last)
Cell In[16], line 41
31 estimate = model.estimate_effect(identified_estimand,
32 method_name="backdoor.generalized_linear_model",
33 method_params={
34 'glm_family':statsmodels.api.families.Binomial()
35 },
36 target_units="ate")
39 print(estimate.interpret('textual_effect_interpreter'))
---> 41 refutel = model.refute_estimate(identified_estimand, estimate, "random_common_cause")
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/dowhy/causal_model.py:417](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/dowhy/causal_model.py:417), in CausalModel.refute_estimate(self, estimand, estimate, method_name, show_progress_bar, **kwargs)
414 refuter_class = causal_refuters.get_class_object(method_name)
416 refuter = refuter_class(self._data, identified_estimand=estimand, estimate=estimate, **kwargs)
--> 417 res = refuter.refute_estimate(show_progress_bar)
418 return res
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/dowhy/causal_refuters/random_common_cause.py:45](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/dowhy/causal_refuters/random_common_cause.py:45), in RandomCommonCause.refute_estimate(self, show_progress_bar)
44 def refute_estimate(self, show_progress_bar=False):
---> 45 refute = refute_random_common_cause(
46 self._data,
47 self._target_estimand,
48 self._estimate,
49 self._num_simulations,
50 self._random_state,
51 show_progress_bar,
52 self._n_jobs,
53 self._verbose,
54 )
55 refute.add_refuter(self)
56 return refute
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/dowhy/causal_refuters/random_common_cause.py:120](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/dowhy/causal_refuters/random_common_cause.py:120), in refute_random_common_cause(data, target_estimand, estimate, num_simulations, random_state, show_progress_bar, n_jobs, verbose, **_)
117 random_state = np.random.RandomState(seed=random_state)
119 # Run refutation in parallel
--> 120 sample_estimates = Parallel(n_jobs=n_jobs, verbose=verbose)(
121 delayed(_refute_once)(data, identified_estimand, estimate, random_state)
122 for _ in tqdm(
123 range(num_simulations),
124 colour=CausalRefuter.PROGRESS_BAR_COLOR,
125 disable=not show_progress_bar,
126 desc="Refuting Estimates: ",
127 )
128 )
129 sample_estimates = np.array(sample_estimates)
131 refute = CausalRefutation(
132 estimate.value, np.mean(sample_estimates), refutation_type="Refute: Add a random common cause"
133 )
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:1085](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:1085), in Parallel.__call__(self, iterable)
1076 try:
1077 # Only set self._iterating to True if at least a batch
1078 # was dispatched. In particular this covers the edge
(...)
1082 # was very quick and its callback already dispatched all the
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1088 while self.dispatch_one_batch(iterator):
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:901](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:901), in Parallel.dispatch_one_batch(self, iterator)
899 return False
900 else:
--> 901 self._dispatch(tasks)
902 return True
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:819](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:819), in Parallel._dispatch(self, batch)
817 with self._lock:
818 job_idx = len(self._jobs)
--> 819 job = self._backend.apply_async(batch, callback=cb)
820 # A job can complete so quickly than its callback is
821 # called before we get here, causing self._jobs to
822 # grow. To ensure correct results ordering, .insert is
823 # used (rather than .append) in the following line
824 self._jobs.insert(job_idx, job)
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/_parallel_backends.py:208](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/_parallel_backends.py:208), in SequentialBackend.apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/_parallel_backends.py:597](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/_parallel_backends.py:597), in ImmediateResult.__init__(self, batch)
594 def __init__(self, batch):
595 # Don't delay the application, to avoid keeping the input
596 # arguments in memory
--> 597 self.results = batch()
File [~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:288](https://file+.vscode-resource.vscode-cdn.net/home/***/intell/repos/dev/causal/ml/notebooks/~/.cache/pypoetry/virtualenvs/causal-JHqzxB11-py3.8/lib/python3.8/site-packages/joblib/parallel.py:288), in BatchedCalls.__call__(self)
284 def __call__(self):
285 # Set the default nested backend to self._backend but do not set the
286 # change the default number of processes to -1
...
1739 import pickle
-> 1740 raise pickle.PicklingError('logger cannot be pickled')
1741 return getLogger, (self.name,)
PicklingError: logger cannot be pickled
When I run this code, I'm unable to reproduce the error. I had to create my own data since `encode_data` is not available and your graph does not include the `approval` node. Can you provide a self-contained example to debug this error?
My bad. Sorry for the incomplete example. I'll update this.
@amit-sharma thanks for the notification. I've updated the example; it should work now.