evalml
evalml copied to clipboard
Prediction explanations should be serializable when output_format is dict
trafficstars
Repro:
# Minimal reproduction: the dict-format prediction explanation cannot be
# passed to json.dumps.
from evalml.demos import load_fraud
from evalml.pipelines import BinaryClassificationPipeline
# explain_prediction is the function actually called below; the original
# snippet only imported explain_predictions_best_worst, which would raise
# NameError before the serialization problem is ever reached.
from evalml.model_understanding import explain_prediction
from evalml.model_understanding import explain_predictions_best_worst
import json


class Fraud(BinaryClassificationPipeline):
    """Binary classification pipeline used to reproduce the JSON error."""

    custom_name = "Fraud Pipeline"
    component_graph = [
        "Imputer",
        "DateTime Featurization Component",
        "Text Featurization Component",
        "One Hot Encoder",
        "Logistic Regression Classifier",
    ]


# Load the fraud demo data (the argument presumably caps the number of
# rows -- confirm against load_fraud's documentation).
X, y = load_fraud(1000)

# Fit the pipeline with an empty parameters dict.
fraud = Fraud({})
fraud.fit(X, y)

# Request the explanation as a dict rather than as formatted text.
exp = explain_prediction(fraud, X, y, index_to_explain=10, top_k_features=12, output_format="dict")

# Fails with "TypeError: Object of type int64 is not JSON serializable".
json.dumps(exp, indent=2)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-5ed23ab224b5> in <module>
19 exp = explain_prediction(fraud, X, y, index_to_explain=10, top_k_features=12, output_format="dict")
20
---> 21 json.dumps(exp, indent=2)
~/miniconda3/envs/evalml/lib/python3.8/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
232 if cls is None:
233 cls = JSONEncoder
--> 234 return cls(
235 skipkeys=skipkeys, ensure_ascii=ensure_ascii,
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in encode(self, o)
199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
--> 201 chunks = list(chunks)
202 return ''.join(chunks)
203
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode(o, _current_indent_level)
429 yield from _iterencode_list(o, _current_indent_level)
430 elif isinstance(o, dict):
--> 431 yield from _iterencode_dict(o, _current_indent_level)
432 else:
433 if markers is not None:
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_dict(dct, _current_indent_level)
403 else:
404 chunks = _iterencode(value, _current_indent_level)
--> 405 yield from chunks
406 if newline_indent is not None:
407 _current_indent_level -= 1
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_list(lst, _current_indent_level)
323 else:
324 chunks = _iterencode(value, _current_indent_level)
--> 325 yield from chunks
326 if newline_indent is not None:
327 _current_indent_level -= 1
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_dict(dct, _current_indent_level)
403 else:
404 chunks = _iterencode(value, _current_indent_level)
--> 405 yield from chunks
406 if newline_indent is not None:
407 _current_indent_level -= 1
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_list(lst, _current_indent_level)
323 else:
324 chunks = _iterencode(value, _current_indent_level)
--> 325 yield from chunks
326 if newline_indent is not None:
327 _current_indent_level -= 1
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode(o, _current_indent_level)
436 raise ValueError("Circular reference detected")
437 markers[markerid] = o
--> 438 o = _default(o)
439 yield from _iterencode(o, _current_indent_level)
440 if markers is not None:
~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in default(self, o)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
181
TypeError: Object of type int64 is not JSON serializable
The issue is that the feature values in the returned dict are NumPy scalar types (e.g. `int64`), which the standard-library `json` encoder cannot serialize by default.
Should this be a requirement?
We currently use JSON serialization in our docs, but that's just for pretty-printing.