evalml icon indicating copy to clipboard operation
evalml copied to clipboard

Prediction explanations should be serializable when output_format is dict

Open freddyaboulton opened this issue 4 years ago • 0 comments

Repro:

from evalml.demos import load_fraud
from evalml.pipelines import BinaryClassificationPipeline
from evalml.model_understanding import explain_prediction  # must match the function called below
import json

class Fraud(BinaryClassificationPipeline):  # minimal custom pipeline used only for this repro
    custom_name = "Fraud Pipeline"
    component_graph = ["Imputer", "DateTime Featurization Component",
                       "Text Featurization Component", "One Hot Encoder",
                       "Logistic Regression Classifier"]


X, y = load_fraud(1000)  # presumably limits the demo data to 1000 rows -- confirm against load_fraud

fraud = Fraud({})  # empty dict: presumably "use default component parameters" -- confirm

fraud.fit(X, y)

exp = explain_prediction(fraud, X, y, index_to_explain=10, top_k_features=12, output_format="dict")
# The next call is the failure being reported: TypeError, int64 is not JSON serializable.
json.dumps(exp, indent=2)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-2-5ed23ab224b5> in <module>
     19 exp = explain_prediction(fraud, X, y, index_to_explain=10, top_k_features=12, output_format="dict")
     20 
---> 21 json.dumps(exp, indent=2)

~/miniconda3/envs/evalml/lib/python3.8/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    232     if cls is None:
    233         cls = JSONEncoder
--> 234     return cls(
    235         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    236         check_circular=check_circular, allow_nan=allow_nan, indent=indent,

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in encode(self, o)
    199         chunks = self.iterencode(o, _one_shot=True)
    200         if not isinstance(chunks, (list, tuple)):
--> 201             chunks = list(chunks)
    202         return ''.join(chunks)
    203 

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode(o, _current_indent_level)
    429             yield from _iterencode_list(o, _current_indent_level)
    430         elif isinstance(o, dict):
--> 431             yield from _iterencode_dict(o, _current_indent_level)
    432         else:
    433             if markers is not None:

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_dict(dct, _current_indent_level)
    403                 else:
    404                     chunks = _iterencode(value, _current_indent_level)
--> 405                 yield from chunks
    406         if newline_indent is not None:
    407             _current_indent_level -= 1

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_list(lst, _current_indent_level)
    323                 else:
    324                     chunks = _iterencode(value, _current_indent_level)
--> 325                 yield from chunks
    326         if newline_indent is not None:
    327             _current_indent_level -= 1

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_dict(dct, _current_indent_level)
    403                 else:
    404                     chunks = _iterencode(value, _current_indent_level)
--> 405                 yield from chunks
    406         if newline_indent is not None:
    407             _current_indent_level -= 1

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode_list(lst, _current_indent_level)
    323                 else:
    324                     chunks = _iterencode(value, _current_indent_level)
--> 325                 yield from chunks
    326         if newline_indent is not None:
    327             _current_indent_level -= 1

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in _iterencode(o, _current_indent_level)
    436                     raise ValueError("Circular reference detected")
    437                 markers[markerid] = o
--> 438             o = _default(o)
    439             yield from _iterencode(o, _current_indent_level)
    440             if markers is not None:

~/miniconda3/envs/evalml/lib/python3.8/json/encoder.py in default(self, o)
    177 
    178         """
--> 179         raise TypeError(f'Object of type {o.__class__.__name__} '
    180                         f'is not JSON serializable')
    181 

TypeError: Object of type int64 is not JSON serializable

The issue is that the feature values in the returned dict are numpy scalar types (here, `int64`), which the standard-library `json` encoder cannot serialize by default.

Should JSON-serializability of the `output_format="dict"` result be a requirement?

We currently use json serialization in our docs but that's just for pretty-printing.

freddyaboulton avatar Feb 23 '21 16:02 freddyaboulton