h2o-tutorials
h2o-tutorials copied to clipboard
Relevel factor H2OResponseError
Hi,
I'm using the relevel() function to set the base level of a column in my dataframe, and I get this error when the column's values contain '+', '-' or ' ' characters (there are probably more, but these are the ones I've tried).
import pandas as pd
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
import h2o
import re
# Minimal reproduction: relevel() on a categorical column whose target level
# contains a space character.
h2o.init()
# Small sample table; 'Mexico City' is the only 'city' value containing a space.
data = {
'name': ['Xavier', 'Ann', 'Jana', 'Yi', 'Robin', 'Amal', 'Nori'],
'city': ['Mexico City', 'Toronto', 'Prague', 'Shanghai',
'Manchester', 'Cairo', 'Osaka'],
'age': [41, 28, 33, 34, 38, 31, 37],
'py-score': [88.0, 79.0, 81.0, 80.0, 68.0, 61.0, 84.0]
}
# Build the pandas frame, then upload it as an H2OFrame.
df = pd.DataFrame(data=data)
hf = h2o.H2OFrame(df)
# Convert 'city' to a factor and ask for 'Mexico City' as the base level.
# When the result is displayed, the Rapids request carries the level as
# 'Mexico%20City' and the server reports that this level is not in the column.
hf['city'].asfactor().relevel('Mexico City')
This raises an H2OResponseError:
---------------------------------------------------------------------------
H2OResponseError Traceback (most recent call last)
~\PycharmProjects\h2o\venv\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\PycharmProjects\h2o\venv\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~\PycharmProjects\h2o\venv\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in __repr__(self)
579 stk = traceback.extract_stack()
580 if not ("IPython" in stk[-2][0] and "info" == stk[-2][2]):
--> 581 self.show()
582 return ""
583
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in show(self, use_pandas, rows, cols)
610 print("This H2OFrame is empty and not initialized.")
611 return
--> 612 if self.nrows == 0:
613 print("This H2OFrame is empty.")
614 return
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in nrows(self)
319 if not self._ex._cache.nrows_valid():
320 self._ex._cache.flush()
--> 321 self._frame(fill_cache=True)
322 return self._ex._cache.nrows
323
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in _frame(self, rows, rows_offset, cols, cols_offset, fill_cache)
729
730 def _frame(self, rows=10, rows_offset=0, cols=-1, cols_offset=0, fill_cache=False):
--> 731 self._ex._eager_frame()
732 if fill_cache:
733 self._ex._cache.fill(rows=rows, rows_offset=rows_offset, cols=cols, cols_offset=cols_offset)
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in _eager_frame(self)
88 if not self._cache.is_empty(): return
89 if self._cache._id is not None: return # Data already computed under ID, but not cached locally
---> 90 self._eval_driver('frame')
91
92 def _eager_scalar(self): # returns a scalar (or a list of scalars)
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in _eval_driver(self, top)
112 """
113 exec_str = self._get_ast_str(top)
--> 114 res = ExprNode.rapids(exec_str)
115 if 'scalar' in res:
116 if isinstance(res['scalar'], list):
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in rapids(expr)
256 :returns: The JSON response (as a python dictionary) of the Rapids execution
257 """
--> 258 return h2o.api("POST /99/Rapids", data={"ast": expr, "session_id": h2o.connection().session_id})
259
260
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\h2o.py in api(endpoint, data, json, filename, save_to)
111 # type checks are performed in H2OConnection class
112 _check_connection()
--> 113 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
114
115
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\backend\connection.py in request(self, endpoint, data, json, filename, save_to)
479 save_to = save_to(resp)
480 self._log_end_transaction(start_time, resp)
--> 481 return self._process_response(resp, save_to)
482
483 except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\backend\connection.py in _process_response(response, save_to)
817 if status_code in {400, 404, 412} and isinstance(data, H2OErrorV3):
818 data.show_stacktrace = False
--> 819 raise H2OResponseError(data)
820
821 # Server errors (notably 500 = "Server Error")
H2OResponseError: Server error java.lang.IllegalArgumentException:
Error: Did not find level `Mexico%20City` in the column.
Request: POST /99/Rapids
data: {'ast': "(tmp= py_140_sid_b771 (relevel (as.factor (cols_py Key_Frame__upload_bda4861347c26f55bb24425d8760491c.hex 'city')) 'Mexico%20City'))", 'session_id': '_sid_b771'}
Any ideas on how I can relevel data whose levels contain whitespace and other symbols?
Thanks.