hail
hail copied to clipboard
Java heap space error in to_pandas
To report a bug, fill in the information below. For support and feature requests, please use the discussion forum: https://discuss.hail.is/
Please include the full Hail version and as much detail as possible.
Reposting this here from the forum:
https://discuss.hail.is/t/getting-java-heap-error-tried-a-bunch-of-things-with-the-executor-and-memory-settings/2753
I did some more tweaking of the memory settings and got a new exception:
---------------------------------------------------------------------------
Py4JError Traceback (most recent call last)
Input In [3], in <cell line: 2>()
1 ## Import scores table
----> 2 score_pd = ht_score.to_pandas()
File <decorator-gen-1091>:2, in to_pandas(self, flatten)
File ~/mambaforge/lib/python3.9/site-packages/hail/typecheck/check.py:577, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
574 @decorator
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
File ~/mambaforge/lib/python3.9/site-packages/hail/table.py:3340, in Table.to_pandas(self, flatten)
3338 dtypes_struct = table.row.dtype
3339 collect_dict = {key: hl.agg.collect(value) for key, value in table.row.items()}
-> 3340 column_struct_array = table.aggregate(hl.struct(**collect_dict))
3341 columns = list(column_struct_array.keys())
3342 data_dict = {}
File <decorator-gen-1037>:2, in aggregate(self, expr, _localize)
File ~/mambaforge/lib/python3.9/site-packages/hail/typecheck/check.py:577, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
574 @decorator
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
File ~/mambaforge/lib/python3.9/site-packages/hail/table.py:1231, in Table.aggregate(self, expr, _localize)
1228 agg_ir = ir.TableAggregate(base._tir, expr._ir)
1230 if _localize:
-> 1231 return Env.backend().execute(hl.ir.MakeTuple([agg_ir]))[0]
1233 return construct_expr(ir.LiftMeOut(agg_ir), expr.dtype)
File ~/mambaforge/lib/python3.9/site-packages/hail/backend/py4j_backend.py:99, in Py4JBackend.execute(self, ir, timed)
97 try:
98 result_tuple = self._jbackend.executeEncode(jir, stream_codec)
---> 99 (result, timings) = (result_tuple._1(), result_tuple._2())
100 value = ir.typ._from_encoding(result)
102 return (value, timings) if timed else value
File ~/mambaforge/lib/python3.9/site-packages/py4j/java_gateway.py:1304, in JavaMember.__call__(self, *args)
1298 command = proto.CALL_COMMAND_NAME +\
1299 self.command_header +\
1300 args_command +\
1301 proto.END_COMMAND_PART
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1307 for temp_arg in temp_args:
1308 temp_arg._detach()
File ~/mambaforge/lib/python3.9/site-packages/hail/backend/py4j_backend.py:21, in handle_java_exception.<locals>.deco(*args, **kwargs)
19 import pyspark
20 try:
---> 21 return f(*args, **kwargs)
22 except py4j.protocol.Py4JJavaError as e:
23 s = e.java_exception.toString()
File ~/mambaforge/lib/python3.9/site-packages/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
--> 330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
333 else:
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
336 format(target_id, ".", name))
Py4JError: An error occurred while calling o83._1. Trace:
java.lang.NegativeArraySizeException: -1966455376
at py4j.Base64.encodeToChar(Base64.java:681)
at py4j.Base64.encodeToString(Base64.java:734)
at py4j.Protocol.encodeBytes(Protocol.java:154)
at py4j.ReturnObject.getPrimitiveReturnObject(ReturnObject.java:150)
at py4j.Gateway.getReturnObject(Gateway.java:188)
at py4j.Gateway.invoke(Gateway.java:283)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:829)
Sorted