hail icon indicating copy to clipboard operation
hail copied to clipboard

Java heap space error in `to_pandas`

Open jerome-f opened this issue 1 year ago • 1 comment

To report a bug, fill in the information below. For support and feature requests, please use the discussion forum: https://discuss.hail.is/

Please include the full Hail version and as much detail as possible.


Reposting this here from the forum:

https://discuss.hail.is/t/getting-java-heap-error-tried-a-bunch-of-things-with-the-executor-and-memory-settings/2753

jerome-f avatar Jul 15 '22 22:07 jerome-f

I did some more tweaking of the memory settings and got a new exception:

 ---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
Input In [3], in <cell line: 2>()
      1 ## Import scores  table
----> 2 score_pd = ht_score.to_pandas()

File <decorator-gen-1091>:2, in to_pandas(self, flatten)

File ~/mambaforge/lib/python3.9/site-packages/hail/typecheck/check.py:577, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    574 @decorator
    575 def wrapper(__original_func, *args, **kwargs):
    576     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577     return __original_func(*args_, **kwargs_)

File ~/mambaforge/lib/python3.9/site-packages/hail/table.py:3340, in Table.to_pandas(self, flatten)
   3338 dtypes_struct = table.row.dtype
   3339 collect_dict = {key: hl.agg.collect(value) for key, value in table.row.items()}
-> 3340 column_struct_array = table.aggregate(hl.struct(**collect_dict))
   3341 columns = list(column_struct_array.keys())
   3342 data_dict = {}

File <decorator-gen-1037>:2, in aggregate(self, expr, _localize)

File ~/mambaforge/lib/python3.9/site-packages/hail/typecheck/check.py:577, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    574 @decorator
    575 def wrapper(__original_func, *args, **kwargs):
    576     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577     return __original_func(*args_, **kwargs_)

File ~/mambaforge/lib/python3.9/site-packages/hail/table.py:1231, in Table.aggregate(self, expr, _localize)
   1228 agg_ir = ir.TableAggregate(base._tir, expr._ir)
   1230 if _localize:
-> 1231     return Env.backend().execute(hl.ir.MakeTuple([agg_ir]))[0]
   1233 return construct_expr(ir.LiftMeOut(agg_ir), expr.dtype)

File ~/mambaforge/lib/python3.9/site-packages/hail/backend/py4j_backend.py:99, in Py4JBackend.execute(self, ir, timed)
     97 try:
     98     result_tuple = self._jbackend.executeEncode(jir, stream_codec)
---> 99     (result, timings) = (result_tuple._1(), result_tuple._2())
    100     value = ir.typ._from_encoding(result)
    102     return (value, timings) if timed else value

File ~/mambaforge/lib/python3.9/site-packages/py4j/java_gateway.py:1304, in JavaMember.__call__(self, *args)
   1298 command = proto.CALL_COMMAND_NAME +\
   1299     self.command_header +\
   1300     args_command +\
   1301     proto.END_COMMAND_PART
   1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
   1305     answer, self.gateway_client, self.target_id, self.name)
   1307 for temp_arg in temp_args:
   1308     temp_arg._detach()

File ~/mambaforge/lib/python3.9/site-packages/hail/backend/py4j_backend.py:21, in handle_java_exception.<locals>.deco(*args, **kwargs)
     19 import pyspark
     20 try:
---> 21     return f(*args, **kwargs)
     22 except py4j.protocol.Py4JJavaError as e:
     23     s = e.java_exception.toString()

File ~/mambaforge/lib/python3.9/site-packages/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
    326         raise Py4JJavaError(
    327             "An error occurred while calling {0}{1}{2}.\n".
    328             format(target_id, ".", name), value)
    329     else:
--> 330         raise Py4JError(
    331             "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332             format(target_id, ".", name, value))
    333 else:
    334     raise Py4JError(
    335         "An error occurred while calling {0}{1}{2}".
    336         format(target_id, ".", name))

Py4JError: An error occurred while calling o83._1. Trace:
java.lang.NegativeArraySizeException: -1966455376
	at py4j.Base64.encodeToChar(Base64.java:681)
	at py4j.Base64.encodeToString(Base64.java:734)
	at py4j.Protocol.encodeBytes(Protocol.java:154)
	at py4j.ReturnObject.getPrimitiveReturnObject(ReturnObject.java:150)
	at py4j.Gateway.getReturnObject(Gateway.java:188)
	at py4j.Gateway.invoke(Gateway.java:283)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.base/java.lang.Thread.run(Thread.java:829)

jerome-f avatar Jul 15 '22 23:07 jerome-f

Sorted

jerome-f avatar Aug 26 '22 15:08 jerome-f