
Python tests fail with KeyError: '_glow_regression_values'

Open · a0x8o opened this issue on Feb 3, 2023 · 1 comment

The Python test suite fails with KeyError: '_glow_regression_values', raised from _linear_regression_inner (lin_reg.py, line 223) while _loco_dispatch applies it per contigName group via pandas groupby. Full traceback:
                  py4j.protocol.Py4JJavaError: An error occurred while calling o1740.getResult.
E                   : org.apache.spark.SparkException: Exception thrown in awaitResult: 
E                   	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
E                   	at org.apache.spark.security.SocketAuthServer.getResult(SocketAuthServer.scala:97)
E                   	at org.apache.spark.security.SocketAuthServer.getResult(SocketAuthServer.scala:93)
E                   	at sun.reflect.GeneratedMethodAccessor118.invoke(Unknown Source)
E                   	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E                   	at java.lang.reflect.Method.invoke(Method.java:498)
E                   	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E                   	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
E                   	at py4j.Gateway.invoke(Gateway.java:282)
E                   	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E                   	at py4j.commands.CallCommand.execute(CallCommand.java:79)
E                   	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
E                   	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
E                   	at java.lang.Thread.run(Thread.java:750)
E                   Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 81.0 failed 1 times, most recent failure: Lost task 1.0 in stage 81.0 (TID 127) (74bb240623d8 executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
E                     File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 276, in map_func
E                       yield gwas_fx._loco_dispatch(pdf, Y_state, _linear_regression_inner, Y_mask, Y_scale, Q,
E                     File "/home/circleci/glow/python/glow/gwas/functions.py", line 97, in _loco_dispatch
E                       return genotype_pdf.groupby('contigName', sort=False, as_index=False)\
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1286, in apply
E                       return self._python_apply_general(f, self._selected_obj)
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1309, in _python_apply_general
E                       keys, values, mutated = self.grouper.apply(f, data, self.axis)
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 820, in apply
E                       pass
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 1360, in fast_apply
E                       return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
E                     File "pandas/_libs/reduction.pyx", line 381, in pandas._libs.reduction.apply_frame_axis0
E                     File "/home/circleci/glow/python/glow/gwas/functions.py", line 98, in <lambda>
E                       .apply(lambda pdf: f(pdf, state[pdf['contigName'].iloc[0]], *args))
E                     File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 223, in _linear_regression_inner
E                       genotype_values = genotype_pdf[_VALUES_COLUMN_NAME].array
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__
E                       indexer = self.columns.get_loc(key)
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc
E                       raise KeyError(key) from err
E                   KeyError: '_glow_regression_values'
E                   
E                   	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:555)
E                   	at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:101)
E                   	at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:50)
E                   	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:508)
E                   	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
E                   	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
E                   	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
E                   	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
E                   	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
E                   	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
E                   	at scala.collection.Iterator.foreach(Iterator.scala:941)
E                   	at scala.collection.Iterator.foreach$(Iterator.scala:941)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
E                   	at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
E                   	at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
E                   	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
E                   	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
E                   	at scala.collection.TraversableOnce.to(TraversableOnce.scala:313)
E                   	at scala.collection.TraversableOnce.to$(TraversableOnce.scala:311)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
E                   	at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:305)
E                   	at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:305)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
E                   	at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:292)
E                   	at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:286)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$6(Dataset.scala:3650)
E                   	at org.apache.spark.SparkContext.$anonfun$runJob$6(SparkContext.scala:2308)
E                   	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E                   	at org.apache.spark.scheduler.Task.run(Task.scala:131)
E                   	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
E                   	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E                   	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
E                   	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E                   	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E                   	at java.lang.Thread.run(Thread.java:750)
E                   
E                   Driver stacktrace:
E                   	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
E                   	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
E                   	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
E                   	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
E                   	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
E                   	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
E                   	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
E                   	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
E                   	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
E                   	at scala.Option.foreach(Option.scala:274)
E                   	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
E                   	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
E                   	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
E                   	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
E                   	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
E                   	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
E                   	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
E                   	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2309)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$5(Dataset.scala:3648)
E                   	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
E                   	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$2(Dataset.scala:3652)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$2$adapted(Dataset.scala:3629)
E                   	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3706)
E                   	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
E                   	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
E                   	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
E                   	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
E                   	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
E                   	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3704)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$1(Dataset.scala:3629)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$1$adapted(Dataset.scala:3628)
E                   	at org.apache.spark.security.SocketAuthServer$.$anonfun$serveToStream$2(SocketAuthServer.scala:139)
E                   	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
E                   	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E                   	at org.apache.spark.security.SocketAuthServer$.$anonfun$serveToStream$1(SocketAuthServer.scala:141)
E                   	at org.apache.spark.security.SocketAuthServer$.$anonfun$serveToStream$1$adapted(SocketAuthServer.scala:136)
E                   	at org.apache.spark.security.SocketFuncServer.handleConnection(SocketAuthServer.scala:113)
E                   	at org.apache.spark.security.SocketFuncServer.handleConnection(SocketAuthServer.scala:107)
E                   	at org.apache.spark.security.SocketAuthServer$$anon$1.$anonfun$run$4(SocketAuthServer.scala:68)
E                   	at scala.util.Try$.apply(Try.scala:213)
E                   	at org.apache.spark.security.SocketAuthServer$$anon$1.run(SocketAuthServer.scala:68)
E                   Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
E                     File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 276, in map_func
E                       yield gwas_fx._loco_dispatch(pdf, Y_state, _linear_regression_inner, Y_mask, Y_scale, Q,
E                     File "/home/circleci/glow/python/glow/gwas/functions.py", line 97, in _loco_dispatch
E                       return genotype_pdf.groupby('contigName', sort=False, as_index=False)\
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1286, in apply
E                       return self._python_apply_general(f, self._selected_obj)
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1309, in _python_apply_general
E                       keys, values, mutated = self.grouper.apply(f, data, self.axis)
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 820, in apply
E                       pass
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 1360, in fast_apply
E                       return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
E                     File "pandas/_libs/reduction.pyx", line 381, in pandas._libs.reduction.apply_frame_axis0
E                     File "/home/circleci/glow/python/glow/gwas/functions.py", line 98, in <lambda>
E                       .apply(lambda pdf: f(pdf, state[pdf['contigName'].iloc[0]], *args))
E                     File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 223, in _linear_regression_inner
E                       genotype_values = genotype_pdf[_VALUES_COLUMN_NAME].array
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__
E                       indexer = self.columns.get_loc(key)
E                     File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc
E                       raise KeyError(key) from err
E                   KeyError: '_glow_regression_values'
E                   
E                   	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:555)
E                   	at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:101)
E                   	at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:50)
E                   	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:508)
E                   	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
E                   	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
E                   	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
E                   	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
E                   	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
E                   	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
E                   	at scala.collection.Iterator.foreach(Iterator.scala:941)
E                   	at scala.collection.Iterator.foreach$(Iterator.scala:941)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
E                   	at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
E                   	at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
E                   	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
E                   	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
E                   	at scala.collection.TraversableOnce.to(TraversableOnce.scala:313)
E                   	at scala.collection.TraversableOnce.to$(TraversableOnce.scala:311)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
E                   	at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:305)
E                   	at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:305)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
E                   	at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:292)
E                   	at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:286)
E                   	at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
E                   	at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$6(Dataset.scala:3650)
E                   	at org.apache.spark.SparkContext.$anonfun$runJob$6(SparkContext.scala:2308)
E                   	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E                   	at org.apache.spark.scheduler.Task.run(Task.scala:131)
E                   	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
E                   	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E                   	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
E                   	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E                   	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E                   	at java.lang.Thread.run(Thread.java:750)

../conda/envs/glow/lib/python3.8/site-packages/py4j/protocol.py:326: Py4JJavaError
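For context, here is a minimal sketch of the pandas behavior this traceback points at. It is not Glow's actual code, and the in-place column deletion is an assumption about what _linear_regression_inner does after reading the values column. On pandas versions that route groupby(...).apply(...) through libreduction.apply_frame_axis0 (reduction.pyx line 381 in the trace above), every group is served from one reused DataFrame object, so a function that mutates its input frame, e.g. by deleting a column, leaves later groups without that column, and the second group fails with exactly this KeyError:

```python
import pandas as pd

# Hypothetical repro sketch, not Glow's code. It assumes the applied
# function mutates the per-group frame (deleting the values column after
# reading it), which is what would make subsequent groups lose
# '_glow_regression_values' on pandas' shared-frame fast apply path.
df = pd.DataFrame({
    'contigName': ['chr1', 'chr1', 'chr2'],
    '_glow_regression_values': [0.1, 0.2, 0.3],
})

def inner(pdf: pd.DataFrame) -> pd.DataFrame:
    values = pdf['_glow_regression_values'].array  # fails for the 2nd group
    del pdf['_glow_regression_values']  # in-place mutation of the shared frame
    return pd.DataFrame({'n': [len(values)]})

# On pandas 1.3.x (whose ops.py/frame.py line numbers match the traceback),
# this raises KeyError: '_glow_regression_values' when processing 'chr2'.
# Newer pandas removed the apply_frame_axis0 fast path, so behavior there
# may differ.
df.groupby('contigName', sort=False, as_index=False).apply(inner)
```

If that assumption holds, a straightforward workaround is to operate on pdf.copy() (or avoid the in-place del) inside the applied function, or to pin pandas to a version without the shared-frame fast path.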

a0x8o · Feb 3, 2023