glow
Python tests fail with KeyError: '_glow_regression_values'
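Seen in Glow's linear regression GWAS tests on CI (CircleCI, Python 3.8 conda environment). The KeyError comes from `_linear_regression_inner` looking up Glow's internal values column inside the pandas `groupby('contigName').apply(...)` used for LOCO dispatch. The full py4j stack trace is below; a possible reading of the trace and a minimal repro sketch follow it.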
py4j.protocol.Py4JJavaError: An error occurred while calling o1740.getResult.
E : org.apache.spark.SparkException: Exception thrown in awaitResult:
E at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
E at org.apache.spark.security.SocketAuthServer.getResult(SocketAuthServer.scala:97)
E at org.apache.spark.security.SocketAuthServer.getResult(SocketAuthServer.scala:93)
E at sun.reflect.GeneratedMethodAccessor118.invoke(Unknown Source)
E at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.lang.reflect.Method.invoke(Method.java:498)
E at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
E at py4j.Gateway.invoke(Gateway.java:282)
E at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E at py4j.commands.CallCommand.execute(CallCommand.java:79)
E at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
E at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
E at java.lang.Thread.run(Thread.java:750)
E Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 81.0 failed 1 times, most recent failure: Lost task 1.0 in stage 81.0 (TID 127) (74bb240623d8 executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
E File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 276, in map_func
E yield gwas_fx._loco_dispatch(pdf, Y_state, _linear_regression_inner, Y_mask, Y_scale, Q,
E File "/home/circleci/glow/python/glow/gwas/functions.py", line 97, in _loco_dispatch
E return genotype_pdf.groupby('contigName', sort=False, as_index=False)\
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1286, in apply
E return self._python_apply_general(f, self._selected_obj)
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1309, in _python_apply_general
E keys, values, mutated = self.grouper.apply(f, data, self.axis)
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 820, in apply
E pass
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 1360, in fast_apply
E return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
E File "pandas/_libs/reduction.pyx", line 381, in pandas._libs.reduction.apply_frame_axis0
E File "/home/circleci/glow/python/glow/gwas/functions.py", line 98, in <lambda>
E .apply(lambda pdf: f(pdf, state[pdf['contigName'].iloc[0]], *args))
E File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 223, in _linear_regression_inner
E genotype_values = genotype_pdf[_VALUES_COLUMN_NAME].array
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__
E indexer = self.columns.get_loc(key)
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc
E raise KeyError(key) from err
E KeyError: '_glow_regression_values'
E
E at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:555)
E at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:101)
E at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:50)
E at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:508)
E at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
E at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
E at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
E at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
E at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
E at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
E at scala.collection.Iterator.foreach(Iterator.scala:941)
E at scala.collection.Iterator.foreach$(Iterator.scala:941)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
E at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
E at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
E at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
E at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
E at scala.collection.TraversableOnce.to(TraversableOnce.scala:313)
E at scala.collection.TraversableOnce.to$(TraversableOnce.scala:311)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
E at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:305)
E at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:305)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
E at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:292)
E at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:286)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$6(Dataset.scala:3650)
E at org.apache.spark.SparkContext.$anonfun$runJob$6(SparkContext.scala:2308)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:131)
E at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E at java.lang.Thread.run(Thread.java:750)
E
E Driver stacktrace:
E at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
E at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
E at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
E at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
E at scala.Option.foreach(Option.scala:274)
E at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
E at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2309)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$5(Dataset.scala:3648)
E at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$2(Dataset.scala:3652)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$2$adapted(Dataset.scala:3629)
E at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3706)
E at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
E at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
E at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
E at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
E at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
E at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3704)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$1(Dataset.scala:3629)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$1$adapted(Dataset.scala:3628)
E at org.apache.spark.security.SocketAuthServer$.$anonfun$serveToStream$2(SocketAuthServer.scala:139)
E at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E at org.apache.spark.security.SocketAuthServer$.$anonfun$serveToStream$1(SocketAuthServer.scala:141)
E at org.apache.spark.security.SocketAuthServer$.$anonfun$serveToStream$1$adapted(SocketAuthServer.scala:136)
E at org.apache.spark.security.SocketFuncServer.handleConnection(SocketAuthServer.scala:113)
E at org.apache.spark.security.SocketFuncServer.handleConnection(SocketAuthServer.scala:107)
E at org.apache.spark.security.SocketAuthServer$$anon$1.$anonfun$run$4(SocketAuthServer.scala:68)
E at scala.util.Try$.apply(Try.scala:213)
E at org.apache.spark.security.SocketAuthServer$$anon$1.run(SocketAuthServer.scala:68)
E Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
E File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 276, in map_func
E yield gwas_fx._loco_dispatch(pdf, Y_state, _linear_regression_inner, Y_mask, Y_scale, Q,
E File "/home/circleci/glow/python/glow/gwas/functions.py", line 97, in _loco_dispatch
E return genotype_pdf.groupby('contigName', sort=False, as_index=False)\
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1286, in apply
E return self._python_apply_general(f, self._selected_obj)
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1309, in _python_apply_general
E keys, values, mutated = self.grouper.apply(f, data, self.axis)
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 820, in apply
E pass
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 1360, in fast_apply
E return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
E File "pandas/_libs/reduction.pyx", line 381, in pandas._libs.reduction.apply_frame_axis0
E File "/home/circleci/glow/python/glow/gwas/functions.py", line 98, in <lambda>
E .apply(lambda pdf: f(pdf, state[pdf['contigName'].iloc[0]], *args))
E File "/home/circleci/glow/python/glow/gwas/lin_reg.py", line 223, in _linear_regression_inner
E genotype_values = genotype_pdf[_VALUES_COLUMN_NAME].array
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__
E indexer = self.columns.get_loc(key)
E File "/home/circleci/conda/envs/glow/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc
E raise KeyError(key) from err
E KeyError: '_glow_regression_values'
E
E at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:555)
E at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:101)
E at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:50)
E at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:508)
E at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
E at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
E at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
E at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
E at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
E at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
E at scala.collection.Iterator.foreach(Iterator.scala:941)
E at scala.collection.Iterator.foreach$(Iterator.scala:941)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
E at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
E at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
E at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
E at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
E at scala.collection.TraversableOnce.to(TraversableOnce.scala:313)
E at scala.collection.TraversableOnce.to$(TraversableOnce.scala:311)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
E at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:305)
E at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:305)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
E at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:292)
E at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:286)
E at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
E at org.apache.spark.sql.Dataset.$anonfun$collectAsArrowToPython$6(Dataset.scala:3650)
E at org.apache.spark.SparkContext.$anonfun$runJob$6(SparkContext.scala:2308)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:131)
E at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E at java.lang.Thread.run(Thread.java:750)
../conda/envs/glow/lib/python3.8/site-packages/py4j/protocol.py:326: Py4JJavaError
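Reading the trace: pandas takes the groupby fast path (`fast_apply` in `pandas/core/groupby/ops.py`, backed by `libreduction.apply_frame_axis0`), which exists only in older pandas 1.x releases and hands each group to the UDF over shared block data. `_linear_regression_inner` reads `_glow_regression_values` at `lin_reg.py:223`; the likeliest way that column can be missing for a group is that an earlier invocation mutated the group frame in place (for example by dropping the column after extracting it) and the mutation leaked into later groups. That is an inference from the trace, not a confirmed root cause.

A hypothetical minimal sketch of that failure mode, not Glow's actual code (`_inner` stands in for `_linear_regression_inner`; whether the KeyError actually fires depends on the installed pandas version):

```python
import pandas as pd

_VALUES_COLUMN_NAME = '_glow_regression_values'  # Glow's internal column name

def _inner(pdf: pd.DataFrame) -> pd.DataFrame:
    # Read the values column, then mutate the group frame in place,
    # mimicking a UDF that drops the column after extracting it.
    values = pdf[_VALUES_COLUMN_NAME]  # KeyError if the column is already gone
    del pdf[_VALUES_COLUMN_NAME]       # in-place mutation of the group frame
    return pd.DataFrame({'n_values': [len(values)]})

df = pd.DataFrame({
    'contigName': ['chr1', 'chr1', 'chr2', 'chr2'],
    _VALUES_COLUMN_NAME: [0.1, 0.2, 0.3, 0.4],
})

# On a pandas version whose fast path shares data between groups, the
# second group can arrive without the column and raise
# KeyError: '_glow_regression_values'; on other versions this completes
# (possibly via a fallback path or with a SettingWithCopyWarning).
print(df.groupby('contigName', sort=False, as_index=False).apply(_inner))
```

If the mutation hypothesis holds, the usual remedies are a defensive copy in the UDF (`pdf = pdf.copy()` before any deletion), avoiding in-place removal of the column altogether, or pinning pandas to a release whose groupby apply does not take the mutating fast path. These are suggestions from the trace, not the maintainers' confirmed fix.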