telemetry-analysis-service
telemetry-analysis-service copied to clipboard
No module named pyspark
From https://bugzilla.mozilla.org/show_bug.cgi?id=1373631:
Py4JJavaError Traceback (most recent call last)
/usr/lib/spark/python/pyspark/rdd.py in count(self) 1006 3 1007 """ -> 1008 return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() 1009 1010 def stats(self):
/usr/lib/spark/python/pyspark/rdd.py in sum(self) 997 6.0 998 """ --> 999 return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add) 1000 1001 def count(self):
/usr/lib/spark/python/pyspark/rdd.py in fold(self, zeroValue, op) 871 # zeroValue provided to each partition is unique from the one provided 872 # to the final reduce call --> 873 vals = self.mapPartitions(func).collect() 874 return reduce(op, vals, zeroValue) 875
/usr/lib/spark/python/pyspark/rdd.py in collect(self) 774 """ 775 with SCCallSiteSync(self.context) as css: --> 776 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd()) 777 return list(_load_from_socket(port, self._jrdd_deserializer)) 778
/usr/lib/spark/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py in __call__(self, *args) 1131 answer = self.gateway_client.send_command(command) 1132 return_value = get_return_value( -> 1133 answer, self.gateway_client, self.target_id, self.name) 1134 1135 for temp_arg in temp_args:
/usr/lib/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 61 def deco(*a, **kw): 62 try: ---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString()
/usr/lib/spark/python/lib/py4j-0.10.3-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 317 raise Py4JJavaError( 318 "An error occurred while calling {0}{1}{2}.\n". --> 319 format(target_id, ".", name), value) 320 else: 321 raise Py4JError(
Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe. : org.apache.spark.SparkException: Job aborted due to stage failure: Task 181 in stage 41.1 failed 4 times, most recent failure: Lost task 181.3 in stage 41.1 (TID 68634, ip-172-31-10-113.us-west-2.compute.internal): org.apache.spark.SparkException: Error from python worker: /mnt/anaconda2/bin/python: No module named pyspark PYTHONPATH was: /mnt/yarn/usercache/hadoop/filecache/12/__spark_libs__4881124833548485594.zip/spark-core_2.11-2.0.2.jar:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/usr/lib/spark/python/:/usr/lib/spark/python/:/mnt1/yarn/usercache/hadoop/appcache/application_1497282355037_0001/container_1497282355037_0001_02_000264/pyspark.zip:/mnt1/yarn/usercache/hadoop/appcache/application_1497282355037_0001/container_1497282355037_0001_02_000264/py4j-0.10.3-src.zip java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.spark.api.python.PythonWorkerFactory.startDaemon(PythonWorkerFactory.scala:166) at org.apache.spark.api.python.PythonWorkerFactory.createThroughDaemon(PythonWorkerFactory.scala:89) at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:65) at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:114) at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:128) at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319) at org.apache.spark.rdd.RDD.iterator(RDD.scala:283) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) at org.apache.spark.scheduler.Task.run(Task.scala:86) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745)
Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1886) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1899) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1913) at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:912) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:358) at org.apache.spark.rdd.RDD.collect(RDD.scala:911) at 
org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:453) at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:280) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:214) at java.lang.Thread.run(Thread.java:745) Caused by: org.apache.spark.SparkException: Error from python worker: /mnt/anaconda2/bin/python: No module named pyspark PYTHONPATH was: /mnt/yarn/usercache/hadoop/filecache/12/__spark_libs__4881124833548485594.zip/spark-core_2.11-2.0.2.jar:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/usr/lib/spark/python/:/usr/lib/spark/python/:/mnt1/yarn/usercache/hadoop/appcache/application_1497282355037_0001/container_1497282355037_0001_02_000264/pyspark.zip:/mnt1/yarn/usercache/hadoop/appcache/application_1497282355037_0001/container_1497282355037_0001_02_000264/py4j-0.10.3-src.zip java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.spark.api.python.PythonWorkerFactory.startDaemon(PythonWorkerFactory.scala:166) at org.apache.spark.api.python.PythonWorkerFactory.createThroughDaemon(PythonWorkerFactory.scala:89) at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:65) at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:114) at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:128) at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319) at org.apache.spark.rdd.RDD.iterator(RDD.scala:283) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) at org.apache.spark.scheduler.Task.run(Task.scala:86) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ... 1 more