Fregata icon indicating copy to clipboard operation
Fregata copied to clipboard

Fail to train model with java.lang.NullPointerException (null)

Open wangxiangbupt opened this issue 8 years ago • 2 comments

17/02/10 18:20:31 INFO TaskSetManager: Lost task 345.2 in stage 1.0 (TID 18697) on executor worker8.spark.training.m.com: java.lang.NullPointerException (null) [duplicate 908] Exception in thread "main" 17/02/10 18:20:31 INFO TaskSetManager: Lost task 141.2 in stage 1.0 (TID 18698) on executor worker3.spark.training.m.com: java.lang.NullPointerException (null) [duplicate 909] org.apache.spark.SparkException: Job aborted due to stage failure: Task 370 in stage 1.0 failed 4 times, most recent failure: Lost task 370.3 in stage 1.0 (TID 18666, worker4.spark.training.m.com): java.lang.NullPointerException at fregata.model.classification.LogisticRegression.run(LogisticRegression.scala:83) at fregata.model.classification.LogisticRegression.run(LogisticRegression.scala:73) at fregata.model.ModelTrainer$$anonfun$run$1.apply$mcVI$sp(ModelTrainer.scala:21) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) at fregata.model.ModelTrainer$class.run(ModelTrainer.scala:19) at fregata.model.classification.LogisticRegression.run(LogisticRegression.scala:73) at fregata.spark.model.SparkTrainer$$anonfun$1.apply(SparkTrainer.scala:26) at fregata.spark.model.SparkTrainer$$anonfun$1.apply(SparkTrainer.scala:24) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745)

Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952) at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1025) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at org.apache.spark.rdd.RDD.reduce(RDD.scala:1007) at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1127) at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1.apply(RDD.scala:1058) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at org.apache.spark.rdd.RDD.treeReduce(RDD.scala:1036) at fregata.spark.model.SparkTrainer.run(SparkTrainer.scala:28) at fregata.spark.model.SparkTrainer$$anonfun$run$1.apply$mcVI$sp(SparkTrainer.scala:15) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) at fregata.spark.model.SparkTrainer.run(SparkTrainer.scala:13) at fregata.spark.model.classification.LogisticRegression$.run(LogisticRegression.scala:29) at com.meitu.rec.longTermRecTest$.fregata_lr(longTermRecTest.scala:69) at com.meitu.rec.longTermRecTest$.run(longTermRecTest.scala:59) at com.meitu.rec.longTermRecTest$$anonfun$main$1.apply(longTermRecTest.scala:49) at com.meitu.rec.longTermRecTest$$anonfun$main$1.apply(longTermRecTest.scala:48) at scala.Option.map(Option.scala:145) at com.meitu.rec.longTermRecTest$.main(longTermRecTest.scala:48) at com.meitu.rec.longTermRecTest.main(longTermRecTest.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at 
org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.lang.NullPointerException at fregata.model.classification.LogisticRegression.run(LogisticRegression.scala:83) at fregata.model.classification.LogisticRegression.run(LogisticRegression.scala:73) at fregata.model.ModelTrainer$$anonfun$run$1.apply$mcVI$sp(ModelTrainer.scala:21) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) at fregata.model.ModelTrainer$class.run(ModelTrainer.scala:19) at fregata.model.classification.LogisticRegression.run(LogisticRegression.scala:73) at fregata.spark.model.SparkTrainer$$anonfun$1.apply(SparkTrainer.scala:26) at fregata.spark.model.SparkTrainer$$anonfun$1.apply(SparkTrainer.scala:24) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745)

wangxiangbupt avatar Feb 10 '17 10:02 wangxiangbupt

This may be the bug that commit https://github.com/TalkingData/Fregata/commit/b004ca50f4784a2879bbb46cf895412d263049aa fixed. Could you try this code?

takun2s avatar Feb 14 '17 01:02 takun2s

I got the same error. May I know what the fix is?

msantap avatar Apr 17 '19 23:04 msantap