djl

ai.djl.engine.EngineException: Failed to load PyTorch native library

[Open] shutter-cp opened this issue 2 years ago • 5 comments

23/02/01 16:09:44 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 2)
ai.djl.engine.EngineException: Failed to load PyTorch native library
	at ai.djl.pytorch.engine.PtEngine.newInstance(PtEngine.java:85)
	at ai.djl.pytorch.engine.PtEngineProvider.getEngine(PtEngineProvider.java:40)
	at ai.djl.engine.Engine.getEngine(Engine.java:186)
	at ai.djl.Model.newInstance(Model.java:99)
	at ai.djl.repository.zoo.BaseModelLoader.createModel(BaseModelLoader.java:189)
	at ai.djl.repository.zoo.BaseModelLoader.loadModel(BaseModelLoader.java:152)
	at ai.djl.repository.zoo.Criteria.loadModel(Criteria.java:168)
	at net.xxx.ai.recommender.core.NERModel$.loadModel(NERModel.scala:39)
	at net.xxx.ai.recommender.core.NERModel$.model$lzycompute$1(NERModel.scala:64)
	at net.xxx.ai.recommender.core.NERModel$.net$qihoo$ai$recommender$core$NERModel$$model$1(NERModel.scala:64)
	at net.xxx.ai.recommender.core.NERModel$$anonfun$3.apply(NERModel.scala:68)
	at net.xxx.ai.recommender.core.NERModel$$anonfun$3.apply(NERModel.scala:66)
	at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
	at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:868)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:868)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:415)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1403)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:421)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.UnsatisfiedLinkError: /data01/home/yarn/.djl.ai/pytorch/1.9.1-cpu-linux-x86_64/libc10.so: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.21' not found (required by /data01/home/yarn/.djl.ai/pytorch/1.9.1-cpu-linux-x86_64/libc10.so)
	at java.lang.ClassLoader$NativeLibrary.load(Native Method)
	at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1938)
	at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1821)
	at java.lang.Runtime.load0(Runtime.java:809)
	at java.lang.System.load(System.java:1086)
	at ai.djl.pytorch.jni.LibUtils.loadNativeLibrary(LibUtils.java:368)
	at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
	at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
	at java.util.Iterator.forEachRemaining(Iterator.java:116)
	at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
	at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
	at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
	at ai.djl.pytorch.jni.LibUtils.loadLibTorch(LibUtils.java:146)
	at ai.djl.pytorch.jni.LibUtils.loadLibrary(LibUtils.java:78)
	at ai.djl.pytorch.engine.PtEngine.newInstance(PtEngine.java:54)
	... 29 more
23/02/01 16:09:44 ERROR Executor: Exception in task 1.0 in stage 3.0 (TID 3)
ai.djl.engine.EngineException: Failed to load PyTorch native library
	at ai.djl.pytorch.engine.PtEngine.newInstance(PtEngine.java:85)
	at ai.djl.pytorch.engine.PtEngineProvider.getEngine(PtEngineProvider.java:40)
	at ai.djl.engine.Engine.getEngine(Engine.java:186)
	at ai.djl.Model.newInstance(Model.java:99)
	at ai.djl.repository.zoo.BaseModelLoader.createModel(BaseModelLoader.java:189)
	at ai.djl.repository.zoo.BaseModelLoader.loadModel(BaseModelLoader.java:152)
	at ai.djl.repository.zoo.Criteria.loadModel(Criteria.java:168)
	at net.xxx.ai.recommender.core.NERModel$.loadModel(NERModel.scala:39)
	at net.xxx.ai.recommender.core.NERModel$.model$lzycompute$1(NERModel.scala:64)
	at net.xxx.ai.recommender.core.NERModel$.net$qihoo$ai$recommender$core$NERModel$$model$1(NERModel.scala:64)
	at net.xxx.ai.recommender.core.NERModel$$anonfun$3.apply(NERModel.scala:68)
	at net.xxx.ai.recommender.core.NERModel$$anonfun$3.apply(NERModel.scala:66)
	at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
	at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:868)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:868)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:415)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1403)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:421)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.UnsatisfiedLinkError: /data01/home/yarn/.djl.ai/pytorch/1.9.1-cpu-linux-x86_64/libc10.so: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.21' not found (required by /data01/home/yarn/.djl.ai/pytorch/1.9.1-cpu-linux-x86_64/libc10.so)
	at java.lang.ClassLoader$NativeLibrary.load(Native Method)
	at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1938)
	at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1821)
	at java.lang.Runtime.load0(Runtime.java:809)
	at java.lang.System.load(System.java:1086)
	at ai.djl.pytorch.jni.LibUtils.loadNativeLibrary(LibUtils.java:368)
	at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
	at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
	at java.util.Iterator.forEachRemaining(Iterator.java:116)
	at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
	at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
	at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
	at ai.djl.pytorch.jni.LibUtils.loadLibTorch(LibUtils.java:146)
	at ai.djl.pytorch.jni.LibUtils.loadLibrary(LibUtils.java:78)
	at ai.djl.pytorch.engine.PtEngine.newInstance(PtEngine.java:54)
	... 29 more
23/02/01 16:10:16 ERROR CoarseGrainedExecutorBackend: RECEIVED SIGNAL TERM

shutter-cp avatar Feb 01 '23 08:02 shutter-cp