spark-search
spark-search copied to clipboard
Indexation fails with NoSuchFileException while indexing huge data
With small amounts of data we do not hit this issue, but when we try to process a very large dataset, indexation fails with the following exception. Any guidance here would be a great help.
22/09/16 08:17:34 WARN org.apache.spark.scheduler.TaskSetManager: Lost task 32.0 in stage 47.0 (TID 1449) (offerexposure-cluster-naidu-1-w-10.c.wmt-mtech-offerexposure-stg.internal executor 1): org.apache.spark.search.SearchException: indexation failed on partition 1 and directory /tmp/spark-search/application_1663309524066_0008-sparksearch-rdd149-index-1
at org.apache.spark.search.rdd.SearchPartitionIndex.monitorIndexation(SearchPartitionIndex.java:145)
at org.apache.spark.search.rdd.SearchPartitionIndex.index(SearchPartitionIndex.java:82)
at org.apache.spark.search.rdd.SearchRDDIndexer.compute(SearchRDDIndexer.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:386)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$6(BlockManager.scala:1461)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$6$adapted(BlockManager.scala:1459)
at org.apache.spark.storage.DiskStore.put(DiskStore.scala:70)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1459)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1350)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1414)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1237)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:335)
at org.apache.spark.search.rdd.SearchRDDCartesian.compute(SearchRDDCartesian.scala:54)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.nio.file.NoSuchFileException: /tmp/spark-search/application_1663309524066_0008-sparksearch-rdd149-index-1/pending_segments_1
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
at sun.nio.fs.UnixFileSystemProvider.implDelete(UnixFileSystemProvider.java:244)
at sun.nio.fs.AbstractFileSystemProvider.delete(AbstractFileSystemProvider.java:103)
at java.nio.file.Files.delete(Files.java:1126)
at org.apache.lucene.store.FSDirectory.privateDeleteFile(FSDirectory.java:370)
at org.apache.lucene.store.FSDirectory.deleteFile(FSDirectory.java:339)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.deleteFile(LockValidatingDirectoryWrapper.java:38)
at org.apache.lucene.index.IndexFileDeleter.deleteFile(IndexFileDeleter.java:705)
at org.apache.lucene.index.IndexFileDeleter.deleteFiles(IndexFileDeleter.java:699)
at org.apache.lucene.index.IndexFileDeleter.<init>(IndexFileDeleter.java:238)
at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1089)
at org.apache.spark.search.rdd.SearchPartitionIndex.lambda$index$1(SearchPartitionIndex.java:90)
at org.apache.spark.search.rdd.SearchPartitionIndex.monitorIndexation(SearchPartitionIndex.java:128)
... 29 more
Thanks, Naidu