spark-state-store
spark-state-store copied to clipboard
Does it support HDFS as a state store?
I would like to know whether this library supports HDFS as the state store backend.
I'm getting the following error while running in YARN mode:
22/03/19 19:09:02 WARN TaskSetManager: Lost task 3.0 in stage 2.0 (TID 20) (ip-10-0-0-180.ec2.internal executor 2): org.apache.spark.util.TaskCompletionListenerException: requirement failed: No DB to close
Previous exception in task: Couldn't create directory /home/hadoop/state/db/state_962669983/0/3
org.apache.spark.sql.execution.streaming.state.RocksDbStateStoreProvider.getPath(RocksDbStateStoreProvider.scala:640)
org.apache.spark.sql.execution.streaming.state.RocksDbStateStoreProvider.rocksDbPath$lzycompute(RocksDbStateStoreProvider.scala:616)
org.apache.spark.sql.execution.streaming.state.RocksDbStateStoreProvider.rocksDbPath(RocksDbStateStoreProvider.scala:615)
org.apache.spark.sql.execution.streaming.state.RocksDbStateStoreProvider$RocksDbStateStore.initTransaction(RocksDbStateStoreProvider.scala:119)
org.apache.spark.sql.execution.streaming.state.RocksDbStateStoreProvider$RocksDbStateStore.getRange(RocksDbStateStoreProvider.scala:149)
org.apache.spark.sql.execution.streaming.state.FlatMapGroupsWithStateExecHelper$StateManagerImplBase.getAllState(FlatMapGroupsWithStateExecHelper.scala:107)
org.apache.spark.sql.execution.streaming.FlatMapGroupsWithStateExec$InputProcessor.processTimedOutState(FlatMapGroupsWithStateExec.scala:188)
org.apache.spark.sql.execution.streaming.FlatMapGroupsWithStateExec.$anonfun$doExecute$3(FlatMapGroupsWithStateExec.scala:134)
scala.collection.Iterator$ConcatIteratorCell.headIterator(Iterator.scala:248)
scala.collection.Iterator$ConcatIterator.advance(Iterator.scala:194)
scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:225)
org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:35)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:832)
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:222)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:413)
org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1473)
org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:452)
org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:360)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
org.apache.spark.scheduler.Task.run(Task.scala:131)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:750)
at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:145)
at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:124)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)