[QST] Iceberg issue with rapids
I am using Spark 3.5.0 with rapids-4-spark_2.12-23.12.2.jar and org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.0 (I also tried Iceberg 1.2 through 1.6).
When I try to read an Iceberg table, I get the issue below. I am unable to upgrade beyond rapids-4-spark_2.12-23.12.2.jar because my GPU is not supported by the 24.x releases. I also can't disable Iceberg acceleration for some reason, even with the following settings:
spark.rapids.sql.iceberg.metadataOnly.enabled false
spark.sql.iceberg.vectorization.enabled true
spark.rapids.sql.exec.DataWritingCommandExec.enabled false
spark.rapids.sql.exec.BatchScanExec.enabled false
spark.rapids.sql.format.iceberg.enabled false
spark.rapids.sql.format.iceberg.read.enabled false
Could you advise me, please?
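For reference, here is a minimal sketch of how I create the session with these settings and trigger the read that produces the trace below. The catalog name, warehouse path, and table name are placeholders, not my real values:

```scala
import org.apache.spark.sql.SparkSession

// Minimal sketch: build a session with the RAPIDS plugin and the
// Iceberg-related settings listed above. The catalog name ("demo"),
// warehouse path, and table name are placeholders.
val spark = SparkSession.builder()
  .appName("iceberg-read-repro")
  .config("spark.plugins", "com.nvidia.spark.SQLPlugin")
  .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
  .config("spark.sql.catalog.demo.type", "hadoop")
  .config("spark.sql.catalog.demo.warehouse", "/tmp/iceberg-warehouse")
  .config("spark.rapids.sql.iceberg.metadataOnly.enabled", "false")
  .config("spark.sql.iceberg.vectorization.enabled", "true")
  .config("spark.rapids.sql.exec.DataWritingCommandExec.enabled", "false")
  .config("spark.rapids.sql.exec.BatchScanExec.enabled", "false")
  .config("spark.rapids.sql.format.iceberg.enabled", "false")
  .config("spark.rapids.sql.format.iceberg.read.enabled", "false")
  .getOrCreate()

// The failure happens when reading/showing the table:
spark.sql("SELECT * FROM demo.db.events").show()
```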
java.lang.ClassCastException: class org.apache.iceberg.BaseFileScanTask cannot be cast to class org.apache.iceberg.CombinedScanTask (org.apache.iceberg.BaseFileScanTask and org.apache.iceberg.CombinedScanTask are in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @657b3b)
at com.nvidia.spark.rapids.iceberg.spark.source.GpuSparkBatchQueryScan.isMetadataScan(GpuSparkBatchQueryScan.java:92)
at com.nvidia.spark.rapids.iceberg.IcebergProviderImpl$$anon$1.tagSelfForGpu(IcebergProviderImpl.scala:51)
at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:315)
at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$1(RapidsMeta.scala:289)
at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$1$adapted(RapidsMeta.scala:289)
at scala.collection.immutable.List.foreach(List.scala:431)
at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:289)
at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$6(RapidsMeta.scala:294)
at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$6$adapted(RapidsMeta.scala:294)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:294)
at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$6(RapidsMeta.scala:294)
at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$6$adapted(RapidsMeta.scala:294)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:294)
at com.nvidia.spark.rapids.GpuOverrides$.wrapAndTagPlan(GpuOverrides.scala:4314)
at com.nvidia.spark.rapids.GpuOverrides.applyOverrides(GpuOverrides.scala:4640)
at com.nvidia.spark.rapids.GpuOverrides.$anonfun$applyWithContext$3(GpuOverrides.scala:4525)
at com.nvidia.spark.rapids.GpuOverrides$.logDuration(GpuOverrides.scala:452)
at com.nvidia.spark.rapids.GpuOverrides.$anonfun$applyWithContext$1(GpuOverrides.scala:4522)
at com.nvidia.spark.rapids.GpuOverrideUtil$.$anonfun$tryOverride$1(GpuOverrides.scala:4488)
at com.nvidia.spark.rapids.GpuOverrides.applyWithContext(GpuOverrides.scala:4542)
at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:4515)
at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:4511)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1(Columnar.scala:530)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1$adapted(Columnar.scala:530)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:530)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:482)
at org.apache.spark.sql.execution.QueryExecution$.$anonfun$prepareForExecution$1(QueryExecution.scala:477)
at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:91)
at org.apache.spark.sql.execution.QueryExecution$.prepareForExecution(QueryExecution.scala:476)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:186)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:186)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:179)
at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:238)
at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:284)
at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:252)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:117)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4332)
at org.apache.spark.sql.Dataset.head(Dataset.scala:3326)
at org.apache.spark.sql.Dataset.take(Dataset.scala:3549)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:280)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:315)
at org.apache.spark.sql.Dataset.show(Dataset.scala:839)
at org.apache.spark.sql.Dataset.show(Dataset.scala:798)
at org.apache.spark.sql.Dataset.show(Dataset.scala:807)
... 50 elided
Our latest Spark RAPIDS release (25.10) supports the following Iceberg version: Iceberg 1.6.1 with Spark 3.5.0-3.5.6.
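For example, a session on the supported combination could be wired up roughly like this (a sketch only; the exact artifact version strings such as 25.10.0 are illustrative, please check the release notes for the coordinates that match your environment):

```scala
import org.apache.spark.sql.SparkSession

// Sketch of a session on a supported combination (Spark 3.5.x + RAPIDS 25.10 +
// Iceberg 1.6.1). The exact artifact versions here are assumptions; verify them
// against the release notes for your environment.
val spark = SparkSession.builder()
  .appName("iceberg-on-gpu")
  .config("spark.jars.packages",
    "com.nvidia:rapids-4-spark_2.12:25.10.0," +
    "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1")
  .config("spark.plugins", "com.nvidia.spark.SQLPlugin")
  .config("spark.sql.extensions",
    "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
  .getOrCreate()
```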
@arturzangiev please let us know if using the newer release with a newer GPU addresses the issue. Closing for now; please reopen if you have further questions.