datafusion-comet
datafusion-comet copied to clipboard
Null pointer when spark.comet.parquet.enable.directBuffer is enabled
Describe the bug
I enabled the configuration option spark.comet.parquet.enable.directBuffer and the query failed with the following exception:
Caused by: org.apache.spark.SparkException: Encountered error while reading file file:///mnt/bigdata/tpcds/sf100/inventory.parquet/part1.parquet. Details:
at org.apache.spark.sql.errors.QueryExecutionErrors$.cannotReadFilesError(QueryExecutionErrors.scala:877)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:307)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:125)
at org.apache.spark.sql.comet.CometScanExec$$anon$1.hasNext(CometScanExec.scala:225)
at org.apache.comet.CometBatchIterator.next(CometBatchIterator.java:48)
at org.apache.comet.Native.executePlan(Native Method)
at org.apache.comet.CometExecIterator.getNextBatch(CometExecIterator.scala:105)
at org.apache.comet.CometExecIterator.hasNext(CometExecIterator.scala:128)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.comet.CometBatchIterator.next(CometBatchIterator.java:48)
at org.apache.comet.Native.executePlan(Native Method)
at org.apache.comet.CometExecIterator.getNextBatch(CometExecIterator.scala:105)
at org.apache.comet.CometExecIterator.hasNext(CometExecIterator.scala:128)
at org.apache.spark.sql.comet.execution.shuffle.CometShuffleWriteProcessor.write(CometShuffleExchangeExec.scala:496)
at org.apache.spark.sql.comet.shims.ShimCometShuffleWriteProcessor.write(ShimCometShuffleWriteProcessor.scala:35)
at org.apache.spark.sql.comet.shims.ShimCometShuffleWriteProcessor.write$(ShimCometShuffleWriteProcessor.scala:28)
at org.apache.spark.sql.comet.execution.shuffle.CometShuffleWriteProcessor.write(CometShuffleExchangeExec.scala:452)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: org.apache.comet.CometNativeException: Null pointer in get_direct_buffer_address return value
at org.apache.comet.parquet.Native.setPageBufferV1(Native Method)
at org.apache.comet.parquet.ColumnReader$1.visit(ColumnReader.java:279)
at org.apache.comet.parquet.ColumnReader$1.visit(ColumnReader.java:261)
at org.apache.parquet.column.page.DataPageV1.accept(DataPageV1.java:120)
at org.apache.comet.parquet.ColumnReader.readPage(ColumnReader.java:260)
at org.apache.comet.parquet.ColumnReader.readBatch(ColumnReader.java:128)
at org.apache.comet.parquet.BatchReader.nextBatch(BatchReader.java:495)
at org.apache.comet.parquet.BatchReader.nextBatch(BatchReader.java:487)
at org.apache.comet.parquet.BatchReader.nextKeyValue(BatchReader.java:399)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:125)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:297)
Steps to reproduce
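Run TPC-DS query 72 with spark.comet.parquet.enable.directBuffer enabled. The failing read in the trace above is on the TPC-DS inventory table stored as Parquet (path tpcds/sf100/inventory.parquet).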
Expected behavior
No response
Additional context
No response