spark-rapids
spark-rapids copied to clipboard
[BUG] adaptive query executor and delta optimized table writes don't work on databricks
Describe the bug: When attempting to write a delta table using pyspark on Azure Databricks 7.2, I get the following exception (reduced; full exception attached):
Py4JJavaError: An error occurred while calling o438.save.
: org.apache.spark.SparkException: Job aborted.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:229)
at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.$anonfun$writeFiles$5(TransactionalWriteEdge.scala:179)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:835)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.$anonfun$writeFiles$1(TransactionalWriteEdge.scala:133)
at com.databricks.logging.UsageLogging.$anonfun$recordOperation$4(UsageLogging.scala:429)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:237)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:232)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:229)
at com.databricks.spark.util.PublicDBLogging.withAttributionContext(DatabricksSparkUsageLogger.scala:18)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:274)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:267)
at com.databricks.spark.util.PublicDBLogging.withAttributionTags(DatabricksSparkUsageLogger.scala:18)
at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:410)
at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:336)
at com.databricks.spark.util.PublicDBLogging.recordOperation(DatabricksSparkUsageLogger.scala:18)
at com.databricks.spark.util.PublicDBLogging.recordOperation0(DatabricksSparkUsageLogger.scala:55)
at com.databricks.spark.util.DatabricksSparkUsageLogger.recordOperation(DatabricksSparkUsageLogger.scala:98)
at com.databricks.spark.util.UsageLogger.recordOperation(UsageLogger.scala:71)
at com.databricks.spark.util.UsageLogger.recordOperation$(UsageLogger.scala:58)
at com.databricks.spark.util.DatabricksSparkUsageLogger.recordOperation(DatabricksSparkUsageLogger.scala:67)
at com.databricks.spark.util.UsageLogging.recordOperation(UsageLogger.scala:346)
at com.databricks.spark.util.UsageLogging.recordOperation$(UsageLogger.scala:325)
at com.databricks.sql.transaction.tahoe.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:83)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:108)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:94)
at com.databricks.sql.transaction.tahoe.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:83)
at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.writeFiles(TransactionalWriteEdge.scala:90)
at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.writeFiles$(TransactionalWriteEdge.scala:86)
at com.databricks.sql.transaction.tahoe.OptimisticTransaction.writeFiles(OptimisticTransaction.scala:83)
at com.databricks.sql.transaction.tahoe.files.TransactionalWrite.writeFiles(TransactionalWrite.scala:110)
at com.databricks.sql.transaction.tahoe.files.TransactionalWrite.writeFiles$(TransactionalWrite.scala:109)
at com.databricks.sql.transaction.tahoe.OptimisticTransaction.writeFiles(OptimisticTransaction.scala:83)
at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.write(WriteIntoDelta.scala:112)
at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.$anonfun$run$2(WriteIntoDelta.scala:71)
at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.$anonfun$run$2$adapted(WriteIntoDelta.scala:70)
at com.databricks.sql.transaction.tahoe.DeltaLog.withNewTransaction(DeltaLog.scala:203)
at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:70)
at com.databricks.sql.acl.CheckPermissions$.trusted(CheckPermissions.scala:1067)
at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.run(WriteIntoDelta.scala:69)
at com.databricks.sql.transaction.tahoe.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:155)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:91)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:195)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$3(SparkPlan.scala:247)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:187)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:152)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:151)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:980)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:835)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:980)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:418)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:396)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:275)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.StackOverflowError
at java.lang.StringBuffer.getChars(StringBuffer.java:245)
at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:473)
at java.lang.StringBuffer.append(StringBuffer.java:310)
at java.lang.StringBuffer.append(StringBuffer.java:97)
at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:486)
at java.lang.StringBuffer.append(StringBuffer.java:338)
at java.lang.StringBuffer.<init>(StringBuffer.java:158)
at scala.util.matching.Regex$Replacement.replaced(Regex.scala:895)
at scala.util.matching.Regex$Replacement.replaced$(Regex.scala:894)
at scala.util.matching.Regex$MatchIterator$$anon$4.replaced(Regex.scala:878)
at scala.util.matching.Regex.replaceAllIn(Regex.scala:509)
at org.apache.spark.internal.config.ConfigReader.substitute(ConfigReader.scala:88)
at org.apache.spark.internal.config.ConfigReader.substitute(ConfigReader.scala:84)
at org.apache.spark.internal.config.ConfigReader.$anonfun$get$1(ConfigReader.scala:79)
at scala.Option.map(Option.scala:230)
at org.apache.spark.internal.config.ConfigReader.get(ConfigReader.scala:79)
at org.apache.spark.internal.config.ConfigEntry.readString(ConfigEntry.scala:94)
at org.apache.spark.internal.config.FallbackConfigEntry.readFrom(ConfigEntry.scala:271)
at org.apache.spark.sql.internal.SQLConf.getConf(SQLConf.scala:3337)
at org.apache.spark.sql.internal.SQLConf.cboEnabled(SQLConf.scala:3120)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:29)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
Steps/Code to reproduce bug
df.write.format('delta').partitionBy(...).save(...)
The issue happens regardless of whether CBO and AQE are enabled or disabled. Increasing the stack size to 4m on the driver (-Xss4m) does not help. Without the RAPIDS Accelerator For Apache Spark the export succeeds.
Expected behavior Delta export succeeds.
Environment details (please complete the following information)
- Azure Databricks 7.2
- pyspark
- rapids-4-spark v0.2.0 with cudf-0.15-cuda10-2 (CUDA 10.2)
- Delta export to Azure Datalake Storage (ADLS)
Spark configuration settings:
"spark.driver.extraJavaOptions": "-Xss4m",
"spark.driver.maxResultSize": "12g",
"spark.executor.extraJavaOptions": "-XX:+UseG1GC -Dai.rapids.cudf.prefer-pinned=true",
"spark.default.parallelism": "150",
"spark.sql.shuffle.partitions": "600",
"spark.sql.execution.arrow.pyspark.enabled": "true",
"spark.sql.execution.arrow.pyspark.fallback.enabled": "false",
"spark.sql.broadcastTimeout": "1800s",
"spark.sql.adaptive.enabled": "true",
"spark.sql.cbo.enabled": "true",
"spark.sql.cbo.joinReorder.enabled": "true",
"spark.rdd.compress": "true",
"spark.checkpoint.compress": "true",
"spark.cleaner.referenceTracking.cleanCheckpoints": "false",
"spark.io.compression.codec": "zstd",
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
"spark.scheduler.mode": "FAIR",
"spark.shuffle.sort.bypassMergeThreshold": "100",
"spark.databricks.delta.optimizeWrite.enabled": "true",
"spark.databricks.delta.autoCompact.enabled": "false",
"spark.plugins": "com.nvidia.spark.SQLPlugin",
"spark.sql.parquet.filterPushdown": "false",
"spark.rapids.sql.incompatibleOps.enabled": "true",
"spark.rapids.memory.pinnedPool.size": "2G",
"spark.task.resource.gpu.amount": 0.1,
"spark.rapids.sql.concurrentGpuTasks": 2,
"spark.locality.wait": "0s",
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": 2,
"spark.kryo.registrator": "com.nvidia.spark.rapids.GpuKryoRegistrator"
Additional context
Hey @martinstuder,
thanks for reporting this. We don't support the Databricks 7.2 runtime or AQE with the current 0.2 release. Would you be able to try with the 7.0 runtime with AQE disabled, as it's not supported there? We are working on support for the Databricks 7.3 runtime in the next 0.3 release, which will hopefully support AQE.
You probably already found it but our docs for Databricks for reference: https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-databricks.html.
I'll also try to reproduce this locally.
Please also turn off CBO, as I don't know that we have tested with that.
Hi @tgravescs,
thanks for your feedback. I tried with DBR 7.0 with AQE and CBO disabled. I can confirm that I run into the same issue. I should probably add though that I'm running Databricks Container Services (DCS) using a "GPU-enabled" base image similar to the ones provided by Databricks (see here). So I'm running the non-ML version of the Databricks runtime with nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04
as the base for our image.
thanks, I don't think the container service should matter here as long as it's using the same Databricks jar for the 7.0 runtime. I'm trying to reproduce locally but so far no luck. It might be related to the data (size or schema), or perhaps I'm not doing the write operations beforehand. Since it seems to be stuck in the stats estimator it might be more related to the data itself, so I'll try some larger data. Can you tell me how large the data you are trying to write is? What type of operations are you doing — or does it even matter: can you simply read and then write the data and it happens? I'll try some different data and a few more things to try to reproduce.
Looks like the issue is related to delta optimized writes (spark.databricks.delta.optimizeWrite.enabled
). When I disable optimized writes the stated issue doesn't occur anymore. It's just that the delta write itself gets really slow which is why we enabled optimized writes.
The pipeline is basically as follows:
schema = StructType([
StructField('id', IntegerType(), False),
StructField('aaa', ByteType(), False),
StructField('bbb', IntegerType(), False),
StructField('ccc', LongType(), False),
StructField('ddd', DoubleType(), False),
])
df1 = spark.read.format('parquet').schema(schema).load(...)
df2 = spark.read.format('parquet').schema(schema).load(...)
df = df1.withColumn('zzz', F.lit(1).cast('byte')) \
.unionByName(df2.withColumn('zzz', F.lit(2).cast('byte')))
df.write \
.format('delta') \
.partitionBy('id') \
.save(...)
The resulting df
has somewhere between 1e9 - 5e9 rows.
thanks, it's great that you found what was causing this. It looks like their optimized write is basically an adaptive shuffle, which is really similar to AQE, which unfortunately we don't support right now.
We actually hit this same stack trace trying AQE on Databricks. The issue is that a Databricks-specific class is looking for very specific class names, and it's not matching on our GPU version of the same class. We have tried reaching out to them to fix it but haven't had any progress.
I'm assuming that just changing the number of partitions won't work for you if other previous stages require more partitions to be efficient?
it's likely I couldn't reproduce it because I hadn't set up the optimized-write configs on the table. Hopefully I will now be able to reproduce it and see if there is anything else I can recommend.
If it's the actual GPU parquet write that is triggering it, you might try disabling that and having the write done on the CPU side.
If you wanted to try it you can set:
spark.conf.set("spark.rapids.sql.format.parquet.write.enabled", "false")
This should cause any parquet write to not be GPU accelerated, thus bypassing the issue for now. If you are using another format like ORC, there are similar flags to disable it.
Hi Team,
I can reproduce the stack overflow issue with ONLY AQE=on (nothing to do with the delta optimized write).
Here is my minimum reproduce based on databricks 8.2ML GPU + 21.10 snapshot jars.
spark.conf.set("spark.sql.adaptive.enabled",true)
spark.conf.set("spark.databricks.delta.optimizeWrite.enabled", false)
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
val data = Seq(
Row(Row("Adam ","","Green"),"1","M",1000),
Row(Row("Bob ","Middle","Green"),"2","M",2000),
Row(Row("Cathy ","","Green"),"3","F",3000)
)
val schema = (new StructType()
.add("name",new StructType()
.add("firstname",StringType)
.add("middlename",StringType)
.add("lastname",StringType))
.add("id",StringType)
.add("gender",StringType)
.add("salary",IntegerType))
val df = spark.createDataFrame(spark.sparkContext.parallelize(data),schema)
df.write.format("parquet").mode("overwrite").save("/tmp/testparquet")
val df2 = spark.read.parquet("/tmp/testparquet")
val df3 = spark.read.parquet("/tmp/testparquet")
df2.createOrReplaceTempView("df2")
df3.createOrReplaceTempView("df3")
spark.sql("select count(*) from df2, df3 where df2.name=df3.name").show()
Then it will fail with a stack overflow:
tor.scala:33)
at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:41)
at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.fallback(BasicStatsSparkPlanVisitor.scala:43)
at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.default(BasicStatsSparkPlanVisitor.scala:45)
at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.default(BasicStatsSparkPlanVisitor.scala:35)
at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:79)
at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:33)
at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.super$visit(BasicStatsSparkPlanVisitor.scala:39)
at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.$anonfun$visit$1(BasicStatsSparkPlanVisitor.scala:39)
at scala.Option.getOrElse(Option.scala:189)
at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.visit(BasicStatsSparkPlanVisitor.scala:39)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:30)
at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
at
...