
[BUG] adaptive query executor and delta optimized table writes don't work on databricks

Open martinstuder opened this issue 4 years ago • 7 comments

Describe the bug

When attempting to write a Delta table using PySpark on Azure Databricks 7.2, I get the following exception (reduced; the full exception is attached):

Py4JJavaError: An error occurred while calling o438.save.
: org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:229)
	at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.$anonfun$writeFiles$5(TransactionalWriteEdge.scala:179)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:835)
	at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
	at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.$anonfun$writeFiles$1(TransactionalWriteEdge.scala:133)
	at com.databricks.logging.UsageLogging.$anonfun$recordOperation$4(UsageLogging.scala:429)
	at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:237)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
	at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:232)
	at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:229)
	at com.databricks.spark.util.PublicDBLogging.withAttributionContext(DatabricksSparkUsageLogger.scala:18)
	at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:274)
	at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:267)
	at com.databricks.spark.util.PublicDBLogging.withAttributionTags(DatabricksSparkUsageLogger.scala:18)
	at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:410)
	at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:336)
	at com.databricks.spark.util.PublicDBLogging.recordOperation(DatabricksSparkUsageLogger.scala:18)
	at com.databricks.spark.util.PublicDBLogging.recordOperation0(DatabricksSparkUsageLogger.scala:55)
	at com.databricks.spark.util.DatabricksSparkUsageLogger.recordOperation(DatabricksSparkUsageLogger.scala:98)
	at com.databricks.spark.util.UsageLogger.recordOperation(UsageLogger.scala:71)
	at com.databricks.spark.util.UsageLogger.recordOperation$(UsageLogger.scala:58)
	at com.databricks.spark.util.DatabricksSparkUsageLogger.recordOperation(DatabricksSparkUsageLogger.scala:67)
	at com.databricks.spark.util.UsageLogging.recordOperation(UsageLogger.scala:346)
	at com.databricks.spark.util.UsageLogging.recordOperation$(UsageLogger.scala:325)
	at com.databricks.sql.transaction.tahoe.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:83)
	at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:108)
	at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:94)
	at com.databricks.sql.transaction.tahoe.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:83)
	at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.writeFiles(TransactionalWriteEdge.scala:90)
	at com.databricks.sql.transaction.tahoe.files.TransactionalWriteEdge.writeFiles$(TransactionalWriteEdge.scala:86)
	at com.databricks.sql.transaction.tahoe.OptimisticTransaction.writeFiles(OptimisticTransaction.scala:83)
	at com.databricks.sql.transaction.tahoe.files.TransactionalWrite.writeFiles(TransactionalWrite.scala:110)
	at com.databricks.sql.transaction.tahoe.files.TransactionalWrite.writeFiles$(TransactionalWrite.scala:109)
	at com.databricks.sql.transaction.tahoe.OptimisticTransaction.writeFiles(OptimisticTransaction.scala:83)
	at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.write(WriteIntoDelta.scala:112)
	at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.$anonfun$run$2(WriteIntoDelta.scala:71)
	at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.$anonfun$run$2$adapted(WriteIntoDelta.scala:70)
	at com.databricks.sql.transaction.tahoe.DeltaLog.withNewTransaction(DeltaLog.scala:203)
	at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:70)
	at com.databricks.sql.acl.CheckPermissions$.trusted(CheckPermissions.scala:1067)
	at com.databricks.sql.transaction.tahoe.commands.WriteIntoDelta.run(WriteIntoDelta.scala:69)
	at com.databricks.sql.transaction.tahoe.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:155)
	at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:91)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:195)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$3(SparkPlan.scala:247)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:187)
	at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:152)
	at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:151)
	at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:980)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:835)
	at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:980)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:418)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:396)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:275)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
	at py4j.Gateway.invoke(Gateway.java:295)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:251)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.StackOverflowError
	at java.lang.StringBuffer.getChars(StringBuffer.java:245)
	at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:473)
	at java.lang.StringBuffer.append(StringBuffer.java:310)
	at java.lang.StringBuffer.append(StringBuffer.java:97)
	at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:486)
	at java.lang.StringBuffer.append(StringBuffer.java:338)
	at java.lang.StringBuffer.<init>(StringBuffer.java:158)
	at scala.util.matching.Regex$Replacement.replaced(Regex.scala:895)
	at scala.util.matching.Regex$Replacement.replaced$(Regex.scala:894)
	at scala.util.matching.Regex$MatchIterator$$anon$4.replaced(Regex.scala:878)
	at scala.util.matching.Regex.replaceAllIn(Regex.scala:509)
	at org.apache.spark.internal.config.ConfigReader.substitute(ConfigReader.scala:88)
	at org.apache.spark.internal.config.ConfigReader.substitute(ConfigReader.scala:84)
	at org.apache.spark.internal.config.ConfigReader.$anonfun$get$1(ConfigReader.scala:79)
	at scala.Option.map(Option.scala:230)
	at org.apache.spark.internal.config.ConfigReader.get(ConfigReader.scala:79)
	at org.apache.spark.internal.config.ConfigEntry.readString(ConfigEntry.scala:94)
	at org.apache.spark.internal.config.FallbackConfigEntry.readFrom(ConfigEntry.scala:271)
	at org.apache.spark.sql.internal.SQLConf.getConf(SQLConf.scala:3337)
	at org.apache.spark.sql.internal.SQLConf.cboEnabled(SQLConf.scala:3120)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:29)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
	at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
	at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
	at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
	at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
	at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
	at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:32)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
	at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:45)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.default(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
	at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:58)
	at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:32)
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:38)
	... (the eight-frame computeStats/visit cycle above repeats until the stack overflows; see the attached full exception)

Steps/Code to reproduce bug

df.write.format('delta').partitionBy(...).save(...)
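For context, a self-contained sketch of the failing pattern (the path and partition column here are hypothetical stand-ins; the real job writes a far larger DataFrame):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()

# Small hypothetical stand-in; the actual job writes on the order of 1e9 rows
df = spark.range(1_000_000).withColumn('bucket', (F.col('id') % 100).cast('int'))

# Partitioned Delta write of the shape that hits the StackOverflowError
df.write.format('delta').partitionBy('bucket').save('/tmp/delta_repro')  # hypothetical path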

The issue occurs regardless of whether CBO and AQE are enabled or disabled. Increasing the stack size on the driver to 4m (-Xss4m) does not help. Without the RAPIDS Accelerator for Apache Spark, the export succeeds.

Expected behavior

The Delta export succeeds.

Environment details

  • Azure Databricks 7.2
  • pyspark
  • rapids-4-spark v0.2.0 with cudf-0.15-cuda10-2 (CUDA 10.2)
  • Delta export to Azure Datalake Storage (ADLS)

Spark configuration settings:

"spark.driver.extraJavaOptions": "-Xss4m",
"spark.driver.maxResultSize": "12g",
"spark.executor.extraJavaOptions": "-XX:+UseG1GC -Dai.rapids.cudf.prefer-pinned=true",
"spark.default.parallelism": "150",
"spark.sql.shuffle.partitions": "600",
"spark.sql.execution.arrow.pyspark.enabled": "true",
"spark.sql.execution.arrow.pyspark.fallback.enabled": "false",
"spark.sql.broadcastTimeout": "1800s",
"spark.sql.adaptive.enabled": "true",
"spark.sql.cbo.enabled": "true",
"spark.sql.cbo.joinReorder.enabled": "true",
"spark.rdd.compress": "true",
"spark.checkpoint.compress": "true",
"spark.cleaner.referenceTracking.cleanCheckpoints": "false",
"spark.io.compression.codec": "zstd",
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
"spark.scheduler.mode": "FAIR",
"spark.shuffle.sort.bypassMergeThreshold": "100",
"spark.databricks.delta.optimizeWrite.enabled": "true",
"spark.databricks.delta.autoCompact.enabled": "false",
"spark.plugins": "com.nvidia.spark.SQLPlugin",
"spark.sql.parquet.filterPushdown": "false",
"spark.rapids.sql.incompatibleOps.enabled": "true",
"spark.rapids.memory.pinnedPool.size": "2G",
"spark.task.resource.gpu.amount": 0.1,
"spark.rapids.sql.concurrentGpuTasks": 2,
"spark.locality.wait": "0s",
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": 2,
"spark.kryo.registrator": "com.nvidia.spark.rapids.GpuKryoRegistrator"

Additional context

full_exception.txt

martinstuder avatar Nov 03 '20 07:11 martinstuder

Hey @martinstuder,

thanks for reporting this. We don't support the Databricks 7.2 runtime, or AQE, with the current 0.2 release. Would you be able to try the 7.0 runtime with AQE off, since AQE isn't supported there either? We are working on support for the Databricks 7.3 runtime in the next 0.3 release, which will hopefully support AQE.

You probably already found them, but for reference, here are our docs for Databricks: https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-databricks.html.

I'll also try to reproduce this locally.

tgravescs avatar Nov 03 '20 14:11 tgravescs

Please also turn off CBO, as I don't know that we have tested with that.
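For anyone following along, both can be switched off for a session with the same config keys listed in the settings above (a minimal sketch, assuming an active spark session):

# Disable the adaptive query executor and the cost-based optimizer for this session
spark.conf.set('spark.sql.adaptive.enabled', 'false')
spark.conf.set('spark.sql.cbo.enabled', 'false')
spark.conf.set('spark.sql.cbo.joinReorder.enabled', 'false')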

tgravescs avatar Nov 03 '20 15:11 tgravescs

Hi @tgravescs,

thanks for your feedback. I tried DBR 7.0 with AQE and CBO disabled and can confirm that I run into the same issue. I should probably add, though, that I'm running Databricks Container Services (DCS) using a "GPU-enabled" base image similar to the ones provided by Databricks (see here). So I'm running the non-ML version of the Databricks runtime with nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04 as the base for our image.

martinstuder avatar Nov 03 '20 15:11 martinstuder

thanks, I don't think the container service should matter here as long as it's using the same Databricks jar for the 7.0 runtime. I'm trying to reproduce locally, but so far no luck. It might be related to the data (size or schema), or perhaps I'm not doing the write operations beforehand. Since it seems to be stuck in the stats estimator, it might be more related to the data itself, so I'll try some larger data. Can you tell me how much data you are trying to write? What type of operations are you doing, or does it matter? Can you simply read and then write the data and have it happen? I'll try some different data and a few more things to try to reproduce.

tgravescs avatar Nov 03 '20 16:11 tgravescs

Looks like the issue is related to Delta optimized writes (spark.databricks.delta.optimizeWrite.enabled). When I disable optimized writes, the stated issue doesn't occur anymore. It's just that the Delta write itself gets really slow, which is why we enabled optimized writes.
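For completeness, the session-level toggle in question (same key as in the cluster settings above):

# Turn off Delta optimized writes for the current session
spark.conf.set('spark.databricks.delta.optimizeWrite.enabled', 'false')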

The pipeline is basically as follows:

from pyspark.sql import functions as F
from pyspark.sql.types import (StructType, StructField, IntegerType,
                               ByteType, LongType, DoubleType)

schema = StructType([
    StructField('id', IntegerType(), False),
    StructField('aaa', ByteType(), False),
    StructField('bbb', IntegerType(), False),
    StructField('ccc', LongType(), False),
    StructField('ddd', DoubleType(), False),
])

df1 = spark.read.format('parquet').schema(schema).load(...)
df2 = spark.read.format('parquet').schema(schema).load(...)

df = df1.withColumn('zzz', F.lit(1).cast('byte')) \
    .unionByName(df2.withColumn('zzz', F.lit(2).cast('byte')))

df.write \
    .format('delta') \
    .partitionBy('id') \
    .save(...)

The resulting df has somewhere between 1e9 and 5e9 rows.

martinstuder avatar Nov 03 '20 19:11 martinstuder

thanks, it's great that you found what was causing this. It looks like their optimized write is basically an adaptive shuffle, which is really similar to AQE, which unfortunately we don't support right now.
We actually hit this same stack trace trying AQE on Databricks. The issue is that a Databricks-specific class looks for very specific class names and doesn't match on our GPU version of the same class. We have tried reaching out to them to fix it but haven't made any progress.

I'm assuming that just changing the number of partitions won't work for you if previous stages require more partitions to be efficient?

It's likely I couldn't reproduce it because I hadn't set up the optimized-write configs on the table. Hopefully I will be able to reproduce it now and see if there is anything else I can recommend.
If it's actually the GPU Parquet write that is triggering it, you might try disabling that and having it done on the CPU side. If you want to try it, you can set: spark.conf.set("spark.rapids.sql.format.parquet.write.enabled", "false")

This should cause any Parquet write to not be GPU-accelerated, thus bypassing the issue for now. If you are using another format like ORC, there are similar flags to disable it.
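A sketch of the suggested fallback; the ORC key is an assumed analogue of the Parquet one and should be verified against the plugin's configuration docs:

# Fall back to the CPU for Parquet writes while keeping other operations on the GPU
spark.conf.set('spark.rapids.sql.format.parquet.write.enabled', 'false')

# Assumed analogous key for ORC output; check the RAPIDS config reference
spark.conf.set('spark.rapids.sql.format.orc.write.enabled', 'false')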

tgravescs avatar Nov 03 '20 22:11 tgravescs

Hi Team,

I can reproduce the stack overflow issue with ONLY AQE on (nothing to do with Delta write optimize).

Here is my minimal reproducer, based on Databricks 8.2 ML GPU + 21.10 snapshot jars.

spark.conf.set("spark.sql.adaptive.enabled",true)
spark.conf.set("spark.databricks.delta.optimizeWrite.enabled", false)

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

val data = Seq(
    Row(Row("Adam ","","Green"),"1","M",1000),
    Row(Row("Bob ","Middle","Green"),"2","M",2000),
    Row(Row("Cathy ","","Green"),"3","F",3000)
)

val schema = (new StructType()
  .add("name",new StructType()
    .add("firstname",StringType)
    .add("middlename",StringType)
    .add("lastname",StringType)) 
  .add("id",StringType)
  .add("gender",StringType)
  .add("salary",IntegerType))

val df = spark.createDataFrame(spark.sparkContext.parallelize(data),schema)
df.write.format("parquet").mode("overwrite").save("/tmp/testparquet")

val df2 = spark.read.parquet("/tmp/testparquet")
val df3 = spark.read.parquet("/tmp/testparquet")
df2.createOrReplaceTempView("df2")
df3.createOrReplaceTempView("df3")

spark.sql("select count(*) from df2, df3 where df2.name=df3.name").show()

Then it fails with a stack overflow:

	...
	at com.databricks.sql.optimizer.statsEstimation.SizeInBytesOnlyStatsSparkPlanVisitor$.visit(SizeInBytesOnlyStatsSparkPlanVisitor.scala:41)
	at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.fallback(BasicStatsSparkPlanVisitor.scala:43)
	at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.default(BasicStatsSparkPlanVisitor.scala:45)
	at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.default(BasicStatsSparkPlanVisitor.scala:35)
	at com.databricks.sql.execution.SparkPlanVisitor.visit(SparkPlanVisitor.scala:79)
	at com.databricks.sql.execution.SparkPlanVisitor.visit$(SparkPlanVisitor.scala:33)
	at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.super$visit(BasicStatsSparkPlanVisitor.scala:39)
	at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.$anonfun$visit$1(BasicStatsSparkPlanVisitor.scala:39)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.sql.optimizer.statsEstimation.BasicStatsSparkPlanVisitor$.visit(BasicStatsSparkPlanVisitor.scala:39)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats(SparkPlanStats.scala:30)
	at com.databricks.sql.optimizer.SparkPlanStats.computeStats$(SparkPlanStats.scala:28)
	at org.apache.spark.sql.execution.SparkPlan.computeStats(SparkPlan.scala:70)
	...

viadea avatar Sep 16 '21 16:09 viadea