Saving 10001 rows of data to MySQL 8 throws SQLException
Description
When saving 10001 rows from Hive into MySQL, 10000 rows are written but the job then fails with java.sql.SQLException: Can't call commit when autocommit=true.
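For context, MySQL Connector/J throws this exception whenever Connection.commit() is called while the connection is still in autocommit mode. A minimal Scala sketch of the failing pattern (connection details taken from the script below; this is an illustration, not byzer-lang's actual code):

```scala
import java.sql.DriverManager

// A fresh JDBC connection starts with autocommit enabled, so an explicit
// commit() throws "Can't call commit when autocommit=true".
object CommitDemo extends App {
  val conn = DriverManager.getConnection(
    "jdbc:mysql://172.17.0.2:3306/test", "root", "mlsql")
  try {
    // conn.setAutoCommit(false)  // disabling autocommit first avoids the error
    conn.commit()                 // throws java.sql.SQLException
  } finally {
    conn.close()
  }
}
```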
Environment
- Ubuntu 20.04
- MySQL 8.0.26
- techmlsql/mlsql-sandbox:3.1.1-2.1.0-SNAPSHOT
- Hive 2.3.3
Steps to reproduce
- Prepare the table and data. There are 10001 rows in the Hive table t_tmp_01.
```sql
-- Load CSV into Hive
use test;
create table test.t_tmp_01(
  id int,
  name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

load data local inpath '/tmp/t_tmp_01.csv' into table test.t_tmp_01;
```
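The CSV itself is not attached to the issue; a minimal Scala sketch (row contents assumed, only the row count matters for reproducing the bug) that generates a matching 10001-row /tmp/t_tmp_01.csv:

```scala
import java.io.PrintWriter

// Write 10001 rows of (id, name) sample data matching the table schema.
// Any values work; the bug only depends on the row count exceeding 10000.
object GenCsv extends App {
  val out = new PrintWriter("/tmp/t_tmp_01.csv")
  try {
    (1 to 10001).foreach(i => out.println(s"$i,name_$i"))
  } finally {
    out.close()
  }
}
```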
```sql
-- Create the target table in MySQL
use test;
create table t_tmp_01(
  id int,
  name varchar(16)
);
```
- Load Hive data and save it to MySQL
```sql
connect jdbc where
url="jdbc:hive2://172.17.0.7:10000/test"
and driver="org.apache.hive.jdbc.HiveDriver"
as hive_test;

load jdbc.`hive_test.test.t_tmp_01` as table_1;

connect jdbc where
url="jdbc:mysql://172.17.0.2:3306/test?characterEncoding=utf8"
and user="root" and password="mlsql"
and isolationLevel="NONE"
and driver="com.mysql.jdbc.Driver"
as target;

save append table_1 as jdbc.`target.t_tmp_01` where idCol="id";
```
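Note that specifying idCol appears to be what routes this save through the upsert code path (org.apache.spark.sql.jdbc.UpsertUtils in the trace below) instead of Spark's stock JDBC writer, which manages autocommit itself. So an untested workaround sketch, for cases where upsert semantics are not actually needed, is to drop the option:

```sql
-- Untested sketch: without idCol the save should fall back to the plain
-- JDBC append path, which does not call commit() on an autocommit connection.
save append table_1 as jdbc.`target.t_tmp_01`;
```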
- An exception is thrown; however, 10000 rows were inserted. MySQL row count:
```
mysql> select count(1) from t_tmp_01;
+----------+
| count(1) |
+----------+
|    10000 |
+----------+
```
Exception:
```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 310.0 failed 1 times, most recent failure: Lost task 0.0 in stage 310.0 (TID 570) (d0ff36c5497e executor driver): java.sql.SQLException: Can't call commit when autocommit=true
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:869)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:865)
at com.mysql.jdbc.ConnectionImpl.commit(ConnectionImpl.java:1539)
at org.apache.spark.sql.jdbc.UpsertUtils$.upsertPartition(UpsertUtils.scala:144)
at org.apache.spark.sql.jdbc.UpsertUtils$.$anonfun$upsert$2(UpsertUtils.scala:47)
at org.apache.spark.sql.jdbc.UpsertUtils$.$anonfun$upsert$2$adapted(UpsertUtils.scala:45)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1020)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1020)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2242)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
scala.Option.foreach(Option.scala:407)
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
org.apache.spark.rdd.RDD.$anonfun$foreachPartition$1(RDD.scala:1020)
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:1018)
org.apache.spark.sql.jdbc.UpsertUtils$.upsert(UpsertUtils.scala:45)
org.apache.spark.sql.jdbc.DataFrameWriterExtensions$Upsert.upsert(DataFrameWriterExtensions.scala:80)
streaming.core.datasource.impl.MLSQLJDBC.$anonfun$save$4(MLSQLJDBC.scala:214)
streaming.core.datasource.impl.MLSQLJDBC.$anonfun$save$4$adapted(MLSQLJDBC.scala:208)
scala.Option.map(Option.scala:230)
streaming.core.datasource.impl.MLSQLJDBC.save(MLSQLJDBC.scala:208)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:498)
tech.mlsql.dsl.adaptor.SaveAdaptor.$anonfun$parse$2(SaveAdaptor.scala:123)
scala.Option.map(Option.scala:230)
tech.mlsql.dsl.adaptor.SaveAdaptor.parse(SaveAdaptor.scala:114)
streaming.dsl.ScriptSQLExecListener.execute$1(ScriptSQLExec.scala:368)
streaming.dsl.ScriptSQLExecListener.exitSql(ScriptSQLExec.scala:386)
streaming.dsl.parser.DSLSQLParser$SqlContext.exitRule(DSLSQLParser.java:296)
org.antlr.v4.runtime.tree.ParseTreeWalker.exitRule(ParseTreeWalker.java:47)
org.antlr.v4.runtime.tree.ParseTreeWalker.walk(ParseTreeWalker.java:30)
org.antlr.v4.runtime.tree.ParseTreeWalker.walk(ParseTreeWalker.java:28)
streaming.dsl.ScriptSQLExec$._parse(ScriptSQLExec.scala:159)
streaming.dsl.ScriptSQLExec$.parse(ScriptSQLExec.scala:146)
streaming.rest.RestController.$anonfun$script$1(RestController.scala:136)
tech.mlsql.job.JobManager$.run(JobManager.scala:74)
tech.mlsql.job.JobManager$$anon$1.run(JobManager.scala:91)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748)
Caused by: java.sql.SQLException: Can't call commit when autocommit=true
```
You can try this workaround:
```sql
save append xx as jdbc.`db.table`
options truncate="true"
and relaxAutoCommit="true"
and autoCommit="true"
and idCol="id";
```