Saving 10001 rows of data to MySQL 8 throws SQLException
Description
When saving 10001 rows from Hive into MySQL, 10000 rows are written but the job then fails with java.sql.SQLException: Can't call commit when autocommit=true.
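For context, MySQL Connector/J throws this exception whenever Connection.commit() is called while the connection is still in autocommit mode. A minimal Scala sketch of the failing pattern (connection details taken from the script below; this is an illustration, not byzer-lang's actual code):

```scala
import java.sql.DriverManager

// A fresh JDBC connection starts with autocommit enabled, so an explicit
// commit() throws "Can't call commit when autocommit=true".
object CommitDemo extends App {
  val conn = DriverManager.getConnection(
    "jdbc:mysql://172.17.0.2:3306/test", "root", "mlsql")
  try {
    // conn.setAutoCommit(false)  // disabling autocommit first avoids the error
    conn.commit()                 // throws java.sql.SQLException
  } finally {
    conn.close()
  }
}
```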
Environment
- Ubuntu 20.04
- MySQL 8.0.26
- techmlsql/mlsql-sandbox:3.1.1-2.1.0-SNAPSHOT
- Hive 2.3.3
Steps to reproduce
- Prepare the table and data. There are 10001 rows in the Hive table t_tmp_01.
```sql
-- Load CSV into Hive
use test;
create table test.t_tmp_01(
  id int,
  name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

load data local inpath '/tmp/t_tmp_01.csv' into table test.t_tmp_01;
```
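The CSV itself is not attached to the issue; a minimal Scala sketch (row contents assumed, only the row count matters for reproducing the bug) that generates a matching 10001-row /tmp/t_tmp_01.csv:

```scala
import java.io.PrintWriter

// Write 10001 rows of (id, name) sample data matching the table schema.
// Any values work; the bug only depends on the row count exceeding 10000.
object GenCsv extends App {
  val out = new PrintWriter("/tmp/t_tmp_01.csv")
  try {
    (1 to 10001).foreach(i => out.println(s"$i,name_$i"))
  } finally {
    out.close()
  }
}
```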
```sql
-- Create the target table in MySQL
use test;
create table t_tmp_01(
  id int,
  name varchar(16)
);
```
- Load Hive data and save it to MySQL
```sql
connect jdbc where
url="jdbc:hive2://172.17.0.7:10000/test"
and driver="org.apache.hive.jdbc.HiveDriver"
as hive_test;

load jdbc.`hive_test.test.t_tmp_01` as table_1;

connect jdbc where
url="jdbc:mysql://172.17.0.2:3306/test?characterEncoding=utf8"
and user="root" and password="mlsql"
and isolationLevel="NONE"
and driver="com.mysql.jdbc.Driver"
as target;

save append table_1 as jdbc.`target.t_tmp_01` where idCol="id";
```
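Note that specifying idCol appears to be what routes this save through the upsert code path (org.apache.spark.sql.jdbc.UpsertUtils in the trace below) instead of Spark's stock JDBC writer, which manages autocommit itself. So an untested workaround sketch, for cases where upsert semantics are not actually needed, is to drop the option:

```sql
-- Untested sketch: without idCol the save should fall back to the plain
-- JDBC append path, which does not call commit() on an autocommit connection.
save append table_1 as jdbc.`target.t_tmp_01`;
```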
- An exception is thrown; however, 10000 rows were inserted. MySQL row count:
```
mysql> select count(1) from t_tmp_01;
+----------+
| count(1) |
+----------+
|    10000 |
+----------+
```
Exception:
```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 310.0 failed 1 times, most recent failure: Lost task 0.0 in stage 310.0 (TID 570) (d0ff36c5497e executor driver): java.sql.SQLException: Can't call commit when autocommit=true
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:869)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:865)
at com.mysql.jdbc.ConnectionImpl.commit(ConnectionImpl.java:1539)
at org.apache.spark.sql.jdbc.UpsertUtils$.upsertPartition(UpsertUtils.scala:144)
at org.apache.spark.sql.jdbc.UpsertUtils$.$anonfun$upsert$2(UpsertUtils.scala:47)
at org.apache.spark.sql.jdbc.UpsertUtils$.$anonfun$upsert$2$adapted(UpsertUtils.scala:45)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1020)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1020)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2242)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
scala.Option.foreach(Option.scala:407)
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
org.apache.spark.rdd.RDD.$anonfun$foreachPartition$1(RDD.scala:1020)
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:1018)
org.apache.spark.sql.jdbc.UpsertUtils$.upsert(UpsertUtils.scala:45)
org.apache.spark.sql.jdbc.DataFrameWriterExtensions$Upsert.upsert(DataFrameWriterExtensions.scala:80)
streaming.core.datasource.impl.MLSQLJDBC.$anonfun$save$4(MLSQLJDBC.scala:214)
streaming.core.datasource.impl.MLSQLJDBC.$anonfun$save$4$adapted(MLSQLJDBC.scala:208)
scala.Option.map(Option.scala:230)
streaming.core.datasource.impl.MLSQLJDBC.save(MLSQLJDBC.scala:208)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:498)
tech.mlsql.dsl.adaptor.SaveAdaptor.$anonfun$parse$2(SaveAdaptor.scala:123)
scala.Option.map(Option.scala:230)
tech.mlsql.dsl.adaptor.SaveAdaptor.parse(SaveAdaptor.scala:114)
streaming.dsl.ScriptSQLExecListener.execute$1(ScriptSQLExec.scala:368)
streaming.dsl.ScriptSQLExecListener.exitSql(ScriptSQLExec.scala:386)
streaming.dsl.parser.DSLSQLParser$SqlContext.exitRule(DSLSQLParser.java:296)
org.antlr.v4.runtime.tree.ParseTreeWalker.exitRule(ParseTreeWalker.java:47)
org.antlr.v4.runtime.tree.ParseTreeWalker.walk(ParseTreeWalker.java:30)
org.antlr.v4.runtime.tree.ParseTreeWalker.walk(ParseTreeWalker.java:28)
streaming.dsl.ScriptSQLExec$._parse(ScriptSQLExec.scala:159)
streaming.dsl.ScriptSQLExec$.parse(ScriptSQLExec.scala:146)
streaming.rest.RestController.$anonfun$script$1(RestController.scala:136)
tech.mlsql.job.JobManager$.run(JobManager.scala:74)
tech.mlsql.job.JobManager$$anon$1.run(JobManager.scala:91)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748)
Caused by: java.sql.SQLException: Can't call commit when autocommit=true
```
You can try this workaround:
```sql
save append xx as jdbc.`db.table`
options truncate="true"
and relaxAutoCommit="true"
and autoCommit="true"
and idCol="id";
```