dw-jdbc
dw-jdbc copied to clipboard
Getting error in spark when using JDBC driver to read
Tried to read a table from DataWorld to a dataframe in Spark and then tried printSchema
on the dataframe resulted in the following error:
ERROR 2019-08-20T07:14:15.083 Exception in task 0.0 in stage 0.0 (TID 0)
java.sql.SQLException: Unable to marshal xsd:string to java.math.BigDecimal
at world.data.jdbc.internal.types.NodeValues.parseLiteral(NodeValues.java:488)
at world.data.jdbc.internal.types.NodeValues.parseNumber(NodeValues.java:208)
at world.data.jdbc.internal.types.NodeValues.parseBigDecimal(NodeValues.java:163)
at world.data.jdbc.internal.results.ResultSetImpl.getBigDecimal(ResultSetImpl.java:246)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$org$apache$spark$sql$execution$datasources$jdbc$JdbcUtils$$makeGetter$3.apply(JdbcUtils.scala:407)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$org$apache$spark$sql$execution$datasources$jdbc$JdbcUtils$$makeGetter$3.apply(JdbcUtils.scala:405)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anon$1.getNext(JdbcUtils.scala:356)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anon$1.getNext(JdbcUtils.scala:338)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:927)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:927)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NumberFormatException
at java.math.BigDecimal.<init>(BigDecimal.java:497)
at java.math.BigDecimal.<init>(BigDecimal.java:383)
at java.math.BigDecimal.<init>(BigDecimal.java:809)
at world.data.jdbc.internal.types.NodeValues.lambda$parseNumber$4(NodeValues.java:219)
at world.data.jdbc.internal.types.NodeValues.parseLiteral(NodeValues.java:484)
... 29 more
WARN 2019-08-20T07:14:15.19 Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.sql.SQLException: Unable to marshal xsd:string to java.math.BigDecimal
at world.data.jdbc.internal.types.NodeValues.parseLiteral(NodeValues.java:488)
at world.data.jdbc.internal.types.NodeValues.parseNumber(NodeValues.java:208)
at world.data.jdbc.internal.types.NodeValues.parseBigDecimal(NodeValues.java:163)
at world.data.jdbc.internal.results.ResultSetImpl.getBigDecimal(ResultSetImpl.java:246)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$org$apache$spark$sql$execution$datasources$jdbc$JdbcUtils$$makeGetter$3.apply(JdbcUtils.scala:407)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$org$apache$spark$sql$execution$datasources$jdbc$JdbcUtils$$makeGetter$3.apply(JdbcUtils.scala:405)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anon$1.getNext(JdbcUtils.scala:356)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anon$1.getNext(JdbcUtils.scala:338)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:927)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:927)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NumberFormatException
at java.math.BigDecimal.<init>(BigDecimal.java:497)
at java.math.BigDecimal.<init>(BigDecimal.java:383)
at java.math.BigDecimal.<init>(BigDecimal.java:809)
at world.data.jdbc.internal.types.NodeValues.lambda$parseNumber$4(NodeValues.java:219)
at world.data.jdbc.internal.types.NodeValues.parseLiteral(NodeValues.java:484)
... 29 more
ERROR 2019-08-20T07:14:15.193 Task 0 in stage 0.0 failed 1 times; aborting job
Any operation on the dataframe led to this error. Code to read table from DataWorld is given below:
spark.read
.format("jdbc")
.option("driver", "world.data.jdbc.Driver")
.option("url", params.url)
.option("dbtable", params.dbTable)
.option("user", params.user)
.option("password", params.password)
.load
DataWorld JDBC driver version is 0.4.4 Spark version is 2.4.0 Scala version is 2.11
same issue when reading from spark