spark-clickhouse-connector
Read timed out
```
22/11/04 23:59:37 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 2) (172.19.0.3 executor 0): com.fasterxml.jackson.databind.RuntimeJsonMappingException: Read timed out (through reference chain: java.lang.Object[][6])
    at com.fasterxml.jackson.databind.MappingIterator._handleMappingException(MappingIterator.java:416)
    at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:201)
    at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
    at xenon.clickhouse.format.JSONCompactEachRowWithNamesAndTypesStreamOutput.hasNext(JSONOutputFormat.scala:111)
    at xenon.clickhouse.read.ClickHouseReader.next(ClickHouseReader.scala:79)
    at org.apache.spark.sql.execution.datasources.v2.PartitionIterator.hasNext(DataSourceRDD.scala:119)
    at org.apache.spark.sql.execution.datasources.v2.MetricsIterator.hasNext(DataSourceRDD.scala:156)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.$anonfun$hasNext$1(DataSourceRDD.scala:63)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.$anonfun$hasNext$1$adapted(DataSourceRDD.scala:63)
    at scala.Option.exists(Option.scala:376)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.hasNext(DataSourceRDD.scala:63)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.sort_addToSorter_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
    at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:435)
    at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1538)
    at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:480)
    at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:381)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:136)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: com.fasterxml.jackson.databind.JsonMappingException: Read timed out (through reference chain: java.lang.Object[][6])
    at com.fasterxml.jackson.databind.JsonMappingException.wrapWithPath(JsonMappingException.java:392)
    at com.fasterxml.jackson.databind.JsonMappingException.wrapWithPath(JsonMappingException.java:363)
    at com.fasterxml.jackson.databind.deser.std.ObjectArrayDeserializer.deserialize(ObjectArrayDeserializer.java:225)
    at com.fasterxml.jackson.databind.deser.std.ObjectArrayDeserializer.deserialize(ObjectArrayDeserializer.java:24)
    at com.fasterxml.jackson.databind.MappingIterator.nextValue(MappingIterator.java:283)
    at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:199)
    ... 27 more
Caused by: java.net.SocketTimeoutException: Read timed out
    at java.base/java.net.SocketInputStream.socketRead0(Native Method)
    at java.base/java.net.SocketInputStream.socketRead(SocketInputStream.java:115)
    at java.base/java.net.SocketInputStream.read(SocketInputStream.java:168)
    at java.base/java.net.SocketInputStream.read(SocketInputStream.java:140)
    at java.base/java.io.BufferedInputStream.fill(BufferedInputStream.java:252)
    at java.base/java.io.BufferedInputStream.read1(BufferedInputStream.java:292)
    at java.base/java.io.BufferedInputStream.read(BufferedInputStream.java:351)
    at java.base/sun.net.www.http.ChunkedInputStream.readAheadBlocking(ChunkedInputStream.java:552)
    at java.base/sun.net.www.http.ChunkedInputStream.readAhead(ChunkedInputStream.java:609)
    at java.base/sun.net.www.http.ChunkedInputStream.read(ChunkedInputStream.java:696)
    at java.base/java.io.FilterInputStream.read(FilterInputStream.java:133)
    at java.base/sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:3495)
    at com.clickhouse.client.stream.Lz4InputStream.readFully(Lz4InputStream.java:34)
    at com.clickhouse.client.stream.Lz4InputStream.updateBuffer(Lz4InputStream.java:72)
    at com.clickhouse.client.stream.AbstractByteArrayInputStream.read(AbstractByteArrayInputStream.java:135)
    at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._loadMore(UTF8StreamJsonParser.java:257)
    at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._loadMoreGuaranteed(UTF8StreamJsonParser.java:2444)
    at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishString2(UTF8StreamJsonParser.java:2527)
    at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishAndReturnString(UTF8StreamJsonParser.java:2507)
    at com.fasterxml.jackson.core.json.UTF8StreamJsonParser.getText(UTF8StreamJsonParser.java:334)
    at com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer._deserializeContainerNoRecursion(JsonNodeDeserializer.java:524)
    at com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:88)
    at com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:20)
    at com.fasterxml.jackson.databind.deser.std.ObjectArrayDeserializer.deserialize(ObjectArrayDeserializer.java:214)
    ... 30 more
```
The read times out while I am inserting into another table, applying a UDF to a source table with 1.8B rows; the root cause in the trace above is a `java.net.SocketTimeoutException` while streaming the query result. How can I increase the read timeout for ClickHouse? It looks straightforward with clickhouse-jdbc, but I'm not sure how to do it with spark-clickhouse-connector.
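For context, this is roughly the kind of setup I mean. A minimal sketch, assuming the catalog is registered as `clickhouse` and that keys under the `spark.sql.catalog.clickhouse.option.` prefix are forwarded to the underlying clickhouse-java client (`socket_timeout` and `connect_timeout` are clickhouse-java client options, in milliseconds); host, port, credentials, and the timeout values are placeholders:

```python
from pyspark.sql import SparkSession

# Sketch: raise the client-side read timeout for the ClickHouse catalog.
# Assumes the "option." prefix is passed through to the clickhouse-java
# client; the connection details below are placeholders.
spark = (
    SparkSession.builder
    .appName("clickhouse-read-timeout")
    .config("spark.sql.catalog.clickhouse", "xenon.clickhouse.ClickHouseCatalog")
    .config("spark.sql.catalog.clickhouse.host", "clickhouse-host")
    .config("spark.sql.catalog.clickhouse.protocol", "http")
    .config("spark.sql.catalog.clickhouse.http_port", "8123")
    .config("spark.sql.catalog.clickhouse.user", "default")
    .config("spark.sql.catalog.clickhouse.password", "")
    .config("spark.sql.catalog.clickhouse.database", "default")
    # Bump read/connect timeouts to 10 minutes (values in milliseconds).
    .config("spark.sql.catalog.clickhouse.option.socket_timeout", "600000")
    .config("spark.sql.catalog.clickhouse.option.connect_timeout", "600000")
    .getOrCreate()
)
```

The same keys could presumably also be passed as `--conf` arguments to spark-submit (or in the `conf` dict of Airflow's SparkSubmitOperator) instead of in the session builder.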