gcs-connector-for-apache-kafka
gcs-connector-for-apache-kafka copied to clipboard
Error when using zstd compression with the Parquet output format
Could this be caused by PARQUET-1866 (https://issues.apache.org/jira/browse/PARQUET-1866)?
2022-12-01 00:56:14,469 ERROR [kafka-sink-connector-gcs|task-0] WorkerSinkTask{id=kafka-sink-connector-gcs-0} Commit of offsets threw an unexpected exception for sequence number 6: null (org.apache.kafka.connect.runtime.WorkerSinkTask) [task-thread-kafka-sink-connector-gcs-0]
org.apache.kafka.connect.errors.ConnectException: java.lang.RuntimeException: native zStandard library not available: this version of libhadoop was built without zstd support.
at io.aiven.kafka.connect.gcs.GcsSinkTask.flushFile(GcsSinkTask.java:127)
at java.base/java.util.HashMap.forEach(HashMap.java:1337)
at java.base/java.util.Collections$UnmodifiableMap.forEach(Collections.java:1505)
at io.aiven.kafka.connect.gcs.GcsSinkTask.flush(GcsSinkTask.java:110)
at org.apache.kafka.connect.sink.SinkTask.preCommit(SinkTask.java:125)
at org.apache.kafka.connect.runtime.WorkerSinkTask.commitOffsets(WorkerSinkTask.java:404)
at org.apache.kafka.connect.runtime.WorkerSinkTask.commitOffsets(WorkerSinkTask.java:374)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:218)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:203)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:189)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:244)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.RuntimeException: native zStandard library not available: this version of libhadoop was built without zstd support.
at org.apache.hadoop.io.compress.ZStandardCodec.checkNativeCodeLoaded(ZStandardCodec.java:65)
at org.apache.hadoop.io.compress.ZStandardCodec.getCompressorType(ZStandardCodec.java:153)
at org.apache.hadoop.io.compress.CodecPool.getCompressor(CodecPool.java:150)
at org.apache.hadoop.io.compress.CodecPool.getCompressor(CodecPool.java:168)
at org.apache.parquet.hadoop.CodecFactory$HeapBytesCompressor.<init>(CodecFactory.java:144)
at org.apache.parquet.hadoop.CodecFactory.createCompressor(CodecFactory.java:206)
at org.apache.parquet.hadoop.CodecFactory.getCompressor(CodecFactory.java:189)
at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:287)
at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:564)
at io.aiven.kafka.connect.common.output.parquet.ParquetOutputWriter.writeRecords(ParquetOutputWriter.java:70)
at io.aiven.kafka.connect.gcs.GcsSinkTask.flushFile(GcsSinkTask.java:125)
... 15 more