parquet4s
parquet4s copied to clipboard
Possibility to write Avro IndexedRecords to Parquet using ParquetStreams
` import com.github.mjakubowski84.parquet4s.ScalaCompat.stream.scaladsl.Sink import com.github.mjakubowski84.parquet4s._ import org.apache.avro.generic.IndexedRecord import org.apache.parquet.hadoop.ParquetFileWriter.Mode import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.apache.pekko.Done import org.apache.pekko.stream.scaladsl._
import scala.concurrent.Future import scala.concurrent.duration.DurationInt
/** Streams Avro [[IndexedRecord]]s into a Parquet file using parquet4s' Pekko
  * Streams integration (`ParquetStreams.viaParquet`).
  */
object AvroToParquetSink {

  // Overwrite any pre-existing file at the target path and compress row
  // groups with Snappy.
  private val writeOptions =
    ParquetWriter.Options(
      writeMode            = Mode.OVERWRITE,
      compressionCodecName = CompressionCodecName.SNAPPY
    )

  /** Builds a sink that persists every incoming [[IndexedRecord]] as Parquet.
    *
    * @param path           local filesystem path of the output (prefixed with
    *                       `file://` below)
    * @param schemaResolver derives the Parquet schema for [[IndexedRecord]]
    * @param encoder        encodes each record into Parquet's row format
    * @return a sink that materializes a [[Future]] completing when the
    *         stream finishes and the file is closed
    */
  def parquetSink(path: String)(implicit
      schemaResolver: ParquetSchemaResolver[IndexedRecord],
      encoder: ParquetRecordEncoder[IndexedRecord]
  ): Sink[IndexedRecord, Future[Done]] = {
    val writerFlow =
      ParquetStreams.viaParquet
        .of[IndexedRecord]
        // NOTE(review): rowGroupSize is a byte size reused here as a record
        // count — this mirrors the parquet4s examples, but confirm the
        // intended rotation threshold.
        .maxCount(writeOptions.rowGroupSize)
        .maxDuration(10.seconds)
        .options(writeOptions)
        .write(Path(s"file://$path"))

    Flow[IndexedRecord]
      .via(writerFlow)
      .toMat(Sink.ignore)(Keep.right)
  }
}
Is it feasible to write Avro IndexedRecords to a Parquet file using ParquetStreams?