DataflowJavaSDK
DataflowJavaSDK copied to clipboard
Null is returned when trying to get all the table cells for BigQuery TableRow
Pipeline is: BigQuery -> ParDo -> GCS
Using SDK 2.1.0
. Inside the ParDo, calling c.element().getF()
to get the List<TableCell>
for the given TableRow
, returns null
:
public class BigQueryTableToOneFile {
private static final String BIGQUERY_TABLE = "bigquery-samples:wikipedia_benchmark.Wiki1M";
private static final String GCS_OUTPUT_FILE = "gs://bigquery-table-to-one-file/output/wiki_1M.csv";
public static void main(String[] args) throws Exception {
DataflowPipelineOptions options = PipelineOptionsFactory
.fromArgs(args)
.withValidation()
.as(DataflowPipelineOptions.class);
options.setAutoscalingAlgorithm(THROUGHPUT_BASED);
Pipeline pipeline = Pipeline.create(options);
pipeline.apply(BigQueryIO.read().from(BIGQUERY_TABLE))
.apply(ParDo.of(new DoFn<TableRow, String>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
for(TableCell tableCell : c.element().getF()){ //<--this returns null
System.out.println(tableCell.getV().toString());
}
}
}))
.apply(TextIO.write().to(GCS_OUTPUT_FILE)
.withoutSharding()
.withWritableByteChannelFactory(GZIP)
);
pipeline.run();
}
}