arrow2 icon indicating copy to clipboard operation
arrow2 copied to clipboard

Append to existing ipc file results in ErrorLocation: InvalidOffset when reading new block

Open Jonarod opened this issue 1 year ago • 0 comments

Trying to append to an existing ipc file as per: arrow2::io::ipc::write::FileWriter::try_from_file(), but even though it seems to accurately write the necessary bookkeeping (metadata are correct) and also writing the new chunk of data (total file size grows accordingly), it seems that the block of written data is corrupt somehow, and cannot be read.

Here are reproducible steps:

Cargo.toml:

[dependencies]
arrow2 = { version = "0.18.0", features = ["io_ipc"]}

then in main.rs:

use std::fs::File;
use arrow2::io::ipc::{
  self,
  write::WriteOptions
};
use arrow2::array::PrimitiveArray;
use arrow2::datatypes::{Field, DataType, Schema};
use arrow2::chunk::Chunk;

fn main(){

  // =====================================================
  // Create new ipc file
  // =====================================================
  let filepath = "./test_arrow_append.ipc";

  let fields = vec![
    Field::new(String::from("col"), DataType::Float64, false),
  ];
  let schema = Schema::from(fields);

  let file = File::create(&filepath).unwrap();
  let options = WriteOptions { 
    compression: None
  };
  let mut writer = ipc::write::FileWriter::new(&file, schema, None, options);

  let col = PrimitiveArray::from_vec(vec![1.0]);

  writer.start().unwrap();
  writer.write(&Chunk::new(vec![Box::new(col)]), None).unwrap();
  writer.finish().unwrap();

  // =====================================================
  // Prove it is a valid file we can read from
  // =====================================================
  if let Ok(file) = File::open(&filepath) {
    let metadata = ipc::read::read_file_metadata(&mut File::open(filepath).unwrap()).unwrap().clone();
    let mut reader = ipc::read::FileReader::new(&file, metadata, None, None);
    println!("Contains {:?} block(s)", reader.metadata().blocks.len());
    for block in &reader.metadata().blocks {
      println!("{:#?}", block);
    }

   // Can read block 1:
    println!("Block 1 is: {:#?}", reader.nth(0));
  }

  // =====================================================
  // Now try to append to it
  // =====================================================
  let metadata = ipc::read::read_file_metadata(&mut File::open(filepath).unwrap()).unwrap().clone();
  let file_append_mode = File::options().append(true).open("./test_polars_arrow.ipc").unwrap();
  let options = WriteOptions { 
    compression: None
  };
  let mut writer = ipc::write::FileWriter::try_from_file(file_append_mode, metadata, options).unwrap();

  let col = PrimitiveArray::from_vec(vec![2.0]);

  writer.write(&Chunk::new(vec![Box::new(col)]), None).unwrap();
  writer.finish().unwrap();
  
  
  // =====================================================
  // Prove it is a valid file we can read from
  // =====================================================
  if let Ok(file) = File::open(&filepath) {
    let metadata = ipc::read::read_file_metadata(&mut File::open(filepath).unwrap()).unwrap().clone();
    let mut reader = ipc::read::FileReader::new(&file, metadata, None, None);
    println!("Contains {:?} block(s)", reader.metadata().blocks.len());
    for block in &reader.metadata().blocks {
      println!("{:#?}", block);
    }

   // Can read block 1:
    println!("Block 1 is: {:#?}", reader.nth(0));

   // CANNOT READ BLOCK 2: prints None
   // println!("Block 2 is: {:#?}", reader.nth(1));
    println!("Block 2 is: {:#?}", reader.next());
    // Some(Err(OutOfSpec("InvalidFlatbufferMessage(Error { source_location: ErrorLocation { type_: \"[MessageRef]\", method: \"read_as_root\", byte_offset: 0 }, error_kind: InvalidOffset })")))

  }

}

Jonarod avatar Jan 16 '24 16:01 Jonarod