opteryx icon indicating copy to clipboard operation
opteryx copied to clipboard

✨ test flight as serialisation format

Open joocer opened this issue 1 year ago • 0 comments

import pyarrow as pa

def write_arrow_to_bytes(data: pa.Table) -> bytes:
    """
    Encode a PyArrow Table into an in-memory Arrow IPC file.

    Parameters:
        data: pa.Table
            The table to encode; its schema is handed to the file writer.

    Returns:
        bytes
            The contents of the in-memory output stream as Python bytes.
    """
    out_stream = pa.BufferOutputStream()
    file_writer = pa.RecordBatchFileWriter(out_stream, data.schema)
    # The writer must be closed (on leaving the `with` block) before the
    # stream's contents are read back.
    with file_writer:
        file_writer.write_table(data)

    arrow_buffer = out_stream.getvalue()
    return arrow_buffer.to_pybytes()

# Example usage:
# Build a small two-column PyArrow Table and serialize it to bytes.
# `serialized_data` is consumed by the read example further down.
data = pa.table({'column1': [1, 2, 3], 'column2': ['a', 'b', 'c']})
serialized_data = write_arrow_to_bytes(data)
import pyarrow as pa

def read_arrow_from_bytes(serialized_data: bytes) -> pa.Table:
    """
    Decode an in-memory Arrow IPC file back into a PyArrow Table.

    Parameters:
        serialized_data: bytes
            The bytes of a serialized Arrow Table.

    Returns:
        pa.Table
            A table holding everything read from the serialized data.
    """
    # Wrap the bytes in an Arrow buffer and expose it as a readable source.
    source = pa.BufferReader(pa.py_buffer(serialized_data))
    file_reader = pa.RecordBatchFileReader(source)
    return file_reader.read_all()

# Example usage:
# Deserialize the bytes held in `serialized_data` (produced by the write
# example above) back into a PyArrow Table.
deserialized_data = read_arrow_from_bytes(serialized_data)

joocer avatar Aug 20 '24 11:08 joocer