iceberg-python
iceberg-python copied to clipboard
Crash when writing map type with unsigned types
Apache Iceberg version
0.6.0 (latest release)
Please describe the bug 🐞
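Appending a PyArrow table that contains a nested map column to an Iceberg table aborts the Python process for some integer key/value types. The combinations listed below are the ones I tested; there might be other combinations that don't work either.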
Note: the reproducer works when the `pa.field(...)` line is replaced with either of the following lines:
pa.field('nested_map', pa.map_(pa.int32(), pa.map_(pa.int32(), pa.int32()))),
pa.field('nested_map', pa.map_(pa.int64(), pa.map_(pa.int64(), pa.int64()))),
Note: the reproducer also crashes when the `pa.field(...)` line is replaced with any of the following lines:
pa.field('nested_map', pa.map_(pa.int16(), pa.map_(pa.int16(), pa.int16()))),
pa.field('nested_map', pa.map_(pa.uint16(), pa.map_(pa.uint16(), pa.uint16()))),
pa.field('nested_map', pa.map_(pa.uint32(), pa.map_(pa.uint32(), pa.uint32()))),
pa.field('nested_map', pa.map_(pa.uint64(), pa.map_(pa.uint64(), pa.uint64()))),
Crashes with both pyarrow 15 and 16, though with 16 there's less output
from pyiceberg.catalog.sql import SqlCatalog
import pyarrow as pa
# Minimal reproducer: build a one-row Arrow table whose only column is a
# nested map (map<int8, map<int8, int8>>), create an Iceberg table from the
# same Arrow schema, and append the Arrow table to it.
pylist = [{'nested_map': {5: {5: 5}}}]
arrow_schema = pa.schema(
    [
        pa.field('nested_map', pa.map_(pa.int8(), pa.map_(pa.int8(), pa.int8()))),
    ],
)
arrow_table = pa.Table.from_pylist(pylist, schema=arrow_schema)

# Catalog backed by a local SQLite database file (pyiceberg.db).
catalog = SqlCatalog(
    'test_catalog',
    **{
        'type': 'sql',
        'uri': 'sqlite:///pyiceberg.db',
    },
)

namespace = 'test_ns'
table_name = 'test_table'

# NOTE(review): presumably this raises if the namespace is left over from a
# previous run; delete pyiceberg.db before re-running the reproducer.
catalog.create_namespace(namespace=namespace)
new_table = catalog.create_table(
    identifier=f'{namespace}.{table_name}',
    schema=arrow_schema,
    location='.',
)

# The process aborts during this call. According to the reported native
# backtrace, the failure happens inside a pyarrow Table.cast invocation
# ("Map array keys array should have no nulls").
new_table.append(arrow_table)
Output with pyarrow 15.0.2:
/Users/voltrondata/github-actions-runner/_work/crossbow/crossbow/arrow/cpp/src/arrow/array/array_nested.cc:848: Check failed: _s.ok() Operation failed: ValidateChildData(data->child_data)
Bad status: Invalid: Map array keys array should have no nulls
0 libarrow.1500.dylib 0x0000000128c3bfb8 _ZN5arrow4util7CerrLog14PrintBackTraceEv + 44
1 libarrow.1500.dylib 0x0000000128c3bf6c _ZN5arrow4util7CerrLogD2Ev + 184
2 libarrow.1500.dylib 0x0000000128c3beac _ZN5arrow4util7CerrLogD0Ev + 12
3 libarrow.1500.dylib 0x0000000128c36608 _ZN5arrow4util8ArrowLogD1Ev + 48
4 libarrow.1500.dylib 0x0000000128e6d8d4 _ZN5arrow8MapArray7SetDataERKNSt3__110shared_ptrINS_9ArrayDataEEE + 508
5 libarrow.1500.dylib 0x0000000128e6da54 _ZN5arrow8MapArrayC1ERKNSt3__110shared_ptrINS_9ArrayDataEEE + 64
6 libarrow.1500.dylib 0x0000000128d5bf40 _ZN5arrow9MakeArrayERKNSt3__110shared_ptrINS_9ArrayDataEEE + 1180
7 libarrow.1500.dylib 0x0000000128b4d898 _ZN5arrow7compute6detail12_GLOBAL__N_114ToChunkedArrayERKNSt3__16vectorINS_5DatumENS3_9allocatorIS5_EEEERKNS_10TypeHolderE + 156
8 libarrow.1500.dylib 0x0000000128b4cc50 _ZN5arrow7compute6detail12_GLOBAL__N_114ScalarExecutor11WrapResultsERKNSt3__16vectorINS_5DatumENS4_9allocatorIS6_EEEESB_ + 104
9 libarrow.1500.dylib 0x0000000128b59184 _ZN5arrow7compute6detail20FunctionExecutorImpl7ExecuteERKNSt3__16vectorINS_5DatumENS3_9allocatorIS5_EEEEx + 2156
10 libarrow.1500.dylib 0x0000000128ae9598 _ZN5arrow7compute12_GLOBAL__N_115ExecuteInternalERKNS0_8FunctionENSt3__16vectorINS_5DatumENS5_9allocatorIS7_EEEExPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 480
11 libarrow.1500.dylib 0x0000000128ae9320 _ZNK5arrow7compute8Function7ExecuteERKNSt3__16vectorINS_5DatumENS2_9allocatorIS4_EEEEPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 76
12 libarrow.1500.dylib 0x0000000128b4a518 _ZNK5arrow7compute8internal12_GLOBAL__N_116CastMetaFunction11ExecuteImplERKNSt3__16vectorINS_5DatumENS4_9allocatorIS6_EEEEPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 640
13 libarrow.1500.dylib 0x0000000128aea758 _ZNK5arrow7compute12MetaFunction7ExecuteERKNSt3__16vectorINS_5DatumENS2_9allocatorIS4_EEEEPKNS0_15FunctionOptionsEPNS0_11ExecContextE + 236
14 _compute.cpython-39-darwin.so 0x0000000119048b38 _ZL43__pyx_pw_7pyarrow_8_compute_8Function_7callP7_objectPKS0_lS0_ + 1608
15 libpython3.9.dylib 0x00000001027d1e30 method_vectorcall + 176
16 libpython3.9.dylib 0x00000001027cf78c PyVectorcall_Call + 160
17 _compute.cpython-39-darwin.so 0x0000000119074fe0 _ZL43__pyx_pw_7pyarrow_8_compute_11call_functionP7_objectPKS0_lS0_ + 1744
18 libpython3.9.dylib 0x00000001028a4ab4 call_function + 440
19 libpython3.9.dylib 0x00000001028a1c28 _PyEval_EvalFrameDefault + 22592
20 libpython3.9.dylib 0x00000001028a58a8 _PyEval_EvalCode + 2680
21 libpython3.9.dylib 0x00000001027cfb04 _PyFunction_Vectorcall + 236
22 libpython3.9.dylib 0x00000001027cf78c PyVectorcall_Call + 160
23 lib.cpython-39-darwin.so 0x000000010660e440 _ZL44__pyx_pw_7pyarrow_3lib_12ChunkedArray_48castP7_objectPKS0_lS0_ + 1376
24 libpython3.9.dylib 0x00000001027d1e30 method_vectorcall + 176
25 libpython3.9.dylib 0x00000001027cf78c PyVectorcall_Call + 160
26 lib.cpython-39-darwin.so 0x00000001066242d8 _ZL36__pyx_pw_7pyarrow_3lib_5Table_25castP7_objectPKS0_lS0_ + 2920
27 libpython3.9.dylib 0x00000001028a4ab4 call_function + 440
28 libpython3.9.dylib 0x00000001028a1b8c _PyEval_EvalFrameDefault + 22436
29 libpython3.9.dylib 0x00000001028a58a8 _PyEval_EvalCode + 2680
30 libpython3.9.dylib 0x00000001027cfb04 _PyFunction_Vectorcall + 236
31 libpython3.9.dylib 0x00000001027d1e30 method_vectorcall + 176
32 libpython3.9.dylib 0x00000001028a4ab4 call_function + 440
33 libpython3.9.dylib 0x00000001028a1ca0 _PyEval_EvalFrameDefault + 22712
34 libpython3.9.dylib 0x00000001028a58a8 _PyEval_EvalCode + 2680
35 libpython3.9.dylib 0x00000001027cfb04 _PyFunction_Vectorcall + 236
36 libpython3.9.dylib 0x00000001028a4ab4 call_function + 440
37 libpython3.9.dylib 0x00000001028a1b8c _PyEval_EvalFrameDefault + 22436
38 libpython3.9.dylib 0x00000001028a58a8 _PyEval_EvalCode + 2680
39 libpython3.9.dylib 0x000000010289c320 PyEval_EvalCode + 80
40 libpython3.9.dylib 0x00000001028e1ddc run_mod + 180
41 libpython3.9.dylib 0x00000001028e1fb0 pyrun_file + 180
42 libpython3.9.dylib 0x00000001028dffec PyRun_SimpleFileExFlags + 836
43 libpython3.9.dylib 0x00000001028fdba8 Py_RunMain + 1620
44 libpython3.9.dylib 0x00000001028fe074 pymain_main + 324
45 libpython3.9.dylib 0x00000001028fe124 Py_BytesMain + 56
46 dyld 0x00000001a00cbf28 start + 2236
Abort trap: 6
Output with pyarrow 16.1.0:
/Users/runner/work/crossbow/crossbow/arrow/cpp/src/arrow/array/array_nested.cc:848: Check failed: _s.ok() Operation failed: ValidateChildData(data->child_data)
Bad status: Invalid: Map array keys array should have no nulls
Abort trap: 6