polars icon indicating copy to clipboard operation
polars copied to clipboard

JsonLineReader panics on empty array in a struct

Open m-ronchi opened this issue 1 year ago • 1 comment

Polars version checks

  • [X] I have checked that this issue has not already been reported.

  • [X] I have confirmed this bug exists on the latest version of Polars.

Issue description

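When reading an NDJSON file, the reader panics with an index-out-of-bounds error (in `value_to_dtype`, `ndjson_core/buffer.rs:237`) if a line contains an empty array nested inside a struct. Both `LazyJsonLineReader` and `JsonLineReader` are affected.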

Reproducible example

test.ndjson:

{"foo":{"bar":[]}}

code:


/// Reproduction via the lazy reader: builds a `LazyJsonLineReader` over
/// `src/test.ndjson`, finishes it, and collects the result into `df`.
#[test]
fn test() {
    let path = "src/test.ndjson".to_string();
    let lazy = LazyJsonLineReader::new(path).finish().unwrap();
    let df = lazy.collect().unwrap();

    dbg!(df);
}

/// Reproduction via the eager reader: opens `src/test.ndjson` and hands a
/// mutable reference to the file to `JsonLineReader`, then finishes it into `df`.
#[test]
fn test2() {
    let mut file = File::open("src/test.ndjson").unwrap();
    let df = JsonLineReader::new(&mut file).finish().unwrap();

    dbg!(df);
}

output:


thread '<unnamed>' panicked at 'index out of bounds: the len is 0 but the index is 0', /Users/mauroronchi/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/buffer.rs:237:41
stack backtrace:
   0: rust_begin_unwind
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs:575:5
   1: core::panicking::panic_fmt
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/panicking.rs:65:14
   2: core::panicking::panic_bounds_check
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/panicking.rs:151:5
   3: <usize as core::slice::index::SliceIndex<[T]>>::index
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/slice/index.rs:259:10
   4: core::slice::index::<impl core::ops::index::Index<I> for [T]>::index
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/slice/index.rs:18:9
   5: <alloc::vec::Vec<T,A> as core::ops::index::Index<I>>::index
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/alloc/src/vec/mod.rs:2736:9
   6: polars_io::ndjson_core::buffer::value_to_dtype
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/buffer.rs:237:41
   7: polars_io::ndjson_core::buffer::deserialize_all::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/buffer.rs:272:30
   8: core::iter::adapters::map::map_fold::{{closure}}
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/adapters/map.rs:84:28
   9: core::iter::traits::iterator::Iterator::fold
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/traits/iterator.rs:2414:21
  10: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/adapters/map.rs:124:9
  11: <(ExtendA,ExtendB) as core::iter::traits::collect::Extend<(A,B)>>::extend
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/traits/collect.rs:439:9
  12: core::iter::traits::iterator::Iterator::unzip
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/traits/iterator.rs:3197:9
  13: polars_io::ndjson_core::buffer::deserialize_all
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/buffer.rs:269:53
  14: polars_io::ndjson_core::buffer::Buffer::add
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/buffer.rs:192:26
  15: polars_io::ndjson_core::ndjson::parse_impl::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/ndjson.rs:289:40
  16: core::iter::traits::iterator::Iterator::for_each::call::{{closure}}
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/traits/iterator.rs:828:29
  17: core::iter::traits::iterator::Iterator::fold
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/traits/iterator.rs:2414:21
  18: core::iter::traits::iterator::Iterator::for_each
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/traits/iterator.rs:831:9
  19: polars_io::ndjson_core::ndjson::parse_impl
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/ndjson.rs:286:21
  20: polars_io::ndjson_core::ndjson::parse_lines
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/ndjson.rs:314:9
  21: polars_io::ndjson_core::ndjson::CoreJsonReader::parse_json::{{closure}}::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/ndjson.rs:218:29
  22: core::ops::function::impls::<impl core::ops::function::FnMut<A> for &F>::call_mut
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/ops/function.rs:273:13
  23: core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &mut F>::call_once
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/ops/function.rs:309:13
  24: core::option::Option<T>::map
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/option.rs:925:29
  25: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::next
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/adapters/map.rs:103:9
  26: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::next
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/adapters/map.rs:103:9
  27: <core::iter::adapters::take_while::TakeWhile<I,P> as core::iter::traits::iterator::Iterator>::next
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/adapters/take_while.rs:46:21
  28: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::next
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/iter/adapters/map.rs:103:9
  29: alloc::vec::Vec<T,A>::extend_desugared
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/alloc/src/vec/mod.rs:2857:35
  30: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/alloc/src/vec/spec_extend.rs:18:9
  31: <alloc::vec::Vec<T,A> as core::iter::traits::collect::Extend<T>>::extend
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/alloc/src/vec/mod.rs:2831:9
  32: <rayon::iter::extend::ListVecFolder<T> as rayon::iter::plumbing::Folder<T>>::consume_iter
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/extend.rs:73:9
  33: <rayon::iter::while_some::WhileSomeFolder<C> as rayon::iter::plumbing::Folder<core::option::Option<T>>>::consume_iter
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/while_some.rs:139:21
  34: <rayon::iter::map::MapFolder<C,F> as rayon::iter::plumbing::Folder<T>>::consume_iter
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/map.rs:248:21
  35: <rayon::iter::map::MapFolder<C,F> as rayon::iter::plumbing::Folder<T>>::consume_iter
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/map.rs:248:21
  36: rayon::iter::plumbing::Producer::fold_with
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/plumbing/mod.rs:110:9
  37: rayon::iter::plumbing::bridge_producer_consumer::helper
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/plumbing/mod.rs:438:13
  38: rayon::iter::plumbing::bridge_producer_consumer
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/plumbing/mod.rs:397:12
  39: <rayon::iter::plumbing::bridge::Callback<C> as rayon::iter::plumbing::ProducerCallback<I>>::callback
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/plumbing/mod.rs:373:13
  40: <rayon::vec::Drain<T> as rayon::iter::IndexedParallelIterator>::with_producer
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/vec.rs:147:13
  41: <rayon::vec::IntoIter<T> as rayon::iter::IndexedParallelIterator>::with_producer
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/vec.rs:83:9
  42: rayon::iter::plumbing::bridge
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/plumbing/mod.rs:357:12
  43: <rayon::vec::IntoIter<T> as rayon::iter::ParallelIterator>::drive_unindexed
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/vec.rs:58:9
  44: <rayon::iter::map::Map<I,F> as rayon::iter::ParallelIterator>::drive_unindexed
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/map.rs:49:9
  45: <rayon::iter::map::Map<I,F> as rayon::iter::ParallelIterator>::drive_unindexed
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/map.rs:49:9
  46: <rayon::iter::while_some::WhileSome<I> as rayon::iter::ParallelIterator>::drive_unindexed
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/while_some.rs:44:9
  47: rayon::iter::extend::<impl rayon::iter::ParallelExtend<T> for alloc::vec::Vec<T>>::par_extend
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/extend.rs:576:28
  48: rayon::iter::from_par_iter::collect_extended
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/from_par_iter.rs:17:5
  49: rayon::iter::from_par_iter::<impl rayon::iter::FromParallelIterator<T> for alloc::vec::Vec<T>>::from_par_iter
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/from_par_iter.rs:30:9
  50: rayon::iter::ParallelIterator::collect
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/mod.rs:2048:9
  51: rayon::result::<impl rayon::iter::FromParallelIterator<core::result::Result<T,E>> for core::result::Result<C,E>>::from_par_iter
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/result.rs:121:26
  52: rayon::iter::ParallelIterator::collect
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-1.6.1/src/iter/mod.rs:2048:9
  53: polars_io::ndjson_core::ndjson::CoreJsonReader::parse_json::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-io-0.26.1/src/ndjson_core/ndjson.rs:214:13
  54: rayon_core::thread_pool::ThreadPool::install::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/thread_pool/mod.rs:110:40
  55: rayon_core::registry::Registry::in_worker_cold::{{closure}}::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:506:21
  56: rayon_core::job::JobResult<T>::call::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/job.rs:212:41
  57: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/core/src/panic/unwind_safe.rs:271:9
  58: std::panicking::try::do_call
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs:483:40
  59: ___rust_try
  60: std::panicking::try
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs:447:19
  61: std::panic::catch_unwind
             at /rustc/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panic.rs:137:14
  62: rayon_core::unwind::halt_unwinding
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/unwind.rs:17:5
  63: rayon_core::job::JobResult<T>::call
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/job.rs:212:15
  64: <rayon_core::job::StackJob<L,F,R> as rayon_core::job::Job>::execute
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/job.rs:114:32
  65: rayon_core::job::JobRef::execute
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/job.rs:58:9
  66: rayon_core::registry::WorkerThread::execute
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:804:9
  67: rayon_core::registry::WorkerThread::wait_until_cold
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:781:17
  68: rayon_core::registry::WorkerThread::wait_until
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:755:13
  69: rayon_core::registry::main_loop
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:889:5
  70: rayon_core::registry::ThreadBuilder::run
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:53:18
  71: <rayon_core::registry::DefaultSpawn as rayon_core::registry::ThreadSpawn>::spawn::{{closure}}
             at /Users/***/.cargo/registry/src/github.com-1ecc6299db9ec823/rayon-core-1.10.2/src/registry.rs:98:20
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

Expected behavior

The file should be read without panicking.

Installed versions

polars = { version = "0.26.1", features = ["parquet", "json", "lazy", "dtype-struct"] }

m-ronchi avatar Jan 27 '23 17:01 m-ronchi

@universalmind303 could you take this one?

ritchie46 avatar Jan 28 '23 09:01 ritchie46