gitoxide
`panic!("encountered unsupported command code: 0")` when enumerating repository objects
What I'm seeing
Using Nosey Parker to scan a particular obscure repository from GitHub, I'm seeing a panic in the data::delta::apply function in gix-pack-0.59.1.
I'm seeing a crash when I run noseyparker scan -d test.np -j1 -vvv deqp.git, with a local clone of the https://github.com/elongbug/deqp repository. Here's a debug stack trace from that:
The application panicked (crashed).
Message: encountered unsupported command code: 0
Location: /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-pack-0.59.1/src/data/delta.rs:60
[...]
Full Log
The application panicked (crashed).
Message: encountered unsupported command code: 0
Location: /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-pack-0.59.1/src/data/delta.rs:60
Run with COLORBT_SHOW_HIDDEN=1 environment variable to disable frame filtering.
⠒ Scanning content 1.55 GiB [00:01:14] ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ BACKTRACE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⋮ 9 frames hidden ⋮
10: gix_pack::data::delta::apply::h990db6d667116761
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-pack-0.59.1/src/data/delta.rs:60
58 │ .expect("delta copy from base: byte slices must match");
59 │ }
60 > 0 => panic!("encountered unsupported command code: 0"),
61 │ size => {
62 │ std::io::Write::write(&mut target, &data[i..i + *size as usize])
11: gix_pack::data::file::decode::entry::<impl gix_pack::data::File>::resolve_deltas::h14b6fc9b4dbdff00
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-pack-0.59.1/src/data/file/decode/entry.rs:377
375 │ last_result_size = Some(result_size);
376 │ }
377 > delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data);
378 │ // use the target as source for the next delta
379 │ std::mem::swap(&mut source_buf, &mut target_buf);
12: gix_pack::data::file::decode::entry::<impl gix_pack::data::File>::decode_entry::h485eb73abedf8206
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-pack-0.59.1/src/data/file/decode/entry.rs:186
184 │ })
185 │ }
186 > OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, inflate, out, delta_cache),
187 │ }
188 │ }
13: gix_odb::store_impls::dynamic::find::<impl gix_odb::store_impls::dynamic::Handle<S>>::try_find_cached_inner::h68d1ee577f15b8ae
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-odb-0.69.1/src/store_impls/dynamic/find.rs:151
149 │ let entry = pack.entry(pack_offset)?;
150 │ let header_size = entry.header_size();
151 > let res = pack.decode_entry(
152 │ entry,
153 │ buffer,
14: gix_odb::store_impls::dynamic::find::<impl gix_pack::find_traits::Find for gix_odb::store_impls::dynamic::Handle<S>>::try_find_cached::h356f98a75f4393f2
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-odb-0.69.1/src/store_impls/dynamic/find.rs:356
354 │ let mut snapshot = self.snapshot.borrow_mut();
355 │ let mut inflate = self.inflate.borrow_mut();
356 > self.try_find_cached_inner(id, buffer, &mut inflate, pack_cache, &mut snapshot, None)
357 │ .map_err(|err| Box::new(err) as _)
358 │ }
15: gix_odb::cache::impls::<impl gix_pack::find_traits::Find for gix_odb::Cache<S>>::try_find_cached::h22b8bd39957c46de
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-odb-0.69.1/src/cache.rs:224
222 │ }
223 │ }
224 > let possibly_obj = self.inner.try_find_cached(id.as_ref(), buffer, pack_cache)?;
225 │ if let (Some(mut obj_cache), Some((obj, _location))) =
226 │ (self.object_cache.as_ref().map(RefCell::borrow_mut), &possibly_obj)
16: gix_odb::cache::impls::<impl gix_pack::find_traits::Find for gix_odb::Cache<S>>::try_find::hc3f634d96c8f0040
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-odb-0.69.1/src/cache.rs:208
206 │ ) -> Result<Option<(Data<'a>, Option<Location>)>, gix_object::find::Error> {
207 │ match self.pack_cache.as_ref().map(RefCell::borrow_mut) {
208 > Some(mut pack_cache) => self.try_find_cached(id, buffer, pack_cache.deref_mut()),
209 │ None => self.try_find_cached(id, buffer, &mut gix_pack::cache::Never),
210 │ }
17: gix_odb::cache::impls::<impl gix_object::traits::find::Find for gix_odb::Cache<S>>::try_find::h505b462e9cba2ee2
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-odb-0.69.1/src/cache.rs:163
161 │ {
162 │ fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, gix_object::find::Error> {
163 > gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0))
164 │ }
165 │ }
18: <gix_odb::memory::Proxy<T> as gix_object::traits::find::Find>::try_find::h40b6c57e98500767
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-odb-0.69.1/src/memory.rs:159
157 │ }
158 │ }
159 > self.inner.try_find(id, buffer)
160 │ }
161 │ }
19: gix_object::traits::find::ext::FindExt::find::h0ce666a910c19e76
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-object-0.49.1/src/traits/find.rs:234
232 │ buffer: &'a mut Vec<u8>,
233 │ ) -> Result<crate::Data<'a>, find::existing::Error> {
234 > self.try_find(id, buffer)
235 │ .map_err(find::existing::Error::Find)?
236 │ .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() })
20: gix::repository::object::<impl gix::types::Repository>::find_object::hb284647272300f9e
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/gix-0.72.1/src/repository/object.rs:53
51 │ }
52 │ let mut buf = self.free_buf();
53 > let kind = self.objects.find(&id, &mut buf)?.kind;
54 │ Ok(Object::from_data(id, kind, buf, self))
55 │ }
21: <noseyparker_cli::cmd_scan::GitRepoResultIter as rayon::iter::ParallelIterator>::drive_unindexed::{{closure}}::{{closure}}::h2e81d9987823c008
at /home/blarsen/projects/noseyparkerplusplus/crates/noseyparker-cli/src/cmd_scan.rs:199
197 │
198 │ let blob = || -> Result<Blob> {
199 > let mut blob = repo.find_object(blob_id)?.try_into_blob()?;
200 │ let data = std::mem::take(&mut blob.data); // avoid a copy
201 │ Ok(Blob::new(BlobId::from(&blob_id), data))
22: <noseyparker_cli::cmd_scan::GitRepoResultIter as rayon::iter::ParallelIterator>::drive_unindexed::{{closure}}::hcfc65172c38b96e7
at /home/blarsen/projects/noseyparkerplusplus/crates/noseyparker-cli/src/cmd_scan.rs:198
196 │ let blob_id = md.blob_oid;
197 │
198 > let blob = || -> Result<Blob> {
199 │ let mut blob = repo.find_object(blob_id)?.try_into_blob()?;
200 │ let data = std::mem::take(&mut blob.data); // avoid a copy
23: core::ops::function::impls::<impl core::ops::function::Fn<A> for &F>::call::h04920647f8c3f68a
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:262
260 │ {
261 │ extern "rust-call" fn call(&self, args: A) -> F::Output {
262 > (**self).call(args)
263 │ }
264 │ }
24: <rayon::iter::map_with::MapWithFolder<C,U,F> as rayon::iter::plumbing::Folder<T>>::consume_iter::with::{{closure}}::hce48ffd6a09956f4
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/map_with.rs:317
315 │ map_op: impl Fn(&mut U, T) -> R + 'f,
316 │ ) -> impl FnMut(T) -> R + 'f {
317 > move |x| map_op(item, x)
318 │ }
319 │
25: core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &mut F>::call_once::h8cf6aabb49d00555
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:305
303 │ type Output = F::Output;
304 │ extern "rust-call" fn call_once(self, args: A) -> F::Output {
305 > (*self).call_mut(args)
306 │ }
307 │ }
26: core::option::Option<T>::map::he9fed2335951ae0c
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/option.rs:1113
1111 │ {
1112 │ match self {
1113 > Some(x) => Some(f(x)),
1114 │ None => None,
1115 │ }
27: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::next::ha3fb456556810b5c
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:107
105 │ #[inline]
106 │ fn next(&mut self) -> Option<B> {
107 > self.iter.next().map(&mut self.f)
108 │ }
109 │
28: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::next::ha7f60e1c0a324d21
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:107
105 │ #[inline]
106 │ fn next(&mut self) -> Option<B> {
107 > self.iter.next().map(&mut self.f)
108 │ }
109 │
29: rayon::iter::plumbing::Folder::consume_iter::hf8e09de90f9ea415
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:177
175 │ I: IntoIterator<Item = Item>,
176 │ {
177 > for item in iter {
178 │ self = self.consume(item);
179 │ if self.full() {
30: <rayon::iter::map_with::MapWithFolder<C,U,F> as rayon::iter::plumbing::Folder<T>>::consume_iter::hcb8661039dd1a0f9
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/map_with.rs:322
320 │ {
321 │ let mapped_iter = iter.into_iter().map(with(&mut self.item, self.map_op));
322 > self.base = self.base.consume_iter(mapped_iter);
323 │ }
324 │ self
31: <rayon::iter::map_with::MapWithFolder<C,U,F> as rayon::iter::plumbing::Folder<T>>::consume_iter::hfd6b6a675578c81f
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/map_with.rs:322
320 │ {
321 │ let mapped_iter = iter.into_iter().map(with(&mut self.item, self.map_op));
322 > self.base = self.base.consume_iter(mapped_iter);
323 │ }
324 │ self
32: rayon::iter::plumbing::Producer::fold_with::ha6e5cd69b027c08d
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:109
107 │ F: Folder<Self::Item>,
108 │ {
109 > folder.consume_iter(self.into_iter())
110 │ }
111 │ }
33: <rayon::iter::len::MinLenProducer<P> as rayon::iter::plumbing::Producer>::fold_with::h8428a8c1bf7a486c
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/len.rs:134
132 │ F: Folder<Self::Item>,
133 │ {
134 > self.base.fold_with(folder)
135 │ }
136 │ }
34: rayon::iter::plumbing::bridge_producer_consumer::helper::h372f7337d5f6da17
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:437
435 │ reducer.reduce(left_result, right_result)
436 │ } else {
437 > producer.fold_with(consumer.into_folder()).complete()
438 │ }
439 │ }
35: rayon::iter::plumbing::bridge_producer_consumer::helper::{{closure}}::h9aad568542e81dd4
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:426
424 │ },
425 │ |context| {
426 > helper(
427 │ len - mid,
428 │ context.migrated(),
36: rayon_core::join::join_context::call_b::{{closure}}::h2d4a355590d1944e
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/join/mod.rs:129
127 │ #[inline]
128 │ fn call_b<R>(f: impl FnOnce(FnContext) -> R) -> impl FnOnce(bool) -> R {
129 > move |migrated| f(FnContext::new(migrated))
130 │ }
131 │
37: rayon_core::job::StackJob<L,F,R>::run_inline::h4de831323c4e97e7
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/job.rs:102
100 │
101 │ pub(super) unsafe fn run_inline(self, stolen: bool) -> R {
102 > self.func.into_inner().unwrap()(stolen)
103 │ }
104 │
38: rayon_core::join::join_context::{{closure}}::h5822005cf64c68e4
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/join/mod.rs:159
157 │ //
158 │ // Note that this could panic, but it's ok if we unwind here.
159 > let result_b = job_b.run_inline(injected);
160 │ return (result_a, result_b);
161 │ } else {
39: rayon_core::registry::in_worker::hb110d42b82230fbb
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:951
949 │ // current thread, so we know the data structure won't be
950 │ // invalidated until we return.
951 > op(&*owner_thread, false)
952 │ } else {
953 │ global_registry().in_worker(op)
40: rayon_core::join::join_context::hc0311f36f2a71c18
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/join/mod.rs:132
130 │ }
131 │
132 > registry::in_worker(|worker_thread, injected| unsafe {
133 │ // Create virtual wrapper for task b; this all has to be
134 │ // done here so that the stack frame can keep it all live
41: rayon::iter::plumbing::bridge_producer_consumer::helper::h372f7337d5f6da17
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:415
413 │ let (left_producer, right_producer) = producer.split_at(mid);
414 │ let (left_consumer, right_consumer, reducer) = consumer.split_at(mid);
415 > let (left_result, right_result) = join_context(
416 │ |context| {
417 │ helper(
42: rayon::iter::plumbing::bridge_producer_consumer::h8f825ccc0043937b
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:396
394 │ {
395 │ let splitter = LengthSplitter::new(producer.min_len(), producer.max_len(), len);
396 > return helper(len, false, splitter, producer, consumer);
397 │
398 │ fn helper<P, C>(
43: <rayon::iter::plumbing::bridge::Callback<C> as rayon::iter::plumbing::ProducerCallback<I>>::callback::h1abc936b73e245c6
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:372
370 │ P: Producer<Item = I>,
371 │ {
372 > bridge_producer_consumer(self.len, producer, self.consumer)
373 │ }
374 │ }
44: <<rayon::iter::len::MinLen<I> as rayon::iter::IndexedParallelIterator>::with_producer::Callback<CB> as rayon::iter::plumbing::ProducerCallback<T>>::callback::h28a6ba45beb8316b
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/len.rs:83
81 │ min: self.min,
82 │ };
83 > self.callback.callback(producer)
84 │ }
85 │ }
45: <rayon::vec::Drain<T> as rayon::iter::IndexedParallelIterator>::with_producer::h0cfbf39e5e74bcbf
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/vec.rs:147
145 │
146 │ // The producer will move or drop each item from the drained range.
147 > callback.callback(producer)
148 │ }
149 │ }
46: <rayon::vec::IntoIter<T> as rayon::iter::IndexedParallelIterator>::with_producer::hc0ffe17d25b11b5f
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/vec.rs:83
81 │ {
82 │ // Drain every item, and then the vector only needs to free its buffer.
83 > self.vec.par_drain(..).with_producer(callback)
84 │ }
85 │ }
47: <rayon::iter::len::MinLen<I> as rayon::iter::IndexedParallelIterator>::with_producer::h553f26dda48af215
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/len.rs:60
58 │ CB: ProducerCallback<Self::Item>,
59 │ {
60 > return self.base.with_producer(Callback {
61 │ callback,
62 │ min: self.min,
48: rayon::iter::plumbing::bridge::h773705a9be8800aa
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:356
354 │ {
355 │ let len = par_iter.len();
356 > return par_iter.with_producer(Callback { len, consumer });
357 │
358 │ struct Callback<C> {
49: <rayon::iter::len::MinLen<I> as rayon::iter::ParallelIterator>::drive_unindexed::hffeef17194c9017c
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/len.rs:36
34 │ C: UnindexedConsumer<Self::Item>,
35 │ {
36 > bridge(self, consumer)
37 │ }
38 │
50: <rayon::iter::map_with::MapInit<I,INIT,F> as rayon::iter::ParallelIterator>::drive_unindexed::h94a2b6f49cb5515b
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/map_with.rs:382
380 │ {
381 │ let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op);
382 > self.base.drive_unindexed(consumer1)
383 │ }
384 │
51: <noseyparker_cli::cmd_scan::GitRepoResultIter as rayon::iter::ParallelIterator>::drive_unindexed::h8390c4732055d594
at /home/blarsen/projects/noseyparkerplusplus/crates/noseyparker-cli/src/cmd_scan.rs:176
174 │ let repo = self.inner.repository.into_sync();
175 │ let repo_path = Arc::new(self.inner.path.clone());
176 > self.inner
177 │ .blobs
178 │ .into_par_iter()
52: <noseyparker_cli::cmd_scan::FoundInputIter as rayon::iter::ParallelIterator>::drive_unindexed::h49dd338b93298ca7
at /home/blarsen/projects/noseyparkerplusplus/crates/noseyparker-cli/src/cmd_scan.rs:301
299 │ match self {
300 │ FoundInputIter::File(i) => i.drive_unindexed(consumer),
301 > FoundInputIter::GitRepo(i) => i.drive_unindexed(consumer),
302 │ FoundInputIter::EnumeratorFile(i) => i.drive_unindexed(consumer),
303 │ }
53: <rayon::iter::flatten::FlattenFolder<C,<C as rayon::iter::plumbing::Consumer<<T as rayon::iter::IntoParallelIterator>::Item>>::Result> as rayon::iter::plumbing::Folder<T>>::consume::h3bcfa490e60c0810
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/flatten.rs:114
112 │ let par_iter = item.into_par_iter();
113 │ let consumer = self.base.split_off_left();
114 > let result = par_iter.drive_unindexed(consumer);
115 │
116 │ let previous = match self.previous {
54: <rayon::iter::filter_map::FilterMapFolder<C,P> as rayon::iter::plumbing::Folder<T>>::consume::hc8806136aef5923b
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/filter_map.rs:124
122 │ let filter_op = self.filter_op;
123 │ if let Some(mapped_item) = filter_op(item) {
124 > let base = self.base.consume(mapped_item);
125 │ FilterMapFolder { base, filter_op }
126 │ } else {
55: <&rayon::iter::par_bridge::IterParallelProducer<Iter> as rayon::iter::plumbing::UnindexedProducer>::fold_with::h6426e9e9ba9e3af3
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/par_bridge.rs:145
143 │ if let Some(it) = iter.next() {
144 │ drop(iter);
145 > folder = folder.consume(it);
146 │ if folder.full() {
147 │ return folder;
56: rayon::iter::plumbing::bridge_unindexed_producer_consumer::h2b035d48d0c4f326
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:478
476 │ reducer.reduce(left_result, right_result)
477 │ }
478 > (producer, None) => producer.fold_with(consumer.into_folder()).complete(),
479 │ }
480 │ } else {
57: rayon::iter::plumbing::bridge_unindexed_producer_consumer::{{closure}}::hb8fb6da870149c5b
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/iter/plumbing/mod.rs:473
471 │ let bridge = bridge_unindexed_producer_consumer;
472 │ let (left_result, right_result) = join_context(
473 > |context| bridge(context.migrated(), splitter, left_producer, left_consumer),
474 │ |context| bridge(context.migrated(), splitter, right_producer, right_consumer),
475 │ );
58: rayon_core::join::join_context::call_a::{{closure}}::h66b269fbf4a554a7
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/join/mod.rs:124
122 │ #[inline]
123 │ fn call_a<R>(f: impl FnOnce(FnContext) -> R, injected: bool) -> impl FnOnce() -> R {
124 > move || f(FnContext::new(injected))
125 │ }
126 │
59: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::hca943e5c7c818128
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panic/unwind_safe.rs:272
270 │ #[inline]
271 │ extern "rust-call" fn call_once(self, _args: ()) -> R {
272 > (self.0)()
273 │ }
274 │ }
60: std::panicking::try::do_call::h3c325e65ad71efc0
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:557
555 │ let data = &mut (*data);
556 │ let f = ManuallyDrop::take(&mut data.f);
557 > data.r = ManuallyDrop::new(f());
558 │ }
559 │ }
61: __rust_try
at <unknown source file>
62: std::panicking::try::h58b0fcdeaabdd12f
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:520
518 │ // See their safety preconditions for more information
519 │ unsafe {
520 > return if intrinsics::catch_unwind(do_call::<F, R>, data_ptr, do_catch::<F, R>) == 0 {
521 │ Ok(ManuallyDrop::into_inner(data.r))
522 │ } else {
63: std::panic::catch_unwind::hcd77458f398ee48c
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:358
356 │ #[stable(feature = "catch_unwind", since = "1.9.0")]
357 │ pub fn catch_unwind<F: FnOnce() -> R + UnwindSafe, R>(f: F) -> Result<R> {
358 > unsafe { panicking::r#try(f) }
359 │ }
360 │
64: rayon_core::unwind::halt_unwinding::h4c6f1ef13155c7d7
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/unwind.rs:17
15 │ F: FnOnce() -> R,
16 │ {
17 > panic::catch_unwind(AssertUnwindSafe(func))
18 │ }
19 │
65: rayon_core::join::join_context::{{closure}}::haeadd60ea8035103
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/join/mod.rs:142
140 │
141 │ // Execute task a; hopefully b gets stolen in the meantime.
142 > let status_a = unwind::halt_unwinding(call_a(oper_a, injected));
143 │ let result_a = match status_a {
144 │ Ok(v) => v,
66: rayon_core::registry::Registry::in_worker_cold::{{closure}}::{{closure}}::h4ffcd99f706ffada
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:522
520 │ let worker_thread = WorkerThread::current();
521 │ assert!(injected && !worker_thread.is_null());
522 > op(&*worker_thread, true)
523 │ },
524 │ LatchRef::new(l),
67: rayon_core::job::JobResult<T>::call::{{closure}}::h2d8e738783f1af7f
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/job.rs:218
216 │ impl<T> JobResult<T> {
217 │ fn call(func: impl FnOnce(bool) -> T) -> Self {
218 > match unwind::halt_unwinding(|| func(true)) {
219 │ Ok(x) => JobResult::Ok(x),
220 │ Err(x) => JobResult::Panic(x),
68: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::h62dbad7b756c450e
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panic/unwind_safe.rs:272
270 │ #[inline]
271 │ extern "rust-call" fn call_once(self, _args: ()) -> R {
272 > (self.0)()
273 │ }
274 │ }
69: std::panicking::try::do_call::h10d264f94e4540fe
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:557
555 │ let data = &mut (*data);
556 │ let f = ManuallyDrop::take(&mut data.f);
557 > data.r = ManuallyDrop::new(f());
558 │ }
559 │ }
70: __rust_try
at <unknown source file>
71: std::panicking::try::h6b62f24fc180418c
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:520
518 │ // See their safety preconditions for more information
519 │ unsafe {
520 > return if intrinsics::catch_unwind(do_call::<F, R>, data_ptr, do_catch::<F, R>) == 0 {
521 │ Ok(ManuallyDrop::into_inner(data.r))
522 │ } else {
72: std::panic::catch_unwind::hca78f5ba028ec6c3
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:358
356 │ #[stable(feature = "catch_unwind", since = "1.9.0")]
357 │ pub fn catch_unwind<F: FnOnce() -> R + UnwindSafe, R>(f: F) -> Result<R> {
358 > unsafe { panicking::r#try(f) }
359 │ }
360 │
73: rayon_core::unwind::halt_unwinding::hf2452592a011c996
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/unwind.rs:17
15 │ F: FnOnce() -> R,
16 │ {
17 > panic::catch_unwind(AssertUnwindSafe(func))
18 │ }
19 │
74: rayon_core::job::JobResult<T>::call::h6f3c0cd413514835
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/job.rs:218
216 │ impl<T> JobResult<T> {
217 │ fn call(func: impl FnOnce(bool) -> T) -> Self {
218 > match unwind::halt_unwinding(|| func(true)) {
219 │ Ok(x) => JobResult::Ok(x),
220 │ Err(x) => JobResult::Panic(x),
75: <rayon_core::job::StackJob<L,F,R> as rayon_core::job::Job>::execute::h684a52e81cc0409d
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/job.rs:120
118 │ let abort = unwind::AbortIfPanic;
119 │ let func = (*this.func.get()).take().unwrap();
120 > (*this.result.get()) = JobResult::call(func);
121 │ Latch::set(&this.latch);
122 │ mem::forget(abort);
76: rayon_core::job::JobRef::execute::ha601b4ad0234eefa
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/job.rs:64
62 │ #[inline]
63 │ pub(super) unsafe fn execute(self) {
64 > (self.execute_fn)(self.pointer)
65 │ }
66 │ }
77: rayon_core::registry::WorkerThread::execute::h303b24019f5d76f5
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:860
858 │ #[inline]
859 │ pub(super) unsafe fn execute(&self, job: JobRef) {
860 > job.execute();
861 │ }
862 │
78: rayon_core::registry::WorkerThread::wait_until_cold::h81dc9ddf37ae1fd2
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:794
792 │ if let Some(job) = self.find_work() {
793 │ self.registry.sleep.work_found();
794 > self.execute(job);
795 │ // The job might have injected local work, so go back to the outer loop.
796 │ continue 'outer;
79: rayon_core::registry::WorkerThread::wait_until::h542fc7d746a67754
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:769
767 │ let latch = latch.as_core_latch();
768 │ if !latch.probe() {
769 > self.wait_until_cold(latch);
770 │ }
771 │ }
80: rayon_core::registry::WorkerThread::wait_until_out_of_work::h31529aeb7dda607b
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:818
816 │ let index = self.index;
817 │
818 > self.wait_until(&registry.thread_infos[index].terminate);
819 │
820 │ // Should not be any work left in our queue.
81: rayon_core::registry::main_loop::h71f1e250e38a88e2
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:923
921 │ }
922 │
923 > worker_thread.wait_until_out_of_work();
924 │
925 │ // Normal termination, do not abort.
82: rayon_core::registry::ThreadBuilder::run::h0d5c633bf4b630cc
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:53
51 │ /// thread pool is dropped.
52 │ pub fn run(self) {
53 > unsafe { main_loop(self) }
54 │ }
55 │ }
83: <rayon_core::registry::DefaultSpawn as rayon_core::registry::ThreadSpawn>::spawn::{{closure}}::h6e8a8fe4e34fd376
at /home/blarsen/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/registry.rs:98
96 │ b = b.stack_size(stack_size);
97 │ }
98 > b.spawn(|| thread.run())?;
99 │ Ok(())
100 │ }
84: std::sys::backtrace::__rust_begin_short_backtrace::ha240e7ef2440fa50
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys/backtrace.rs:154
152 │ F: FnOnce() -> T,
153 │ {
154 > let result = f();
155 │
156 │ // prevent this frame from being tail-call optimised away
85: std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}::h638b8c0b1d37c2e1
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:561
559 │ let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
560 │ crate::sys::backtrace::__rust_begin_short_backtrace(|| hooks.run());
561 > crate::sys::backtrace::__rust_begin_short_backtrace(f)
562 │ }));
563 │ // SAFETY: `their_packet` as been built just above and moved by the
86: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::h2f63e73aa378b5e9
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panic/unwind_safe.rs:272
270 │ #[inline]
271 │ extern "rust-call" fn call_once(self, _args: ()) -> R {
272 > (self.0)()
273 │ }
274 │ }
87: std::panicking::try::do_call::h6d6d8a3d23323aad
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:557
555 │ let data = &mut (*data);
556 │ let f = ManuallyDrop::take(&mut data.f);
557 > data.r = ManuallyDrop::new(f());
558 │ }
559 │ }
88: __rust_try
at <unknown source file>
89: std::panicking::try::hec4f5105d251e24f
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:520
518 │ // See their safety preconditions for more information
519 │ unsafe {
520 > return if intrinsics::catch_unwind(do_call::<F, R>, data_ptr, do_catch::<F, R>) == 0 {
521 │ Ok(ManuallyDrop::into_inner(data.r))
522 │ } else {
90: std::panic::catch_unwind::h54574b824fc683a3
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:358
356 │ #[stable(feature = "catch_unwind", since = "1.9.0")]
357 │ pub fn catch_unwind<F: FnOnce() -> R + UnwindSafe, R>(f: F) -> Result<R> {
358 > unsafe { panicking::r#try(f) }
359 │ }
360 │
91: std::thread::Builder::spawn_unchecked_::{{closure}}::h5561b49cc0050c06
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:559
557 │
558 │ let f = f.into_inner();
559 > let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
560 │ crate::sys::backtrace::__rust_begin_short_backtrace(|| hooks.run());
561 │ crate::sys::backtrace::__rust_begin_short_backtrace(f)
92: core::ops::function::FnOnce::call_once{{vtable.shim}}::h6cc0d940ae51894e
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:250
248 │ /// Performs the call operation.
249 │ #[unstable(feature = "fn_traits", issue = "29625")]
250 > extern "rust-call" fn call_once(self, args: Args) -> Self::Output;
251 │ }
252 │
93: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h9578f6ea1d4e1c4b
at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/library/alloc/src/boxed.rs:1972
94: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::hf4a2f438d8019348
at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/library/alloc/src/boxed.rs:1972
95: std::sys::pal::unix::thread::Thread::new::thread_start::h14f1eb868ff90fc9
at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/library/std/src/sys/pal/unix/thread.rs:105
96: start_thread
at ./nptl/pthread_create.c:442
97: __GI___clone3
at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
What I expect
gitoxide code should not crash, even when provided with malformed input.
The crashing gitoxide code
https://github.com/GitoxideLabs/gitoxide/blob/08e7777454bdce466363321fce7b168d425bb833/gix-pack/src/data/delta.rs#L18-L70
In particular, it's the panic!("encountered unsupported command code: 0") that I'm seeing crash. However, there is also an expect call just above it that could likewise be triggered by ill-formed, user-controllable input.
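For background, the pack delta encoding distinguishes instructions by the high bit of each command byte: if it is set, the low 7 bits select offset/size bytes for a copy from the base object; if it is clear and non-zero, the value is a count of literal bytes to insert; the value 0 is reserved. A minimal sketch of how the instruction section could be applied with the reserved code reported as an error rather than a panic (illustrative names only, the leading size varints are assumed already stripped, bounds checks elided):

// Sketch, not gitoxide's implementation: apply a pack delta instruction stream,
// surfacing the reserved command code 0 as an error instead of panicking.
fn apply_delta(base: &[u8], target: &mut Vec<u8>, delta: &[u8]) -> Result<(), &'static str> {
    let mut i = 0;
    while i < delta.len() {
        let cmd = delta[i];
        i += 1;
        if cmd & 0b1000_0000 != 0 {
            // Copy from base: the low 7 bits say which offset/size bytes follow.
            let (mut ofs, mut size) = (0usize, 0usize);
            for bit in 0..4 {
                if cmd & (1 << bit) != 0 {
                    ofs |= (delta[i] as usize) << (8 * bit);
                    i += 1;
                }
            }
            for bit in 0..3 {
                if cmd & (1 << (4 + bit)) != 0 {
                    size |= (delta[i] as usize) << (8 * bit);
                    i += 1;
                }
            }
            if size == 0 {
                size = 0x10000; // by convention, a copy size of 0 means 64 KiB
            }
            target.extend_from_slice(&base[ofs..ofs + size]);
        } else if cmd != 0 {
            // Insert the next `cmd` literal bytes from the delta stream.
            target.extend_from_slice(&delta[i..i + cmd as usize]);
            i += cmd as usize;
        } else {
            // Command code 0 is reserved; malformed input should yield an error.
            return Err("encountered unsupported command code: 0");
        }
    }
    Ok(())
}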
FYI, looking at the git blame for that code, it appears to be pretty old, much of it from 2021.
Reproducing this
The input repository to Nosey Parker (that's triggering the gitoxide crash when enumerating inputs) is elongbug/deqp, cloned with git clone --bare https://github.com/elongbug/deqp.git. The last changes there are from 10 years ago.
The packfile contents that I see:
% shasum deqp.git/objects/pack/*
77809f4a1783bc011dd9f49c0ee18fe45d0cb792 deqp.git/objects/pack/pack-d57c91f4836f96358552c59966566d6a8f522440.idx
8325905c1a6b262deaadf6c6a1a9ede804859d00 deqp.git/objects/pack/pack-d57c91f4836f96358552c59966566d6a8f522440.pack
I see the crash deterministically, with or without multithreading in Nosey Parker, but only on an x86_64 machine, and not on Apple Silicon-based machines (i.e., ARM).
Other notes
I see the gix_pack::data::delta::apply function used only in 3 places in gitoxide, and all 3 locations return Result values already. Could gix_pack::data::delta::apply be modified to return an error instead of panicking on malformed input? Would you like a PR for this? It might not be a semver-breaking change, as that function looks like it's only exposed pub(crate) from the gix_pack::data module.
P.S. git fsck reports no errors in the deqp.git repository.
Additionally, it appears that the gitoxide panic occurs when trying to use gix::Repository::find_object with object ID 5665283d8c34f78a615fb6bf76fdd14bdb19b882.
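In case it helps with reproduction, here is a minimal sketch of exercising that call directly through the public gix API (the repository path is an assumption):

// Sketch only: open the bare clone and try to decode the offending blob.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let repo = gix::open("deqp.git")?;
    let id = gix::ObjectId::from_hex(b"5665283d8c34f78a615fb6bf76fdd14bdb19b882")?;
    let blob = repo.find_object(id)?.try_into_blob()?;
    println!("decoded {} bytes", blob.data.len());
    Ok(())
}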
Thanks so much for reporting, it's great to have Nosey Parker downstream!
I see the crash deterministically, with or without multithreading in Nosey Parker, but only on an x86_64 machine, and not on Apple Silicon-based machines (i.e., ARM).
That is very interesting, as this makes it appear like miscompilation is involved. Since Delta-Apply just applies a previously inflated buffer, ZLIB may also be involved (CC @folkertdev).
Could you try to compile with an older version, one before 96164c5 (96164c5~1 == f3684a4f67f98c0e3884e2d348cb092f48ca443a) to see if this is still reproducible without zlib-rs?
Could gix_pack::data::delta::apply be modified to return an error instead of panicking on malformed input? Would you like a PR for this?
Yes, a PR for that is very welcome, we can't have gitoxide panic, and I hope passing through this new error won't cause too much trouble - there may have been a reason I opted out from doing that (laziness).
I'll open a PR for the error handling changes.
I will also try reproducing with the older version before 96164c5 to see if I still have trouble there. But that's going to be next week.
Could you try to compile with an older version, one before 96164c5 (96164c5~1 == f3684a4f67f98c0e3884e2d348cb092f48ca443a) to see if this is still reproducible without zlib-rs?
I tried this on my x86_64 machine, pinning the gix package to revision f3684a4f67f98c0e3884e2d348cb092f48ca443a with a line like this in a few Nosey Parker Cargo.toml files:
gix = { git = "https://github.com/GitoxideLabs/gitoxide", rev = "f3684a4f67f98c0e3884e2d348cb092f48ca443a", features = ["max-performance", "serde"] }
With this change, it looks like libz-ng-sys is used instead of zlib-rs. I do not see the crash with this setup.
HOWEVER, if I change the line instead to disable the max-performance feature, I do still see the crash.
In either case, debug vs release build makes no difference in whether the crash occurs.
@Byron I opened a PR to make gix_pack::data::delta::apply a fallible function: #2059.
For the first time in a long time I actually tried to reproduce this issue, and failed to do so on macOS:
# gix is newly built from the latest `main`
❯ gix cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882
PKM 10@X%
~/dev/deqp.git ( master)
❯ ls -l objects/pack
.r--r--r--@ 783Ki byron staff 30 Jun 04:29 pack-d57c91f4836f96358552c59966566d6a8f522440.idx
.r--r--r--@ 20Mi byron staff 30 Jun 04:29 pack-d57c91f4836f96358552c59966566d6a8f522440.pack
Running gix cat will use find_object() under the hood to decode the object, and I would have expected that it returns an error due to #2059.
Probably this means it can't be reproduced on ARM64?
Can you still reproduce this with a crash at this object? Should we wait for a couple of version changes in Rust and LLVM to see if this fixes it naturally? If wait-and-see shouldn't be the approach, then I think the goal should be to obtain the compressed delta buffer that yields the invalid opcode after decompression so a unit-test can be devised to reproduce this.
@Byron Let me try your gix cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882 test case on my x86_64 machine to see if I can reproduce it now, and perhaps put together a unit test.
Like I said initially, I've only seen the crash on an x86_64 machine; I've been unable to reproduce on ARM machines. Let me poke around a little bit more and see if it causes trouble on x86_64 generally, or just that particular system...
x86_64 crash
On my x86_64 system, with a fresh debug build of gitoxide commit 8007f1d0bad357688acd1235d079bf164290cda6:
% cargo run -- -r /disk2/deqp.git -v --trace cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.30s
Running `target/debug/gix -r /disk2/deqp.git -v --trace cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882`
12:56:07 tracing INFO run [ 2.35ms | 52.35% / 100.00% ]
12:56:07 tracing INFO ┕━ ThreadSafeRepository::discover() [ 1.12ms | 12.50% / 47.65% ]
12:56:07 tracing INFO ┕━ open_from_paths() [ 826µs | 28.29% / 35.15% ]
12:56:07 tracing INFO ┕━ gix_odb::Store::at() [ 161µs | 6.86% ]
Error: An error occurred while obtaining an object from the packed object store
Caused by:
Encountered unsupported command code: 0
Test input
Just to get details together in one place, recall that the test input is blob 5665283d8c34f78a615fb6bf76fdd14bdb19b882 from elongbug/deqp, with a single packfile that looks like this:
% shasum deqp.git/objects/pack/*
77809f4a1783bc011dd9f49c0ee18fe45d0cb792 deqp.git/objects/pack/pack-d57c91f4836f96358552c59966566d6a8f522440.idx
8325905c1a6b262deaadf6c6a1a9ede804859d00 deqp.git/objects/pack/pack-d57c91f4836f96358552c59966566d6a8f522440.pack
Machine details
% cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=22.04
DISTRIB_CODENAME=jammy
DISTRIB_DESCRIPTION="Ubuntu 22.04.5 LTS"
% cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 63
model name : Intel(R) Xeon(R) CPU @ 2.30GHz
stepping : 0
microcode : 0xffffffff
cpu MHz : 2299.998
cache size : 46080 KB
physical id : 0
siblings : 32
core id : 0
cpu cores : 16
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 13
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs mmio_stale_data retbleed bhi
bogomips : 4599.99
clflush size : 64
cache_alignment : 64
address sizes : 46 bits physical, 48 bits virtual
power management:
(Repeated 32 times for 32 vCPUs)
Cargo / Rust details
% cargo --version
cargo 1.84.0 (66221abde 2024-11-19)
% rustc --version --verbose
rustc 1.84.0 (9fc6b4312 2025-01-07)
binary: rustc
commit-hash: 9fc6b43126469e3858e2fe86cafb4f0fd5068869
commit-date: 2025-01-07
host: x86_64-unknown-linux-gnu
release: 1.84.0
LLVM version: 19.1.5
To test whether the llvm / cargo / rust versions make a difference, I tested on the same x86_64 machine using the rust:latest Docker image. Same crash.
# rustc --verbose --version
rustc 1.88.0 (6b00bc388 2025-06-23)
binary: rustc
commit-hash: 6b00bc3880198600130e1cf62b8f8a93494488cc
commit-date: 2025-06-23
host: x86_64-unknown-linux-gnu
release: 1.88.0
LLVM version: 20.1.5
I'll see if I can find another x86_64 machine with a different processor revision.
What I'd find really interesting is if we'd hack the pack delta apply machinery to store each slice of compressed bytes on disk, numbered. Then the highest number should be the slice that decompresses incorrectly.
From there, it could be put into a unit test which can run on x86_64 CI to reproduce there and be made available to researchers. It's notable that this also happened when miniz_oxide was used.
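A throw-away helper along these lines could be spliced into the decompression path to do that numbering (the output directory and the exact call site are assumptions; the atomic counter keeps the numbering stable across threads):

use std::sync::atomic::{AtomicUsize, Ordering};

static SLICE_COUNTER: AtomicUsize = AtomicUsize::new(0);

// Dump one compressed slice to disk with a monotonically increasing number.
fn dump_compressed_slice(bytes: &[u8]) {
    let n = SLICE_COUNTER.fetch_add(1, Ordering::Relaxed);
    std::fs::write(format!("/tmp/pack-slice-{n:06}.zlib"), bytes)
        .expect("writing the debug dump should not fail");
}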
Just to sketch the path of data flow, we end with passing a decompressed slice of bytes to delta::apply():
https://github.com/GitoxideLabs/gitoxide/blob/fce70950006892f51b32af233656be6fe5de9df3/gix-pack/src/data/file/decode/entry.rs#L363-L377
It does some very clever buffer handling to avoid allocations, but ultimately here is the decompression function:
https://github.com/GitoxideLabs/gitoxide/blob/fce70950006892f51b32af233656be6fe5de9df3/gix-pack/src/data/file/decode/entry.rs#L119-L132
…which calls the once primitive that is used everywhere in the codebase.
https://github.com/GitoxideLabs/gitoxide/blob/beba7204a50a84b30e3eb81413d968920599e226/gix-features/src/zlib/mod.rs#L33-L43
It's notable how the status is ignored, so it would be possible for the decoding to not be complete, leaving zeroes in the output buffer. Thus we assume that ZLIB will always decompress the whole input if the output is big enough.
I applied this patch to see what happens when accessing the reproduction object.
diff --git a/gix-pack/src/data/file/decode/entry.rs b/gix-pack/src/data/file/decode/entry.rs
index bc8d5a2ca..1ee236079 100644
--- a/gix-pack/src/data/file/decode/entry.rs
+++ b/gix-pack/src/data/file/decode/entry.rs
@@ -128,7 +128,10 @@ impl File {
inflate.reset();
inflate
.once(&self.data[offset..], out)
- .map(|(_status, consumed_in, _consumed_out)| consumed_in)
+ .map(|(_status, consumed_in, _consumed_out)| {
+ dbg!(_status);
+ consumed_in
+ })
}
/// Like `decompress_entry_from_data_offset`, but returns consumed input and output.
On an ARM64 machine, this looks like this:
❯ /Users/byron/dev/github.com/GitoxideLabs/gitoxide/target/debug/gix cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
PKM 10@X%
I ~~am pretty sure~~ hope that if you would apply this patch and run it on an x86 machine, the last status would not be StreamEnd.
Could you try that for me?
If that was the case, then a fix could be devised, ideally with a dumped version of the compressed slice input so we can bake this into a test, one that then is testing once() which either is made to not do things just once anymore, or there is another function that is called all() so there can be a little more machinery to keep going as long as the output buffer isn't fully filled.
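A rough sketch of what such an all() could look like, written here against flate2::Decompress directly; the function name and the assumption that this mirrors what the zlib wrapper does internally are both mine:

use flate2::{Decompress, FlushDecompress, Status};

// Keep inflating until the stream ends or the output buffer is full, instead of
// relying on a single call; returns (consumed input bytes, written output bytes).
fn inflate_all(
    inflate: &mut Decompress,
    input: &[u8],
    out: &mut [u8],
) -> Result<(usize, usize), flate2::DecompressError> {
    inflate.reset(true); // zlib header expected, like the pack entries here
    loop {
        let (in_before, out_before) = (inflate.total_in() as usize, inflate.total_out() as usize);
        let status = inflate.decompress(&input[in_before..], &mut out[out_before..], FlushDecompress::None)?;
        let (in_now, out_now) = (inflate.total_in() as usize, inflate.total_out() as usize);
        if status == Status::StreamEnd || out_now == out.len() {
            return Ok((in_now, out_now));
        }
        if in_now == in_before && out_now == out_before {
            // No forward progress: bail out rather than loop forever on corrupt input.
            return Ok((in_now, out_now));
        }
    }
}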
Exciting!
Discarded
A wrong path I plotted to see who calls apply().
https://github.com/GitoxideLabs/gitoxide/blob/fce70950006892f51b32af233656be6fe5de9df3/gix-pack/src/cache/delta/traverse/resolve.rs#L352-L370
These bytes are decompressed here, with the decompressed length coming from the pack slice header.
https://github.com/GitoxideLabs/gitoxide/blob/fce70950006892f51b32af233656be6fe5de9df3/gix-pack/src/cache/delta/traverse/resolve.rs#L303-L312
We re-use a Zlib context to decompress all at once…
https://github.com/GitoxideLabs/gitoxide/blob/fce70950006892f51b32af233656be6fe5de9df3/gix-pack/src/cache/delta/traverse/resolve.rs#L455-L468
I ~~am pretty sure~~ hope that if you would apply this patch and run it on an x86 machine, the last status would not be StreamEnd.
Could you try that for me?
Here's what output I get with your patch:
% cargo run -- -r /disk2/deqp.git cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882
Compiling gix-pack v0.59.1 (/home/blarsen/projects/gitoxide/gix-pack)
Compiling gix-odb v0.69.1 (/home/blarsen/projects/gitoxide/gix-odb)
Compiling gix v0.72.1 (/home/blarsen/projects/gitoxide/gix)
Compiling gitoxide-core v0.47.1 (/home/blarsen/projects/gitoxide/gitoxide-core)
Compiling gitoxide v0.44.0 (/home/blarsen/projects/gitoxide)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 18.17s
Running `target/debug/gix -r /disk2/deqp.git cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882`
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
Error: An error occurred while obtaining an object from the packed object store
Caused by:
Encountered unsupported command code: 0
I'm sorry to disappoint!
Note that the Encountered unsupported command code: 0 is being produced from gix_pack::data::file::decode::entry::<impl gix_pack::data::File>::resolve_deltas. A stack trace from a copy modified with the addition of some println! debugging and .unwrap() calls:
thread 'main' panicked at gix-pack/src/data/file/decode/entry.rs:384:17:
called `Result::unwrap()` on an `Err` value: UnsupportedCommandCode
stack backtrace:
0: rust_begin_unwind
at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/library/std/src/panicking.rs:665:5
1: core::panicking::panic_fmt
at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/library/core/src/panicking.rs:76:14
2: core::result::unwrap_failed
at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/library/core/src/result.rs:1699:5
3: core::result::Result<T,E>::unwrap
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/result.rs:1104:23
4: gix_pack::data::file::decode::entry::<impl gix_pack::data::File>::resolve_deltas
at ./gix-pack/src/data/file/decode/entry.rs:384:13
5: gix_pack::data::file::decode::entry::<impl gix_pack::data::File>::decode_entry
at ./gix-pack/src/data/file/decode/entry.rs:191:50
6: gix_odb::store_impls::dynamic::find::<impl gix_odb::store_impls::dynamic::Handle<S>>::try_find_cached_inner
at ./gix-odb/src/store_impls/dynamic/find.rs:151:35
7: gix_odb::store_impls::dynamic::find::<impl gix_pack::find_traits::Find for gix_odb::store_impls::dynamic::Handle<S>>::try_find_cached
at ./gix-odb/src/store_impls/dynamic/find.rs:356:9
8: gix_odb::cache::impls::<impl gix_pack::find_traits::Find for gix_odb::Cache<S>>::try_find_cached
at ./gix-odb/src/cache.rs:224:32
9: gix_odb::cache::impls::<impl gix_pack::find_traits::Find for gix_odb::Cache<S>>::try_find
at ./gix-odb/src/cache.rs:209:25
10: gix_odb::cache::impls::<impl gix_object::traits::find::Find for gix_odb::Cache<S>>::try_find
at ./gix-odb/src/cache.rs:163:13
11: <gix_odb::memory::Proxy<T> as gix_object::traits::find::Find>::try_find
at ./gix-odb/src/memory.rs:159:9
12: gix_object::traits::find::ext::FindExt::find
at ./gix-object/src/traits/find.rs:234:13
13: gix::repository::object::<impl gix::types::Repository>::find_object
at ./gix/src/repository/object.rs:53:20
14: gix::id::<impl gix::types::Id>::object
at ./gix/src/id.rs:17:9
15: gitoxide_core::repository::cat::display_object
at ./gitoxide-core/src/repository/cat.rs:49:29
16: gitoxide_core::repository::cat::function::cat
at ./gitoxide-core/src/repository/cat.rs:58:9
17: gitoxide::plumbing::main::main::{{closure}}
at ./src/plumbing/main.rs:1256:41
18: gitoxide::shared::pretty::prepare_and_run::{{closure}}
at ./src/shared.rs:170:36
19: gix_trace::<impl gix_trace::enabled::Span>::into_scope
at ./gix-trace/src/lib.rs:43:9
20: gitoxide::shared::pretty::prepare_and_run
at ./src/shared.rs:169:27
21: gitoxide::plumbing::main::main
at ./src/plumbing/main.rs:1249:41
22: gix::main
at ./src/gix.rs:5:5
23: core::ops::function::FnOnce::call_once
at /home/blarsen/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5
The instructions slice inside of gix_pack::data::file::decode::entry::<impl gix_pack::data::File>::resolve_deltas is being computed differently on my ARM64 and x86_64 machines. Investigating further...
Hmmm, well, it does seem like something is coming out differently on ARM64 vs x86_64 from the call to inflate.once() in this: https://github.com/GitoxideLabs/gitoxide/blob/fce70950006892f51b32af233656be6fe5de9df3/gix-pack/src/data/file/decode/entry.rs#L119-L132.
I haven't debugged further into the miniz_oxide code, but it appears that 4 bytes are coming out differently on the two platforms when decompressing the blob in question (i.e., gix cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882).
Can you get the input/output pair out for the misbehaving target?
Can you get the input/output pair out for the misbehaving target?
I'll try to grab this next time I look at this, probably next week
Thanks for taking a look @folkertdev, and thanks to @bradlarsen for independently reproducing the issue.
Here is a patch that produces input and output buffers in a format suitable for a unit-test.
diff --git a/gix-pack/src/data/file/decode/entry.rs b/gix-pack/src/data/file/decode/entry.rs
index bc8d5a2ca..3d76fa614 100644
--- a/gix-pack/src/data/file/decode/entry.rs
+++ b/gix-pack/src/data/file/decode/entry.rs
@@ -128,7 +128,11 @@ impl File {
inflate.reset();
inflate
.once(&self.data[offset..], out)
- .map(|(_status, consumed_in, _consumed_out)| consumed_in)
+ .map(|(_status, consumed_in, _consumed_out)| {
+ dbg!(_status, consumed_in, _consumed_out);
+ dbg!(&self.data[offset..offset + consumed_in], out);
+ consumed_in
+ })
}
/// Like `decompress_entry_from_data_offset`, but returns consumed input and output.
And here is what it's outputting on ARM64, which is the desired 'good' version.
❯ cargo run --bin gix -- -r /Users/byron/dev/github.com/elongbug/deqp.git cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882
Blocking waiting for file lock on build directory
Compiling gix-pack v0.59.1 (/Users/byron/dev/github.com/GitoxideLabs/gitoxide/gix-pack)
Compiling gix-odb v0.69.1 (/Users/byron/dev/github.com/GitoxideLabs/gitoxide/gix-odb)
Compiling gix v0.72.1 (/Users/byron/dev/github.com/GitoxideLabs/gitoxide/gix)
Compiling gitoxide-core v0.47.1 (/Users/byron/dev/github.com/GitoxideLabs/gitoxide/gitoxide-core)
Compiling gitoxide v0.44.0 (/Users/byron/dev/github.com/GitoxideLabs/gitoxide)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.34s
Running `target/debug/gix -r /Users/byron/dev/github.com/elongbug/deqp.git cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882`
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
[gix-pack/src/data/file/decode/entry.rs:132:17] consumed_in = 18
[gix-pack/src/data/file/decode/entry.rs:132:17] _consumed_out = 12
[gix-pack/src/data/file/decode/entry.rs:133:17] &self.data[offset..offset + consumed_in] = [
120,
218,
147,
144,
152,
32,
200,
30,
209,
176,
141,
1,
8,
0,
17,
155,
2,
103,
]
[gix-pack/src/data/file/decode/entry.rs:133:17] out = [
24,
24,
144,
17,
7,
88,
128,
182,
0,
0,
0,
0,
]
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
[gix-pack/src/data/file/decode/entry.rs:132:17] consumed_in = 29
[gix-pack/src/data/file/decode/entry.rs:132:17] _consumed_out = 24
[gix-pack/src/data/file/decode/entry.rs:133:17] &self.data[offset..offset + consumed_in] = [
120,
156,
11,
240,
246,
85,
48,
52,
96,
96,
96,
96,
1,
66,
70,
6,
70,
135,
128,
4,
53,
32,
143,
1,
0,
38,
169,
2,
138,
]
[gix-pack/src/data/file/decode/entry.rs:133:17] out = [
80,
75,
77,
32,
49,
48,
0,
0,
0,
4,
0,
4,
0,
1,
0,
1,
64,
80,
96,
38,
0,
0,
0,
0,
0,
4,
0,
1,
0,
1,
64,
80,
96,
38,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
24,
24,
144,
17,
7,
88,
128,
182,
0,
0,
0,
0,
]
PKM 10@X%
Finally, let me bring in @oyvindln as maintainer/author of miniz_oxide as this issue also seems to reproduce there as well, see this comment for reference. There we switch gitoxide back to a version where the removal of the max-performance feature will automatically bring in the miniz_oxide backend of flate2, which seems to have the same issue on x86_64 as zlib-rs.
At this point we know that the output of the decompress() call (https://github.com/GitoxideLabs/gitoxide/blob/beba7204a50a84b30e3eb81413d968920599e226/gix-features/src/zlib/mod.rs#L34-L43) differs between platforms. We also know that the returned status is always StreamEnd, so it's not that we are trying to access a zeroed portion of a pre-sized output buffer that hasn't been written to yet.
Wait, did you already post the "bad" input/output somewhere? That's the interesting one really. Or is there a project/branch I can run to replicate? I'm on x86_64 linux.
edit: nvm, just the input should be enough of course. I can locally cross-compile and run qemu to validate results on both targets.
You seem to deliberately ignore the _consumed_out, but I think that throws off the debug print here? I suspect that some of the data in the buffer is from earlier runs? why is it OK in both cases to just print the whole output buffer length, and not just the first _consumed_out bytes?
I'm increasingly sure that you're incorrect in ignoring _consumed_out there. The debug print shows that _consumed_out is just 24, but you print more bytes than that. From my testing, the first 24 bytes are the same for both aarch64 and x86_64.
The implementation is allowed to use the whole output buffer, and so because we use simd we might touch more output bytes than we consume.
Thanks for taking a look!
Wait, did you already post the "bad" input/output somewhere? That's the interesting one really. Or is there a project/branch I can run to replicate? I'm on x86_64 linux.
I do not have the bad output myself, for all we know @bradlarsen has it and saw that 4 bytes are different.
You seem to deliberately ignore the _consumed_out, but I think that throws off the debug print here? I suspect that some of the data in the buffer is from earlier runs? why is it OK in both cases to just print the whole output buffer length, and not just the first _consumed_out bytes?
What's interesting about this is that out was right-sized according to the metadata stored in the pack entry, hence it's the size of the decompressed data. And from all I know, it's actually correct as the output is correct.
However, there is no denying that the amount of consumed output bytes isn't the same as out.len() (60 bytes vs 24 consumed out), and that it reports StreamEnd in all cases.
I wonder why it works at all, and also why the produced file seems to be PKM 10@X - is that correct even? I presume nosey parker would have detected a hash mismatch, but maybe it never ran successfully there?
For fun, I ran this again with @ as revspec to get the HEAD commit, and there the output of decompressed size and consumed_out matches.
So I think the result on macOS is also wrong, but differently.
The right thing to do here immediately seems to be to compare both values and bail on mismatch. This would also mean this particular object simply wouldn't decompress and is 'broken' in the pack.
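In the spirit of the debug patches above, that check could look roughly like this inside decompress_entry_from_data_offset (the error variant is made up for illustration, and the types are inferred from the values printed above):

let (status, consumed_in, consumed_out) = inflate.once(&self.data[offset..], out)?;
if status != Status::StreamEnd || consumed_out != out.len() {
    // The zlib stream ended early relative to the size recorded in the pack entry
    // header, so refuse to hand a partially written buffer to delta::apply().
    // `IncompleteZlibStream` is a hypothetical variant, named only for this sketch.
    return Err(Error::IncompleteZlibStream {
        expected: out.len(),
        written: consumed_out,
    });
}
Ok(consumed_in)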
Wait, did you already post the "bad" input/output somewhere? That's the interesting one really. Or is there a project/branch I can run to replicate? I'm on x86_64 linux.
I do not have the bad output myself, for all we know @bradlarsen has it and saw that 4 bytes are different.
I'm not at that x86_64 machine at the moment and have a lot going on right now. I'll gather the input and output from that machine and share with you on Monday. Thanks!
I wonder why it works at all, and also why the produced file seems to be PKM 10@X - is that correct even? I presume nosey parker would have detected a hash mismatch, but maybe it never ran successfully there?
I'm not sure if that is correct! Validating against regular git cat would be a good idea. When I had tried git fsck, I got no errors or warnings from that repo.
Indeed, Nosey Parker was never able to run successfully on that blob on x86_64 — it failed to decompress without error.
Indeed, what I said there isn't correct. Even on a platform that doesn't fail, the two values (consumed_out and decompressed_size) don't match at all for this object, while they otherwise do seem to match; despite that, this particular object hashes back to the correct SHA1:
github.com/elongbug/deqp.git ( master)
❯ git cat-file -p 5665283d8c34f78a615fb6bf76fdd14bdb19b882 | git hash-object -t blob --stdin
5665283d8c34f78a615fb6bf76fdd14bdb19b882
I have an updated debug-patch for the next time this gets run on x86_64, adding out.len() to the list of values to print as a proxy for the decompressed buffer size that is decoded from the entry header:
diff --git a/gix-pack/src/data/file/decode/entry.rs b/gix-pack/src/data/file/decode/entry.rs
index bc8d5a2ca..82b299eda 100644
--- a/gix-pack/src/data/file/decode/entry.rs
+++ b/gix-pack/src/data/file/decode/entry.rs
@@ -128,7 +128,11 @@ impl File {
inflate.reset();
inflate
.once(&self.data[offset..], out)
- .map(|(_status, consumed_in, _consumed_out)| consumed_in)
+ .map(|(_status, consumed_in, _consumed_out)| {
+ dbg!(_status, consumed_in, _consumed_out, out.len());
+ dbg!(&self.data[offset..offset + consumed_in], out);
+ consumed_in
+ })
}
/// Like `decompress_entry_from_data_offset`, but returns consumed input and output.
When it works, out.len() == 60, but _consumed_out == 24. It's puzzling, as what comes out of that operation is correct, yet these two values seem at odds. Given a starting byte for a DEFLATE stream, zlib reaches StreamEnd and, according to its own count, only writes 24 bytes of a 60-byte output buffer. That buffer is then used as instructions to apply deltas to a base buffer, which even seems to work correctly despite presumed garbage past the first 24 bytes.
Could it be that the count for consumed_out is off?
Finally, let me bring in @oyvindln as maintainer/author of miniz_oxide as this issue also seems to reproduce there as well, see this comment for reference. There we switch gitoxide back to a version where the removal of the max-performance feature will automatically bring in the miniz_oxide backend of flate2, which seems to have the same issue on x86_64 as zlib-rs.
okay - let me know when you have a dump of the compressed data that goes into miniz_oxide/flate2 then
If different cpu architectures return different incomplete streams for the same compressed input that's not ideal.
Note, here's the content that git version 2.34.1 prints out for that blob:
% git show 5665283d8c34f78a615fb6bf76fdd14bdb19b882 | hexdump -C
00000000 50 4b 4d 20 31 30 00 00 00 04 00 04 00 01 00 01 |PKM 10..........|
00000010 40 58 80 b6 00 00 00 00 |@X......|
00000018
This is what gix shows for that blob as well, at least on ARM64.
okay - let me know when you have a dump of the compressed data that goes into miniz_oxide/flate2 then
Thanks for chiming in! It's still a bit strange to me what's happening here. I believe the compressed input is this, at least on ARM (as per this comment).
[gix-pack/src/data/file/decode/entry.rs:133:17] &self.data[offset..offset + consumed_in] = [
120,
156,
11,
240,
246,
85,
48,
52,
96,
96,
96,
96,
1,
66,
70,
6,
70,
135,
128,
4,
53,
32,
143,
1,
0,
38,
169,
2,
138,
]
It's possible that this is different on x86_64, and maybe @bradlarsen can reproduce it on that platform, possibly with a more recent patch from this comment.
If the inputs are the same, then the output would be the thing that has to differ, as delta::apply() will always process the entire output buffer (which as we recall is larger according to Git than what inflate thinks it is after decompression, even on ARM64). All this is fishy, but one would also think that a SHA doesn't lie, especially if the received pack passed Git connectivity checks after cloning it.
@bradlarsen Do I understand correctly that NoseyParker found that object by traversing the commit-graph exclusively? I am asking because in theory, any kind of corrupted pack could be sent, and as long as it decodes fine (like this one seems to do) an index will be created with the hash of the decoded data. But what if it decodes wrongly in the first place? If no other object refers to 5665283d8c34f78a615fb6bf76fdd14bdb19b882 and it's just something left in the pack for other reasons, then it might well be completely corrupt.
Then the issue wouldn't be related to inflate, but more about whether the bytes not written into the 60-byte output buffer happen to be a valid delta stream.
@Byron running with your debug output patch on top of commit c7af04db9b6bb1204e0f4c436d1db8f48a491e86 on my x86_64 machine, I see the same input as you got on ARM64, but an output that differs by 4 bytes (16 bytes from the end are 0, 0, 0, 0 instead of 24, 24, 144, 17):
% cargo run -- -r /disk2/deqp.git cat 5665283d8c34f78a615fb6bf76fdd14bdb19b882
...
[gix-pack/src/data/file/decode/entry.rs:132:17] _status = StreamEnd
[gix-pack/src/data/file/decode/entry.rs:132:17] consumed_in = 29
[gix-pack/src/data/file/decode/entry.rs:132:17] _consumed_out = 24
[gix-pack/src/data/file/decode/entry.rs:132:17] out.len() = 60
[gix-pack/src/data/file/decode/entry.rs:133:17] &self.data[offset..offset + consumed_in] = [
120,
156,
11,
240,
246,
85,
48,
52,
96,
96,
96,
96,
1,
66,
70,
6,
70,
135,
128,
4,
53,
32,
143,
1,
0,
38,
169,
2,
138,
]
[gix-pack/src/data/file/decode/entry.rs:133:17] out = [
80,
75,
77,
32,
49,
48,
0,
0,
0,
4,
0,
4,
0,
1,
0,
1,
64,
80,
96,
38,
0,
0,
0,
0,
0,
4,
0,
1,
0,
1,
64,
80,
96,
38,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
7,
88,
128,
182,
0,
0,
0,
0,
]
Error: An error occurred while obtaining an object from the packed object store
Caused by:
Encountered unsupported command code: 0
I had tried running gix cat through miri to see if any undefined behavior jumps out, but there are far too many unsafe and ffi things that have to be stubbed out to make that work end-to-end. I'll try to isolate the test into something much smaller.
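As a starting point for that isolation, a minimal standalone sketch could feed the 29-byte compressed slice from above directly into flate2 and print the same values the debug patch does; treating flate2's default backend as a stand-in for what gix uses here is an assumption:

use flate2::{Decompress, FlushDecompress, Status};

fn main() {
    // The 29-byte zlib stream dumped from the pack entry above.
    let input: [u8; 29] = [
        120, 156, 11, 240, 246, 85, 48, 52, 96, 96, 96, 96, 1, 66, 70, 6, 70, 135, 128, 4, 53,
        32, 143, 1, 0, 38, 169, 2, 138,
    ];
    // The pack entry header claims a 60-byte decompressed size.
    let mut out = vec![0u8; 60];
    let mut inflate = Decompress::new(true); // zlib header expected
    let status = inflate
        .decompress(&input, &mut out, FlushDecompress::Finish)
        .expect("the stream should decompress without a data error");
    assert_eq!(status, Status::StreamEnd);
    println!("consumed in:  {}", inflate.total_in());
    println!("produced out: {}", inflate.total_out());
    println!("out = {:?}", out);
}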
Thanks a lot! Here is the diff for completeness.
--- good 2025-07-09 04:27:14
+++ bad 2025-07-09 04:26:48
@@ -41,22 +41,22 @@
0,
0,
0,
0,
0,
0,
0,
0,
0,
- 24,
- 24,
- 144,
- 17,
+ 0,
+ 0,
+ 0,
+ 0,
7,
88,
128,
182,
0,
0,
0,
0,
]
\ No newline at end of file
I am getting the feeling that we are really looking at a bug here, one that allowed an invalid object or broken stream to make it into a pack.
Its data couldn't look stranger to me; I think it's fair to say that it's garbage.
% git show 5665283d8c34f78a615fb6bf76fdd14bdb19b882 | hexdump -C
00000000 50 4b 4d 20 31 30 00 00 00 04 00 04 00 01 00 01 |PKM 10..........|
00000010 40 58 80 b6 00 00 00 00 |@X......|
00000018
I'd like to repeat my question for @bradlarsen:
@bradlarsen Do I understand correctly that NoseyParker found that object by traversing the commit-graph exclusively? I am asking because in theory, any kind of corrupted pack could be sent, and as long as it decodes fine (like this one seems to do) an index will be created with the hash of the decoded data. But what if it decodes wrongly in the first place? If no other object refers to 5665283d8c34f78a615fb6bf76fdd14bdb19b882 and it's just something left in the pack for other reasons, then it might well be completely corrupt.
If the object is referenced then I'd think despite all, it must be legitimate. But if it is not referenced and instead extracted directly from the pack, I think we really are looking at a bug here.
If consumed_out can be trusted, and I'd want to assume that, then there is a clear mismatch between the stream length and the passed-in buffer, leaving plenty of bytes with the value they have had before. Another question is really why that value could be different at all given that everything in these buffers is deterministic at all times as resize() is used, and it's either input buffers or output buffers (out is part of a double-buffer that is swapped by reference, so the output of the previous cycle can be used as base next time without copying or moving the data, and usually without extra allocation).
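A toy illustration of that stale-bytes effect, with made-up values, just to show how a reused, resize()-d buffer can keep old content past the bytes the inflater actually wrote:

fn main() {
    let mut out = Vec::new();

    // Cycle 1: a previous entry decompressed to a full 60 bytes (pretend they were 0xAA).
    out.resize(60, 0);
    out.fill(0xAA);

    // Cycle 2: the next entry also claims 60 bytes, but the inflater only writes 24.
    out.resize(60, 0); // length is already 60, so existing bytes are left untouched
    out[..24].fill(0xBB); // what the decompressor actually wrote this time

    // Bytes 24..60 still hold whatever the previous cycle left there.
    assert!(out[24..].iter().all(|&b| b == 0xAA));
    println!("stale tail starts with {:?}", &out[24..28]);
}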
@bradlarsen Do I understand correctly that NoseyParker found that object by traversing the commit-graph exclusively? I am asking because in theory, any kind of corrupted pack could be sent, and as long as it decodes fine (like this one seems to do) an index will be created with the hash of the decoded data. But what if it decodes wrongly in the first place? If no other object refers to 5665283d8c34f78a615fb6bf76fdd14bdb19b882 and it's just something left in the pack for other reasons, then it might well be completely corrupt.
My apologies; I missed this with everything else in this thread. Nosey Parker does not traverse the commit graph. Instead, it inspects each blob from the object database, and then uses a novel algorithm to efficiently determine for each blob which commit introduced it.
I'm not sure at the moment if the blob in question is referenced from any commit.