rust-lexical
rust-lexical copied to clipboard
[BUG] Unsoundness in `Bytes::read()`
Description
Bytes::read() only requires that V be of the right length, and imposes no validity requirements. This is trivially usable to create unsoundness.
Test case
use lexical_util::iterator::Bytes;
fn main() {
let b: Bytes<10> = Bytes::new(&[21]);
println!("{:?}", b.read::<bool>()); // will read the value 21 as a boolean
}
Prerequisites
- Rust version:
rustc 1.68.0 (2c8cc3432 2023-03-06) - lexical version:
lexical_util0.8.5 - lexical compilation features used:
parsing
One potential patch for this is the following, since the callers do uphold this invariant. Safety docs would also need to be updated.
A better way to do this is to either use some POD trait (I don't recall if there's one in use in lexical already), or just expose a read_u32() or read_u64()
Patch
diff --git a/lexical-parse-float/src/slow.rs b/lexical-parse-float/src/slow.rs
index 26a0d90..8891bba 100644
--- a/lexical-parse-float/src/slow.rs
+++ b/lexical-parse-float/src/slow.rs
@@ -335,7 +335,7 @@ macro_rules! round_up_nonzero {
// First try reading 8-digits at a time.
if iter.is_contiguous() {
- while let Some(value) = iter.read::<u64>() {
+ while let Some(value) = unsafe { iter.read::<u64>() } {
// SAFETY: safe since we have at least 8 bytes in the buffer.
unsafe { iter.step_by_unchecked(8) };
if value != 0x3030_3030_3030_3030 {
diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs
index 3caedc6..bcc66a0 100644
--- a/lexical-parse-integer/src/algorithm.rs
+++ b/lexical-parse-integer/src/algorithm.rs
@@ -217,7 +217,7 @@ where
debug_assert!(Iter::IS_CONTIGUOUS);
// Read our digits, validate the input, and check from there.
- let bytes = u32::from_le(iter.read::<u32>()?);
+ let bytes = unsafe { u32::from_le(iter.read::<u32>()?) };
if is_4digits::<FORMAT>(bytes) {
// SAFETY: safe since we have at least 4 bytes in the buffer.
unsafe { iter.step_by_unchecked(4) };
@@ -289,7 +289,7 @@ where
debug_assert!(Iter::IS_CONTIGUOUS);
// Read our digits, validate the input, and check from there.
- let bytes = u64::from_le(iter.read::<u64>()?);
+ let bytes = unsafe { u64::from_le(iter.read::<u64>()?) };
if is_8digits::<FORMAT>(bytes) {
// SAFETY: safe since we have at least 8 bytes in the buffer.
unsafe { iter.step_by_unchecked(8) };
diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs
index bb5f86d..18c760c 100644
--- a/lexical-util/src/iterator.rs
+++ b/lexical-util/src/iterator.rs
@@ -122,7 +122,7 @@ pub trait BytesIter<'a>: Iterator<Item = &'a u8> {
/// Try to read a value of a different type from the iterator.
/// This advances the internal state of the iterator.
- fn read<V>(&self) -> Option<V>;
+ unsafe fn read<V>(&self) -> Option<V>;
/// Advance the internal slice by `N` elements.
///
diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs
index 22c511b..9df4cfb 100644
--- a/lexical-util/src/noskip.rs
+++ b/lexical-util/src/noskip.rs
@@ -120,7 +120,7 @@ impl<'a, const __: u128> Bytes<'a, __> {
/// Try to read a value of a different type from the iterator.
/// This advances the internal state of the iterator.
#[inline]
- pub fn read<V>(&self) -> Option<V> {
+ unsafe fn read<V>(&self) -> Option<V> {
if Self::IS_CONTIGUOUS && self.as_slice().len() >= mem::size_of::<V>() {
// SAFETY: safe since we've guaranteed the buffer is greater than
// the number of elements read.
@@ -288,7 +288,7 @@ impl<'a: 'b, 'b, const __: u128> BytesIter<'a> for BytesIterator<'a, 'b, __> {
}
#[inline]
- fn read<V>(&self) -> Option<V> {
+ unsafe fn read<V>(&self) -> Option<V> {
self.byte.read()
}
diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs
index 4daaaa9..55e098f 100644
--- a/lexical-util/src/skip.rs
+++ b/lexical-util/src/skip.rs
@@ -442,7 +442,7 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> {
/// Try to read a value of a different type from the iterator.
/// This advances the internal state of the iterator.
#[inline]
- pub fn read<V>(&self) -> Option<V> {
+ unsafe fn read<V>(&self) -> Option<V> {
if Self::IS_CONTIGUOUS && self.as_slice().len() >= mem::size_of::<V>() {
// SAFETY: safe since we've guaranteed the buffer is greater than
// the number of elements read.