rust-lexical icon indicating copy to clipboard operation
rust-lexical copied to clipboard

[BUG] Unsoundness in `Bytes::read()`

Open Manishearth opened this issue 2 years ago • 1 comments

Description

Bytes::read() only requires that V be of the right length, and imposes no validity requirements. This is trivially usable to create unsoundness.

Test case

use lexical_util::iterator::Bytes;

fn main() {
    let b: Bytes<10> = Bytes::new(&[21]);

    println!("{:?}", b.read::<bool>()); // will read the value 21 as a boolean
}

Prerequisites

  • Rust version: rustc 1.68.0 (2c8cc3432 2023-03-06)
  • lexical version: lexical_util 0.8.5
  • lexical compilation features used: parsing

Manishearth avatar Jul 25 '23 19:07 Manishearth

One potential patch for this is the following, since the callers do uphold this invariant. Safety docs would also need to be updated.

A better way to do this is to either use some POD trait (I don't recall if there's one in use in lexical already), or just expose a read_u32() or read_u64()

Patch
diff --git a/lexical-parse-float/src/slow.rs b/lexical-parse-float/src/slow.rs
index 26a0d90..8891bba 100644
--- a/lexical-parse-float/src/slow.rs
+++ b/lexical-parse-float/src/slow.rs
@@ -335,7 +335,7 @@ macro_rules! round_up_nonzero {
 
         // First try reading 8-digits at a time.
         if iter.is_contiguous() {
-            while let Some(value) = iter.read::<u64>() {
+            while let Some(value) = unsafe { iter.read::<u64>() } {
                 // SAFETY: safe since we have at least 8 bytes in the buffer.
                 unsafe { iter.step_by_unchecked(8) };
                 if value != 0x3030_3030_3030_3030 {
diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs
index 3caedc6..bcc66a0 100644
--- a/lexical-parse-integer/src/algorithm.rs
+++ b/lexical-parse-integer/src/algorithm.rs
@@ -217,7 +217,7 @@ where
     debug_assert!(Iter::IS_CONTIGUOUS);
 
     // Read our digits, validate the input, and check from there.
-    let bytes = u32::from_le(iter.read::<u32>()?);
+    let bytes = unsafe { u32::from_le(iter.read::<u32>()?) };
     if is_4digits::<FORMAT>(bytes) {
         // SAFETY: safe since we have at least 4 bytes in the buffer.
         unsafe { iter.step_by_unchecked(4) };
@@ -289,7 +289,7 @@ where
     debug_assert!(Iter::IS_CONTIGUOUS);
 
     // Read our digits, validate the input, and check from there.
-    let bytes = u64::from_le(iter.read::<u64>()?);
+    let bytes = unsafe { u64::from_le(iter.read::<u64>()?) };
     if is_8digits::<FORMAT>(bytes) {
         // SAFETY: safe since we have at least 8 bytes in the buffer.
         unsafe { iter.step_by_unchecked(8) };
diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs
index bb5f86d..18c760c 100644
--- a/lexical-util/src/iterator.rs
+++ b/lexical-util/src/iterator.rs
@@ -122,7 +122,7 @@ pub trait BytesIter<'a>: Iterator<Item = &'a u8> {
 
     /// Try to read a value of a different type from the iterator.
     /// This advances the internal state of the iterator.
-    fn read<V>(&self) -> Option<V>;
+    unsafe fn read<V>(&self) -> Option<V>;
 
     /// Advance the internal slice by `N` elements.
     ///
diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs
index 22c511b..9df4cfb 100644
--- a/lexical-util/src/noskip.rs
+++ b/lexical-util/src/noskip.rs
@@ -120,7 +120,7 @@ impl<'a, const __: u128> Bytes<'a, __> {
     /// Try to read a value of a different type from the iterator.
     /// This advances the internal state of the iterator.
     #[inline]
-    pub fn read<V>(&self) -> Option<V> {
+    unsafe fn read<V>(&self) -> Option<V> {
         if Self::IS_CONTIGUOUS && self.as_slice().len() >= mem::size_of::<V>() {
             // SAFETY: safe since we've guaranteed the buffer is greater than
             // the number of elements read.
@@ -288,7 +288,7 @@ impl<'a: 'b, 'b, const __: u128> BytesIter<'a> for BytesIterator<'a, 'b, __> {
     }
 
     #[inline]
-    fn read<V>(&self) -> Option<V> {
+    unsafe fn read<V>(&self) -> Option<V> {
         self.byte.read()
     }
 
diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs
index 4daaaa9..55e098f 100644
--- a/lexical-util/src/skip.rs
+++ b/lexical-util/src/skip.rs
@@ -442,7 +442,7 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> {
     /// Try to read a value of a different type from the iterator.
     /// This advances the internal state of the iterator.
     #[inline]
-    pub fn read<V>(&self) -> Option<V> {
+    unsafe fn read<V>(&self) -> Option<V> {
         if Self::IS_CONTIGUOUS && self.as_slice().len() >= mem::size_of::<V>() {
             // SAFETY: safe since we've guaranteed the buffer is greater than
             // the number of elements read.

Manishearth avatar Jul 25 '23 19:07 Manishearth