borsh-rs icon indicating copy to clipboard operation
borsh-rs copied to clipboard

Support deserializing as references to buffer

Open austinabell opened this issue 2 years ago • 0 comments

Was asked the question of why this deserialization didn't work and I always assumed it was because of an incompatible interface or to minimize code size, but after playing with it:

diff --git a/borsh-derive-internal/src/struct_de.rs b/borsh-derive-internal/src/struct_de.rs
index d26192d2..b5abb77a 100644
--- a/borsh-derive-internal/src/struct_de.rs
+++ b/borsh-derive-internal/src/struct_de.rs
@@ -34,7 +34,7 @@ pub fn struct_de(input: &ItemStruct, cratename: Ident) -> syn::Result<TokenStrea
                     );
 
                     quote! {
-                        #field_name: #cratename::BorshDeserialize::deserialize(buf)?,
+                        #field_name: #cratename::BorshDeserializeRef::deserialize_ref(buf)?,
                     }
                 };
                 body.extend(delta);
@@ -47,7 +47,7 @@ pub fn struct_de(input: &ItemStruct, cratename: Ident) -> syn::Result<TokenStrea
             let mut body = TokenStream2::new();
             for _ in 0..fields.unnamed.len() {
                 let delta = quote! {
-                    #cratename::BorshDeserialize::deserialize(buf)?,
+                    #cratename::BorshDeserializeRef::deserialize_ref(buf)?,
                 };
                 body.extend(delta);
             }
diff --git a/borsh/src/de/mod.rs b/borsh/src/de/mod.rs
index eedf6c87..00d3440a 100644
--- a/borsh/src/de/mod.rs
+++ b/borsh/src/de/mod.rs
@@ -59,6 +59,56 @@ pub trait BorshDeserialize: Sized {
     }
 }
 
+pub trait BorshDeserializeRef<'de>: Sized {
+    /// Deserializes this instance from a given slice of bytes.
+    /// Updates the buffer to point at the remaining bytes.
+    fn deserialize_ref(buf: &mut &'de [u8]) -> Result<Self>;
+
+    /// Deserialize this instance from a slice of bytes.
+    fn try_from_slice_ref(v: &'de [u8]) -> Result<Self> {
+        let mut v_mut = v;
+        let result = Self::deserialize_ref(&mut v_mut)?;
+        if !v_mut.is_empty() {
+            return Err(Error::new(ErrorKind::InvalidData, ERROR_NOT_ALL_BYTES_READ));
+        }
+        Ok(result)
+    }
+}
+
+impl<'de, T> BorshDeserializeRef<'de> for T
+where
+    T: BorshDeserialize,
+{
+    fn deserialize_ref<'m>(buf: &'m mut &'de [u8]) -> Result<Self> {
+        <T as BorshDeserialize>::deserialize(buf)
+    }
+}
+
+impl<'de> BorshDeserializeRef<'de> for &'de [u8] {
+    fn deserialize_ref<'m>(buf: &'m mut &'de [u8]) -> Result<Self> {
+        let len = u32::deserialize_ref(buf)?;
+        let len = len.try_into().map_err(|_| ErrorKind::InvalidInput)?;
+        if buf.len() < len {
+            return Err(Error::new(
+                ErrorKind::InvalidInput,
+                ERROR_UNEXPECTED_LENGTH_OF_INPUT,
+            ));
+        }
+        let (front, rest) = buf.split_at(len);
+        *buf = rest;
+        Ok(front)
+    }
+}
+
+impl<'de> BorshDeserializeRef<'de> for &'de str {
+    fn deserialize_ref<'m>(buf: &'m mut &'de [u8]) -> Result<Self> {
+        core::str::from_utf8(<&'de [u8]>::deserialize_ref(buf)?).map_err(|err| {
+            let msg = err.to_string();
+            Error::new(ErrorKind::InvalidData, msg)
+        })
+    }
+}
+
 impl BorshDeserialize for u8 {
     #[inline]
     fn deserialize(buf: &mut &[u8]) -> Result<Self> {
@@ -550,9 +600,10 @@ const _: () = {
 };
 
 #[cfg(feature = "const-generics")]
-impl<T, const N: usize> BorshDeserialize for [T; N]
+impl<'_de, T, const N: usize> BorshDeserialize for [T; N]
 where
-    T: BorshDeserialize + Default + Copy,
+// TODO yeah don't look at this yet
+    T: BorshDeserializeRef<'_de> + BorshDeserialize + Default + Copy,
 {
     #[inline]
     fn deserialize(buf: &mut &[u8]) -> Result<Self> {
@@ -579,10 +630,19 @@ macro_rules! impl_tuple {
       {
         #[inline]
         fn deserialize(buf: &mut &[u8]) -> Result<Self> {
-
             Ok(($($name::deserialize(buf)?,)+))
         }
       }
+
+      // TODO too lazy to resolve this with this setup
+    //   impl<'_de, $($name),+> $crate::BorshDeserializeRef<'_de> for ($($name),+)
+    //   where $($name: $crate::BorshDeserializeRef<'_de>,)+
+    //   {
+    //     #[inline]
+    //     fn deserialize_ref(buf: &mut &'_de [u8]) -> Result<Self> {
+    //         Ok(($($name::deserialize_ref(buf)?,)+))
+    //     }
+    //   }
     };
 }
 
diff --git a/borsh/src/lib.rs b/borsh/src/lib.rs
index 6a13270d..b1fe3eb8 100644
--- a/borsh/src/lib.rs
+++ b/borsh/src/lib.rs
@@ -10,7 +10,7 @@ pub mod schema;
 pub mod schema_helpers;
 pub mod ser;
 
-pub use de::BorshDeserialize;
+pub use de::{BorshDeserialize, BorshDeserializeRef};
 pub use schema::BorshSchema;
 pub use schema_helpers::{try_from_slice_with_schema, try_to_vec_with_schema};
 pub use ser::helpers::{to_vec, to_writer};

I don't see why this would be the case. These changes were me trying to have this change happen with a non-breaking change, but seems infeasible to handle all cases like this. Probably would need to follow the serde pattern of having BorshDeserialize and BorshDeserializeOwned: for<'de> Deserialize<'de>.

Although this would be a breaking change, would allow certain parts of code to be optimized to avoid unnecessary allocations/copies. Was this ever benchmarked that adding the lifetime increased code size or reduced performance? Seems like something that should be supported before 1.0

austinabell avatar Mar 18 '22 19:03 austinabell