diff --git a/Cargo.lock b/Cargo.lock index 3d0e28d..9a0b5e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,16 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "as-slice" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37dfb65bc03b2bc85ee827004f14a6817e04160e3b1a28931986a666a9290e70" +dependencies = [ + "generic-array 0.12.3", + "generic-array 0.13.2", + "stable_deref_trait", +] + [[package]] name = "atty" version = "0.2.14" @@ -245,6 +256,24 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" +[[package]] +name = "generic-array" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec" +dependencies = [ + "typenum", +] + +[[package]] +name = "generic-array" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed1e761351b56f54eb9dcd0cfaca9fd0daecf93918e1cfc01c8a3d26ee7adcd" +dependencies = [ + "typenum", +] + [[package]] name = "getrandom" version = "0.1.14" @@ -256,6 +285,27 @@ dependencies = [ "wasi", ] +[[package]] +name = "hash32" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4041af86e63ac4298ce40e5cca669066e75b6f1aa3390fe2561ffa5e1d9f4cc" +dependencies = [ + "byteorder", +] + +[[package]] +name = "heapless" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ffa511365b12346c5fbe759d82f80d3aa70d9f1ba01955594f84a1a6bbab985" +dependencies = [ + "as-slice", + "generic-array 0.13.2", + "hash32", + "stable_deref_trait", +] + [[package]] name = "hermit-abi" version = "0.1.8" @@ -722,6 +772,12 @@ dependencies = [ "serde", ] +[[package]] +name = "stable_deref_trait" +version = "1.1.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" + [[package]] name = "syn" version = "1.0.16" @@ -772,6 +828,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "typenum" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" + [[package]] name = "unicode-width" version = "0.1.7" @@ -795,6 +857,8 @@ name = "veriform" version = "0.0.1" dependencies = [ "displaydoc", + "heapless", + "log", "tai64", "uuid", "vint64", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index e99f626..b15cad0 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -13,6 +13,8 @@ keywords = ["hashing", "merkle", "protobufs", "security", "serialization"] [dependencies] displaydoc = { version = "0.1", default-features = false } +heapless = "0.5" +log = { version = "0.4", optional = true } tai64 = { version = "3", optional = true, default-features = false } uuid = { version = "0.8", optional = true, default-features = false } vint64 = { version = "1", path = "vint64" } diff --git a/rust/src/builtins.rs b/rust/src/builtins.rs index ffca192..a0bbf6f 100644 --- a/rust/src/builtins.rs +++ b/rust/src/builtins.rs @@ -3,13 +3,13 @@ //! These are the equivalent of Protobufs' "well-known types" #[cfg(feature = "tai64")] -mod tai64; +mod timestamp; #[cfg(feature = "uuid")] mod uuid; #[cfg(feature = "tai64")] -pub use self::tai64::TAI64N; +pub use self::timestamp::Timestamp; #[cfg(feature = "uuid")] pub use self::uuid::Uuid; diff --git a/rust/src/builtins/tai64.rs b/rust/src/builtins/timestamp.rs similarity index 62% rename from rust/src/builtins/tai64.rs rename to rust/src/builtins/timestamp.rs index 3eb0cb8..3816469 100644 --- a/rust/src/builtins/tai64.rs +++ b/rust/src/builtins/timestamp.rs @@ -3,33 +3,21 @@ //! In Veriform these are encoded as: //! //! ```text -//! message TAI64N { +//! 
message Timestamp { //! secs[0]: !bytes(size = 8), //! nanos[1]: bytes(size = 4) //! } //! ``` -pub use tai64::TAI64N; +pub use tai64::TAI64N as Timestamp; -use crate::{ - decoder::{Decodable, Decoder}, - encoder::Encoder, - error::Error, - field::{self, WireType}, - message::Message, -}; +use crate::{decoder::Decode, field, Decoder, Encoder, Error, Message}; use core::convert::TryInto; -impl Message for TAI64N { - fn decode(bytes: impl AsRef<[u8]>) -> Result { - let mut bytes = bytes.as_ref(); - let mut decoder = Decoder::new(); - - decoder.decode_expected_header(&mut bytes, 0, WireType::UInt64)?; - let secs = decoder.decode_uint64(&mut bytes)?; - - decoder.decode_expected_header(&mut bytes, 1, WireType::UInt64)?; - let nanos = decoder.decode_uint64(&mut bytes)?; +impl Message for Timestamp { + fn decode(decoder: &mut Decoder, mut input: &[u8]) -> Result { + let secs: u64 = decoder.decode(0, &mut input)?; + let nanos: u64 = decoder.decode(1, &mut input)?; if nanos > core::u32::MAX as u64 { return Err(Error::Length); @@ -55,8 +43,8 @@ impl Message for TAI64N { } } -/// Convert a TAI64N timestamp to two integers -fn tai64_to_ints(tai64n: &TAI64N) -> (u64, u32) { +/// Convert a Timestamp timestamp to two integers +fn tai64_to_ints(tai64n: &Timestamp) -> (u64, u32) { let encoded = tai64n.to_bytes(); let secs = u64::from_le_bytes(encoded[..8].try_into().unwrap()); let nanos = u32::from_le_bytes(encoded[8..].try_into().unwrap()); diff --git a/rust/src/builtins/uuid.rs b/rust/src/builtins/uuid.rs index 2aafebc..0289771 100644 --- a/rust/src/builtins/uuid.rs +++ b/rust/src/builtins/uuid.rs @@ -11,23 +11,16 @@ pub use uuid::Uuid; use crate::{ - decoder::{Decodable, Decoder}, - encoder::Encoder, - error::Error, - field::{self, WireType}, - message::Message, + decoder::{DecodeRef, Decoder}, + field, Encoder, Error, Message, }; use core::convert::TryInto; impl Message for Uuid { - fn decode(bytes: impl AsRef<[u8]>) -> Result { - let mut bytes = bytes.as_ref(); - let mut 
decoder = Decoder::new(); + fn decode(decoder: &mut Decoder, mut input: &[u8]) -> Result { + let bytes: &[u8] = decoder.decode_ref(0, &mut input)?; - decoder.decode_expected_header(&mut bytes, 0, WireType::String)?; - let uuid_bytes = decoder.decode_bytes(&mut bytes)?; - - uuid_bytes + bytes .try_into() .map(Uuid::from_bytes) .map_err(|_| Error::Builtin) diff --git a/rust/src/decoder.rs b/rust/src/decoder.rs index 7ff2c97..2a2b160 100644 --- a/rust/src/decoder.rs +++ b/rust/src/decoder.rs @@ -7,4 +7,258 @@ mod decodable; mod event; mod vint64; -pub use self::{decodable::Decodable, event::Event, message::Decoder}; +pub use self::{decodable::Decodable, event::Event}; + +use crate::{ + field::{Tag, WireType}, + Error, Message, +}; +use heapless::consts::U16; + +#[cfg(feature = "log")] +macro_rules! trace { + ($decoder:expr, $c:expr, $msg:expr, $($arg:tt)*) => { + let mut prefix: heapless::String = heapless::String::new(); + for _ in 0..$decoder.depth() { + prefix.push($c).unwrap(); + } + log::trace!(concat!("{}", $msg), &prefix, $($arg)*); + } +} + +#[cfg(feature = "log")] +macro_rules! begin { + ($decoder:expr, $msg:expr, $($arg:tt)*) => { + trace!($decoder, '+', $msg, $($arg)*); + } +} + +/// Veriform decoder +pub struct Decoder { + /// Stack of message decoders (max nesting depth 16) + stack: heapless::Vec, +} + +impl Default for Decoder { + fn default() -> Self { + let mut stack = heapless::Vec::new(); + stack.push(message::Decoder::new()).unwrap(); + Decoder { stack } + } +} + +impl Decoder { + /// Initialize decoder + pub fn new() -> Self { + Self::default() + } + + /// Push a new message decoder down onto the stack + // TODO(tarcieri): higher-level API (more like `::decode_message`) + pub fn push(&mut self) -> Result<(), Error> { + self.stack + .push(message::Decoder::new()) + .map_err(|_| Error::NestingDepth) + } + + /// Pop the message decoder from the stack when we've finished a message. + /// + /// Panics if the decoder's stack underflows. 
+ // TODO(tarcieri): panic-free higher-level API, possibly RAII-based? + pub fn pop(&mut self) { + self.stack.pop().unwrap(); + } + + /// Peek at the message decoder on the top of the stack + // TODO(tarcieri): remove this implementation detail from public API + pub fn peek(&mut self) -> &mut message::Decoder { + self.stack.last_mut().unwrap() + } + + /// Get the depth of the pushdown stack + // TODO(tarcieri): remove this implementation detail from public API + pub fn depth(&self) -> usize { + self.stack.len() + } +} + +/// Try to decode a field to a value of the given type. +/// +/// This trait is intended to be impl'd by the [`Decoder`] type. +pub trait Decode { + /// Try to decode a value of type `T` + fn decode(&mut self, tag: Tag, input: &mut &[u8]) -> Result; +} + +/// Try to decode a field to a reference of the given type. +/// +/// This trait is intended to be impl'd by the [`Decoder`] type. +pub trait DecodeRef { + /// Try to decode a reference to type `T` + fn decode_ref<'a>(&mut self, tag: Tag, input: &mut &'a [u8]) -> Result<&'a T, Error>; +} + +/// Decode a sequence of values to a [`sequence::Iter`]. +/// +/// This trait is intended to be impl'd by the [`Decoder`] type. 
+pub trait DecodeSeq { + /// Try to decode a sequence of values of type `T` + fn decode_seq<'a>( + &mut self, + tag: Tag, + input: &mut &'a [u8], + ) -> Result, Error>; +} + +impl Decode for Decoder { + fn decode(&mut self, tag: Tag, input: &mut &[u8]) -> Result { + #[cfg(feature = "log")] + begin!(self, "[{}]: msg?", tag); + + self.peek().expect_header(input, tag, WireType::Message)?; + let msg_bytes = self.peek().decode_message(input)?; + + self.push()?; + let msg = T::decode(self, msg_bytes)?; + //begin!(self, "[{}]", tag); + + self.pop(); + Ok(msg) + } +} + +impl Decode for Decoder { + fn decode(&mut self, tag: Tag, input: &mut &[u8]) -> Result { + #[cfg(feature = "log")] + begin!(self, "[{}]: uint64?", tag); + + self.peek().expect_header(input, tag, WireType::UInt64)?; + self.peek().decode_uint64(input) + } +} + +impl Decode for Decoder { + fn decode(&mut self, tag: Tag, input: &mut &[u8]) -> Result { + #[cfg(feature = "log")] + begin!(self, "[{}]: sint64?", tag); + + self.peek().expect_header(input, tag, WireType::SInt64)?; + self.peek().decode_sint64(input) + } +} + +impl DecodeRef<[u8]> for Decoder { + fn decode_ref<'a>(&mut self, tag: Tag, input: &mut &'a [u8]) -> Result<&'a [u8], Error> { + #[cfg(feature = "log")] + begin!(self, "[{}]: bytes?", tag); + + self.peek().expect_header(input, tag, WireType::Bytes)?; + self.peek().decode_bytes(input) + } +} + +impl DecodeRef for Decoder { + fn decode_ref<'a>(&mut self, tag: Tag, input: &mut &'a [u8]) -> Result<&'a str, Error> { + #[cfg(feature = "log")] + begin!(self, "[{}]: string?", tag); + + self.peek().expect_header(input, tag, WireType::String)?; + self.peek().decode_string(input) + } +} + +impl DecodeSeq for Decoder { + fn decode_seq<'a>( + &mut self, + tag: Tag, + input: &mut &'a [u8], + ) -> Result, Error> { + #[cfg(feature = "log")] + begin!(self, "[{}]: seq?", tag); + + self.peek().expect_header(input, tag, WireType::Sequence)?; + let seq_bytes = self.peek().decode_sequence(WireType::Message, input)?; 
+ let decoder = sequence::Decoder::new(WireType::Message, seq_bytes.len()); + + Ok(sequence::Iter::new(decoder, seq_bytes)) + } +} + +impl DecodeSeq for Decoder { + fn decode_seq<'a>( + &mut self, + tag: Tag, + input: &mut &'a [u8], + ) -> Result, Error> { + #[cfg(feature = "log")] + begin!(self, "[{}]: seq?", tag); + + self.peek().expect_header(input, tag, WireType::Sequence)?; + let seq_bytes = self.peek().decode_sequence(WireType::UInt64, input)?; + let decoder = sequence::Decoder::new(WireType::UInt64, seq_bytes.len()); + + Ok(sequence::Iter::new(decoder, seq_bytes)) + } +} + +impl DecodeSeq for Decoder { + fn decode_seq<'a>( + &mut self, + tag: Tag, + input: &mut &'a [u8], + ) -> Result, Error> { + #[cfg(feature = "log")] + begin!(self, "[{}]: seq?", tag); + + self.peek().expect_header(input, tag, WireType::Sequence)?; + let seq_bytes = self.peek().decode_sequence(WireType::SInt64, input)?; + let decoder = sequence::Decoder::new(WireType::SInt64, seq_bytes.len()); + + Ok(sequence::Iter::new(decoder, seq_bytes)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decode_uint64() { + let input = [138, 10, 85]; + let mut input_ref = &input[..]; + + let value: u64 = Decoder::new().decode(42, &mut input_ref).unwrap(); + assert_eq!(value, 42); + assert!(input_ref.is_empty()); + } + + #[test] + fn decode_sint64() { + let input = [206, 10, 167]; + let mut input_ref = &input[..]; + + let value: i64 = Decoder::new().decode(43, &mut input_ref).unwrap(); + assert_eq!(value, -42); + assert!(input_ref.is_empty()); + } + + #[test] + fn decode_bytes() { + let input = [73, 11, 98, 121, 116, 101, 115]; + let mut input_ref = &input[..]; + + let bytes: &[u8] = Decoder::new().decode_ref(2, &mut input_ref).unwrap(); + assert_eq!(bytes, &[98, 121, 116, 101, 115]); + assert!(input_ref.is_empty()); + } + + #[test] + fn decode_string() { + let input = [139, 7, 98, 97, 122]; + let mut input_ref = &input[..]; + + let string: &str = Decoder::new().decode_ref(4, &mut 
input_ref).unwrap(); + assert_eq!(string, "baz"); + assert!(input_ref.is_empty()); + } +} diff --git a/rust/src/decoder/message.rs b/rust/src/decoder/message.rs index b04aac1..3f206c4 100644 --- a/rust/src/decoder/message.rs +++ b/rust/src/decoder/message.rs @@ -113,7 +113,7 @@ impl Decoder { /// Decode an expected field header, skipping (in-order) unknown fields, /// and returning an error if the field is missing or unexpected - pub fn decode_expected_header( + pub fn expect_header( &mut self, input: &mut &[u8], tag: Tag, @@ -136,8 +136,9 @@ impl Decoder { } if header.wire_type != wire_type { - return Err(Error::WireType { - wanted: Some(wire_type), + return Err(Error::UnexpectedWireType { + actual: header.wire_type, + wanted: wire_type, }); } diff --git a/rust/src/decoder/sequence.rs b/rust/src/decoder/sequence.rs index 61ea72c..e0a1bf0 100644 --- a/rust/src/decoder/sequence.rs +++ b/rust/src/decoder/sequence.rs @@ -1,5 +1,9 @@ //! Sequence decoder +mod iter; + +pub use self::iter::Iter; + use super::{ vint64::{self, zigzag}, Decodable, Event, @@ -41,8 +45,9 @@ impl Decodable for Decoder { input: &mut &'a [u8], ) -> Result<&'a [u8], Error> { if expected_type != self.wire_type { - return Err(Error::WireType { - wanted: Some(expected_type), + return Err(Error::UnexpectedWireType { + actual: self.wire_type, + wanted: expected_type, }); } diff --git a/rust/src/decoder/sequence/iter.rs b/rust/src/decoder/sequence/iter.rs new file mode 100644 index 0000000..494008a --- /dev/null +++ b/rust/src/decoder/sequence/iter.rs @@ -0,0 +1,77 @@ +//! Sequence iterator + +use super::Decoder; +use crate::{decoder::Decodable, Error, Message}; +use core::marker::PhantomData; + +/// Sequence iterator: iterates over a sequence of values in a Veriform +/// message, decoding each one. 
+pub struct Iter<'a, T> { + /// Sequence decoder + decoder: Decoder, + + /// Input data + data: &'a [u8], + + /// Type to decode + decodable: PhantomData, +} + +impl<'a, T> Iter<'a, T> { + /// Create a new sequence iterator from a sequence decoder + pub(crate) fn new(decoder: Decoder, data: &'a [u8]) -> Self { + Self { + decoder, + data, + decodable: PhantomData, + } + } +} + +impl<'a, T: Message> Iterator for Iter<'a, T> { + type Item = Result; + + fn next(&mut self) -> Option> { + if self.decoder.remaining() == 0 { + return None; + } + + let mut input = &self.data[self.decoder.position()..]; + + // TODO(tarcieri): reuse decoder + let mut decoder = crate::Decoder::new(); + + let result = self + .decoder + .decode_message(&mut input) + .and_then(|msg_bytes| T::decode(&mut decoder, msg_bytes)); + + Some(result) + } +} + +impl<'a> Iterator for Iter<'a, u64> { + type Item = Result; + + fn next(&mut self) -> Option> { + if self.decoder.remaining() == 0 { + return None; + } + + let mut input = &self.data[self.decoder.position()..]; + Some(self.decoder.decode_uint64(&mut input)) + } +} + +impl<'a> Iterator for Iter<'a, i64> { + type Item = Result; + + fn next(&mut self) -> Option> { + if self.decoder.remaining() == 0 { + return None; + } + + let mut input = &self.data[self.decoder.position()..]; + Some(self.decoder.decode_sint64(&mut input)) + } +} diff --git a/rust/src/encoder.rs b/rust/src/encoder.rs index 8a12ffc..827843b 100644 --- a/rust/src/encoder.rs +++ b/rust/src/encoder.rs @@ -145,7 +145,7 @@ impl<'a> Encoder<'a> { mod tests { use super::Encoder; use crate::{ - decoder::{Decodable, Decoder}, + decoder::{message::Decoder, Decodable}, field::WireType, }; diff --git a/rust/src/error.rs b/rust/src/error.rs index 5c41e21..903effd 100644 --- a/rust/src/error.rs +++ b/rust/src/error.rs @@ -33,9 +33,15 @@ pub enum Error { wire_type: Option, }, + /// invalid wire type + InvalidWireType, + /// bad length Length, + /// maximum message nesting depth exceeded + 
NestingDepth, + /// field {tag:?} is out-of-order Order { /// tag of the out-of-order field @@ -54,6 +60,15 @@ pub enum Error { wire_type: WireType, }, + /// unexpected wire type: actual={actual:?} wanted={wanted:?} + UnexpectedWireType { + /// actual wire type found in message + actual: WireType, + + /// wire type we were looking for + wanted: WireType, + }, + /// malformed UTF-8 encountered at byte: {valid_up_to:?} Utf8 { /// byte at which UTF-8 encoding failed @@ -62,12 +77,6 @@ pub enum Error { /// `vint64` encoding error VInt64, - - /// invalid wire type: {wanted:?} - WireType { - /// wire type we were looking for - wanted: Option, - }, } #[cfg(feature = "std")] diff --git a/rust/src/field/wire_type.rs b/rust/src/field/wire_type.rs index b3ef435..0c96547 100644 --- a/rust/src/field/wire_type.rs +++ b/rust/src/field/wire_type.rs @@ -61,7 +61,7 @@ impl TryFrom for WireType { 5 => Ok(WireType::String), 6 => Ok(WireType::Message), 7 => Ok(WireType::Sequence), - _ => Err(Error::WireType { wanted: None }), + _ => Err(Error::InvalidWireType), } } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index a7b9752..2fca624 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -24,10 +24,12 @@ pub mod error; pub mod field; pub mod message; +// Re-export the `vint64` crate +pub use vint64; + pub use crate::{ decoder::{Decodable, Decoder}, encoder::Encoder, error::Error, message::Message, }; -pub use vint64; diff --git a/rust/src/message.rs b/rust/src/message.rs index de540e2..171fca4 100644 --- a/rust/src/message.rs +++ b/rust/src/message.rs @@ -5,7 +5,7 @@ // // Copyright (c) 2017 Dan Burkert and released under the Apache 2.0 license. -use crate::Error; +use crate::{Decoder, Error}; #[cfg(feature = "alloc")] use alloc::vec::Vec; @@ -26,10 +26,11 @@ pub enum Element { Value, } -/// Veriform message +/// Veriform messages pub trait Message { - /// Decode a Veriform message contained in the provided slice. 
- fn decode(bytes: impl AsRef<[u8]>) -> Result + /// Decode a Veriform message contained in the provided slice using the + /// given [`Decoder`]. + fn decode(decoder: &mut Decoder, input: &[u8]) -> Result where Self: Sized; @@ -40,7 +41,8 @@ pub trait Message { /// Get the length of a message after being encoded as Veriform. fn encoded_len(&self) -> usize; - /// Encode this message as Veriform, returning a byte sequence on success. + /// Encode this message as Veriform, allocating and returning a byte vector + /// on success. #[cfg(feature = "alloc")] fn encode_vec(&self) -> Result, Error> { let mut encoded = vec![0; self.encoded_len()];