From 06fba3efbaaba0caaf7009175da5597fce677624 Mon Sep 17 00:00:00 2001 From: hanako mumei <81144685+2501babe@users.noreply.github.com> Date: Thu, 1 Feb 2024 03:06:59 -0800 Subject: [PATCH 1/5] solana-program: make VoteState::deserialize_into() much faster we also relax some constraints which bring it in line with bincode::deserialize --- sdk/program/src/vote/state/mod.rs | 61 +++++++++++++------ .../src/vote/state/vote_state_0_23_5.rs | 61 +++++++++++++++++++ .../src/vote/state/vote_state_deserialize.rs | 39 ++++++------ 3 files changed, 125 insertions(+), 36 deletions(-) diff --git a/sdk/program/src/vote/state/mod.rs b/sdk/program/src/vote/state/mod.rs index c4a98e8a9819e2..e1379f1f5ba0b0 100644 --- a/sdk/program/src/vote/state/mod.rs +++ b/sdk/program/src/vote/state/mod.rs @@ -18,7 +18,7 @@ use { sysvar::clock::Clock, vote::{authorized_voters::AuthorizedVoters, error::VoteError}, }, - bincode::{serialize_into, serialized_size, ErrorKind}, + bincode::{serialize_into, ErrorKind}, serde_derive::{Deserialize, Serialize}, std::{collections::VecDeque, fmt::Debug, io::Cursor}, }; @@ -475,8 +475,11 @@ impl VoteState { 3762 // see test_vote_state_size_of. } - // we retain bincode deserialize for not(target_os = "solana") - // because the hand-written parser does not support V0_23_5 + // NOTE we retain `bincode::deserialize` for `not(target_os = "solana")` pending testing on mainnet-beta + // once that testing is done, `VoteState::deserialize_into` may be used for all targets + // conversion of V0_23_5 to current must be handled specially, however + // because it inserts a null voter into `authorized_voters` + // which `VoteStateVersions::is_uninitialized` erroneously reports as initialized pub fn deserialize(input: &[u8]) -> Result { #[cfg(not(target_os = "solana"))] { @@ -492,26 +495,32 @@ impl VoteState { } } - /// Deserializes the input buffer into the provided `VoteState` + /// Deserializes the input `VoteStateVersions` buffer directly into a provided `VoteState` struct /// - /// This function exists to deserialize `VoteState` in a BPF context without going above - /// the compute limit, and must be kept up to date with `bincode::deserialize`. + /// In a BPF context, V0_23_5 is not supported, but in non-BPF, all versions are supported for + /// compatibility with `bincode::deserialize` pub fn deserialize_into( input: &[u8], vote_state: &mut VoteState, ) -> Result<(), InstructionError> { - let minimum_size = - serialized_size(vote_state).map_err(|_| InstructionError::InvalidAccountData)?; - if (input.len() as u64) < minimum_size { - return Err(InstructionError::InvalidAccountData); - } - let mut cursor = Cursor::new(input); let variant = read_u32(&mut cursor)?; match variant { - // V0_23_5. not supported; these should not exist on mainnet - 0 => Err(InstructionError::InvalidAccountData), + // V0_23_5. not supported for bpf targets; these should not exist on mainnet + // supported for non-bpf targets for backwards compatibility + 0 => { + #[cfg(not(target_os = "solana"))] + { + *vote_state = bincode::deserialize::(input) + .map(|versioned| versioned.convert_to_current()) + .map_err(|_| InstructionError::InvalidAccountData)?; + + Ok(()) + } + #[cfg(target_os = "solana")] + Err(InstructionError::InvalidAccountData) + } // V1_14_11. substantially different layout and data from V0_23_5 1 => deserialize_vote_state_into(&mut cursor, vote_state, false), // Current. the only difference from V1_14_11 is the addition of a slot-latency to each vote @@ -519,6 +528,8 @@ impl VoteState { _ => Err(InstructionError::InvalidAccountData), }?; + // if cursor overruns the input, it produces null bytes and continues to advance `position` + // this check ensures we do not accept such a malformed input erroneously if cursor.position() > input.len() as u64 { return Err(InstructionError::InvalidAccountData); } @@ -1089,7 +1100,7 @@ pub mod serde_tower_sync { #[cfg(test)] mod tests { - use {super::*, itertools::Itertools, rand::Rng}; + use {super::*, bincode::serialized_size, itertools::Itertools, rand::Rng}; #[test] fn test_vote_serialize() { @@ -1147,16 +1158,28 @@ mod tests { assert_eq!(e, InstructionError::InvalidAccountData); // variant - let serialized_len_x4 = serialized_size(&test_vote_state).unwrap() * 4; + let serialized_len_x4 = serialized_size(&VoteState::default()).unwrap() * 4; let mut rng = rand::thread_rng(); for _ in 0..1000 { let raw_data_length = rng.gen_range(1..serialized_len_x4); - let raw_data: Vec = (0..raw_data_length).map(|_| rng.gen::()).collect(); + let mut raw_data: Vec = (0..raw_data_length).map(|_| rng.gen::()).collect(); + + // pure random data will ~never have a valid enum tag, so lets help it out + if raw_data_length >= 4 && rng.gen::() { + let tag = rng.gen::() % 3; + raw_data[0] = tag; + raw_data[1] = 0; + raw_data[2] = 0; + raw_data[3] = 0; + } // it is extremely improbable, though theoretically possible, for random bytes to be syntactically valid - // so we only check that the deserialize function does not panic + // so we only check that the parser does not panic and that it succeeds or fails exactly in line with bincode let mut test_vote_state = VoteState::default(); - let _ = VoteState::deserialize_into(&raw_data, &mut test_vote_state); + let test_res = VoteState::deserialize_into(&raw_data, &mut test_vote_state); + let bincode_res = bincode::deserialize::(&raw_data); + + assert_eq!(test_res.is_ok(), bincode_res.is_ok()); } } diff --git a/sdk/program/src/vote/state/vote_state_0_23_5.rs b/sdk/program/src/vote/state/vote_state_0_23_5.rs index ae3b9207fe494e..2537633690c7ea 100644 --- a/sdk/program/src/vote/state/vote_state_0_23_5.rs +++ b/sdk/program/src/vote/state/vote_state_0_23_5.rs @@ -1,9 +1,12 @@ #![allow(clippy::arithmetic_side_effects)] use super::*; +#[cfg(test)] +use arbitrary::{Arbitrary, Unstructured}; const MAX_ITEMS: usize = 32; #[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[cfg_attr(test, derive(Arbitrary))] pub struct VoteState0_23_5 { /// the node that votes in this account pub node_pubkey: Pubkey, @@ -59,3 +62,61 @@ impl CircBuf { self.buf[self.idx] = item; } } + +#[cfg(test)] +impl<'a, I: Default + Copy> Arbitrary<'a> for CircBuf +where + I: Arbitrary<'a>, +{ + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let mut circbuf = Self::default(); + + let len = u.arbitrary_len::()?; + for _ in 0..len { + circbuf.append(I::arbitrary(u)?); + } + + Ok(circbuf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_vote_deserialize_0_23_5() { + // base case + let target_vote_state = VoteState0_23_5::default(); + let target_vote_state_versions = VoteStateVersions::V0_23_5(Box::new(target_vote_state)); + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!( + target_vote_state_versions.convert_to_current(), + test_vote_state + ); + + // variant + // provide 4x the minimum struct size in bytes to ensure we typically touch every field + let struct_bytes_x4 = std::mem::size_of::() * 4; + for _ in 0..100 { + let raw_data: Vec = (0..struct_bytes_x4).map(|_| rand::random::()).collect(); + let mut unstructured = Unstructured::new(&raw_data); + + let arbitrary_vote_state = VoteState0_23_5::arbitrary(&mut unstructured).unwrap(); + let target_vote_state_versions = + VoteStateVersions::V0_23_5(Box::new(arbitrary_vote_state)); + + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + let target_vote_state = target_vote_state_versions.convert_to_current(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!(target_vote_state, test_vote_state); + } + } +} diff --git a/sdk/program/src/vote/state/vote_state_deserialize.rs b/sdk/program/src/vote/state/vote_state_deserialize.rs index b457395ccbd38a..f4628332c2f2c3 100644 --- a/sdk/program/src/vote/state/vote_state_deserialize.rs +++ b/sdk/program/src/vote/state/vote_state_deserialize.rs @@ -1,14 +1,16 @@ use { crate::{ instruction::InstructionError, - pubkey::Pubkey, serialize_utils::cursor::*, vote::state::{BlockTimestamp, LandedVote, Lockout, VoteState, MAX_ITEMS}, }, - bincode::serialized_size, std::io::Cursor, }; +// hardcode this number to avoid calculating onchain; this is a fixed-size ringbuffer +// `serialized_size()` must be used over `mem::size_of()` because of alignment +const PRIOR_VOTERS_SERIALIZED_SIZE: u64 = 1545; + pub(super) fn deserialize_vote_state_into( cursor: &mut Cursor<&[u8]>, vote_state: &mut VoteState, @@ -70,10 +72,8 @@ fn read_prior_voters_into>( // record our position at the start of the struct let prior_voters_position = cursor.position(); - // `serialized_size()` must be used over `mem::size_of()` because of alignment - let is_empty_position = serialized_size(&vote_state.prior_voters) - .ok() - .and_then(|v| v.checked_add(prior_voters_position)) + let is_empty_position = PRIOR_VOTERS_SERIALIZED_SIZE + .checked_add(prior_voters_position) .and_then(|v| v.checked_sub(1)) .ok_or(InstructionError::InvalidAccountData)?; @@ -86,21 +86,12 @@ fn read_prior_voters_into>( if !is_empty { cursor.set_position(prior_voters_position); - let mut encountered_null_voter = false; for i in 0..MAX_ITEMS { let prior_voter = read_pubkey(cursor)?; let from_epoch = read_u64(cursor)?; let until_epoch = read_u64(cursor)?; - let item = (prior_voter, from_epoch, until_epoch); - - if item == (Pubkey::default(), 0, 0) { - encountered_null_voter = true; - } else if encountered_null_voter { - // `prior_voters` should never be sparse - return Err(InstructionError::InvalidAccountData); - } else { - vote_state.prior_voters.buf[i] = item; - } + + vote_state.prior_voters.buf[i] = (prior_voter, from_epoch, until_epoch); } vote_state.prior_voters.idx = read_u64(cursor)? as usize; @@ -140,3 +131,17 @@ fn read_last_timestamp_into>( Ok(()) } + +#[cfg(test)] +mod tests { + use {super::*, bincode::serialized_size}; + + #[test] + fn test_prior_voters_serialized_size() { + let vote_state = VoteState::default(); + assert_eq!( + serialized_size(&vote_state.prior_voters).unwrap(), + PRIOR_VOTERS_SERIALIZED_SIZE + ); + } +} From 57125e7380662c45f29cf4bd55fe15c536129e8f Mon Sep 17 00:00:00 2001 From: hanako mumei <81144685+2501babe@users.noreply.github.com> Date: Tue, 16 Jul 2024 03:17:01 -0700 Subject: [PATCH 2/5] add 1.14 test --- .../src/vote/state/vote_state_1_14_11.rs | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sdk/program/src/vote/state/vote_state_1_14_11.rs b/sdk/program/src/vote/state/vote_state_1_14_11.rs index 9a2365674171c2..645e73dc353d3e 100644 --- a/sdk/program/src/vote/state/vote_state_1_14_11.rs +++ b/sdk/program/src/vote/state/vote_state_1_14_11.rs @@ -82,3 +82,44 @@ impl From for VoteState1_14_11 { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_vote_deserialize_1_14_11() { + // base case + let target_vote_state = VoteState1_14_11::default(); + let target_vote_state_versions = VoteStateVersions::V1_14_11(Box::new(target_vote_state)); + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!( + target_vote_state_versions.convert_to_current(), + test_vote_state + ); + + // variant + // provide 4x the minimum struct size in bytes to ensure we typically touch every field + let struct_bytes_x4 = std::mem::size_of::() * 4; + for _ in 0..1000 { + let raw_data: Vec = (0..struct_bytes_x4).map(|_| rand::random::()).collect(); + let mut unstructured = Unstructured::new(&raw_data); + + let arbitrary_vote_state = VoteState1_14_11::arbitrary(&mut unstructured).unwrap(); + let target_vote_state_versions = + VoteStateVersions::V1_14_11(Box::new(arbitrary_vote_state)); + + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + let target_vote_state = target_vote_state_versions.convert_to_current(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!(target_vote_state, test_vote_state); + } + } +} From c4bbab0aad86443245925d76adb3b0150711483a Mon Sep 17 00:00:00 2001 From: hanako mumei <81144685+2501babe@users.noreply.github.com> Date: Thu, 18 Jul 2024 03:32:39 -0700 Subject: [PATCH 3/5] address feedback --- sdk/program/src/vote/state/mod.rs | 33 ++++-------- .../src/vote/state/vote_state_deserialize.rs | 52 +++---------------- 2 files changed, 17 insertions(+), 68 deletions(-) diff --git a/sdk/program/src/vote/state/mod.rs b/sdk/program/src/vote/state/mod.rs index e1379f1f5ba0b0..5444e242d51dbe 100644 --- a/sdk/program/src/vote/state/mod.rs +++ b/sdk/program/src/vote/state/mod.rs @@ -323,6 +323,7 @@ const MAX_ITEMS: usize = 32; #[cfg_attr(feature = "frozen-abi", derive(AbiExample))] #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[cfg_attr(test, derive(Arbitrary))] pub struct CircBuf { buf: [I; MAX_ITEMS], /// next pointer @@ -368,23 +369,6 @@ impl CircBuf { } } -#[cfg(test)] -impl<'a, I: Default + Copy> Arbitrary<'a> for CircBuf -where - I: Arbitrary<'a>, -{ - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let mut circbuf = Self::default(); - - let len = u.arbitrary_len::()?; - for _ in 0..len { - circbuf.append(I::arbitrary(u)?); - } - - Ok(circbuf) - } -} - #[cfg_attr( feature = "frozen-abi", frozen_abi(digest = "EeenjJaSrm9hRM39gK6raRNtzG61hnk7GciUCJJRDUSQ"), @@ -528,12 +512,6 @@ impl VoteState { _ => Err(InstructionError::InvalidAccountData), }?; - // if cursor overruns the input, it produces null bytes and continues to advance `position` - // this check ensures we do not accept such a malformed input erroneously - if cursor.position() > input.len() as u64 { - return Err(InstructionError::InvalidAccountData); - } - Ok(()) } @@ -1179,7 +1157,14 @@ mod tests { let test_res = VoteState::deserialize_into(&raw_data, &mut test_vote_state); let bincode_res = bincode::deserialize::(&raw_data); - assert_eq!(test_res.is_ok(), bincode_res.is_ok()); + if test_res.is_err() { + assert!(bincode_res.is_err()); + } else { + assert_eq!( + VoteStateVersions::new_current(test_vote_state), + bincode_res.unwrap() + ); + } } } diff --git a/sdk/program/src/vote/state/vote_state_deserialize.rs b/sdk/program/src/vote/state/vote_state_deserialize.rs index f4628332c2f2c3..69fdf0636d9b57 100644 --- a/sdk/program/src/vote/state/vote_state_deserialize.rs +++ b/sdk/program/src/vote/state/vote_state_deserialize.rs @@ -7,10 +7,6 @@ use { std::io::Cursor, }; -// hardcode this number to avoid calculating onchain; this is a fixed-size ringbuffer -// `serialized_size()` must be used over `mem::size_of()` because of alignment -const PRIOR_VOTERS_SERIALIZED_SIZE: u64 = 1545; - pub(super) fn deserialize_vote_state_into( cursor: &mut Cursor<&[u8]>, vote_state: &mut VoteState, @@ -69,35 +65,17 @@ fn read_prior_voters_into>( cursor: &mut Cursor, vote_state: &mut VoteState, ) -> Result<(), InstructionError> { - // record our position at the start of the struct - let prior_voters_position = cursor.position(); - - let is_empty_position = PRIOR_VOTERS_SERIALIZED_SIZE - .checked_add(prior_voters_position) - .and_then(|v| v.checked_sub(1)) - .ok_or(InstructionError::InvalidAccountData)?; - - // move to the end, to check if we need to parse the data - cursor.set_position(is_empty_position); - - // if empty, we already read past the end of this struct and need to do no further work - // otherwise we go back to the start and proceed to decode the data - let is_empty = read_bool(cursor)?; - if !is_empty { - cursor.set_position(prior_voters_position); - - for i in 0..MAX_ITEMS { - let prior_voter = read_pubkey(cursor)?; - let from_epoch = read_u64(cursor)?; - let until_epoch = read_u64(cursor)?; + for i in 0..MAX_ITEMS { + let prior_voter = read_pubkey(cursor)?; + let from_epoch = read_u64(cursor)?; + let until_epoch = read_u64(cursor)?; - vote_state.prior_voters.buf[i] = (prior_voter, from_epoch, until_epoch); - } - - vote_state.prior_voters.idx = read_u64(cursor)? as usize; - vote_state.prior_voters.is_empty = read_bool(cursor)?; + vote_state.prior_voters.buf[i] = (prior_voter, from_epoch, until_epoch); } + vote_state.prior_voters.idx = read_u64(cursor)? as usize; + vote_state.prior_voters.is_empty = read_bool(cursor)?; + Ok(()) } @@ -131,17 +109,3 @@ fn read_last_timestamp_into>( Ok(()) } - -#[cfg(test)] -mod tests { - use {super::*, bincode::serialized_size}; - - #[test] - fn test_prior_voters_serialized_size() { - let vote_state = VoteState::default(); - assert_eq!( - serialized_size(&vote_state.prior_voters).unwrap(), - PRIOR_VOTERS_SERIALIZED_SIZE - ); - } -} From cd73fc44061138cc63fe4a54bd1028fe29c4a624 Mon Sep 17 00:00:00 2001 From: hanako mumei <81144685+2501babe@users.noreply.github.com> Date: Thu, 18 Jul 2024 08:05:06 -0700 Subject: [PATCH 4/5] test missized vote states --- sdk/program/src/vote/state/mod.rs | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/sdk/program/src/vote/state/mod.rs b/sdk/program/src/vote/state/mod.rs index 5444e242d51dbe..a40a7ba3476a26 100644 --- a/sdk/program/src/vote/state/mod.rs +++ b/sdk/program/src/vote/state/mod.rs @@ -1155,19 +1155,54 @@ mod tests { // so we only check that the parser does not panic and that it succeeds or fails exactly in line with bincode let mut test_vote_state = VoteState::default(); let test_res = VoteState::deserialize_into(&raw_data, &mut test_vote_state); - let bincode_res = bincode::deserialize::(&raw_data); + let bincode_res = bincode::deserialize::(&raw_data) + .map(|versioned| versioned.convert_to_current()); if test_res.is_err() { assert!(bincode_res.is_err()); } else { - assert_eq!( - VoteStateVersions::new_current(test_vote_state), - bincode_res.unwrap() - ); + assert_eq!(test_vote_state, bincode_res.unwrap()); } } } + #[test] + fn test_vote_deserialize_into_ill_sized() { + // provide 4x the minimum struct size in bytes to ensure we typically touch every field + let struct_bytes_x4 = std::mem::size_of::() * 4; + for _ in 0..1000 { + let raw_data: Vec = (0..struct_bytes_x4).map(|_| rand::random::()).collect(); + let mut unstructured = Unstructured::new(&raw_data); + + let original_vote_state_versions = + VoteStateVersions::arbitrary(&mut unstructured).unwrap(); + let original_buf = bincode::serialize(&original_vote_state_versions).unwrap(); + + let mut truncated_buf = original_buf.clone(); + let mut expanded_buf = original_buf.clone(); + + truncated_buf.resize(original_buf.len() - 8, 0); + expanded_buf.resize(original_buf.len() + 8, 0); + + // truncated fails + let mut test_vote_state = VoteState::default(); + let test_res = VoteState::deserialize_into(&truncated_buf, &mut test_vote_state); + let bincode_res = bincode::deserialize::(&truncated_buf) + .map(|versioned| versioned.convert_to_current()); + + assert!(test_res.is_err()); + assert!(bincode_res.is_err()); + + // expanded succeeds + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&expanded_buf, &mut test_vote_state).unwrap(); + let bincode_res = bincode::deserialize::(&expanded_buf) + .map(|versioned| versioned.convert_to_current()); + + assert_eq!(test_vote_state, bincode_res.unwrap()); + } + } + #[test] fn test_vote_state_commission_split() { let vote_state = VoteState::default(); From 28076691f762f33bc6de295332e245bac21a2c4b Mon Sep 17 00:00:00 2001 From: hanako mumei <81144685+2501babe@users.noreply.github.com> Date: Thu, 18 Jul 2024 08:07:23 -0700 Subject: [PATCH 5/5] forgot other circbuf arbitrary --- .../src/vote/state/vote_state_0_23_5.rs | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/sdk/program/src/vote/state/vote_state_0_23_5.rs b/sdk/program/src/vote/state/vote_state_0_23_5.rs index 2537633690c7ea..eff88adca6dd75 100644 --- a/sdk/program/src/vote/state/vote_state_0_23_5.rs +++ b/sdk/program/src/vote/state/vote_state_0_23_5.rs @@ -38,6 +38,7 @@ pub struct VoteState0_23_5 { } #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[cfg_attr(test, derive(Arbitrary))] pub struct CircBuf { pub buf: [I; MAX_ITEMS], /// next pointer @@ -63,23 +64,6 @@ impl CircBuf { } } -#[cfg(test)] -impl<'a, I: Default + Copy> Arbitrary<'a> for CircBuf -where - I: Arbitrary<'a>, -{ - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let mut circbuf = Self::default(); - - let len = u.arbitrary_len::()?; - for _ in 0..len { - circbuf.append(I::arbitrary(u)?); - } - - Ok(circbuf) - } -} - #[cfg(test)] mod tests { use super::*;