From 796888765ad9cc7acd5b5b893a92c662b9d97340 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Wed, 21 Jun 2023 00:47:52 +0200 Subject: [PATCH] Optimise ChainId epoch format parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit from_string checks if argument is in epoch format and then calls chain_version which does it again. chain_version and with_version use regex to check if string is in epoch format and then not only parse the string again but unnecessarily collect dash-separated parts into a new vector. Optimise it by providing split_chain_id method which checks the format and returns individual name and version parts of the id if it is in epoch format. Furthermore, don’t use regex which is a heavy machinery for simply checking if string ends in a positive integer. --- crates/ibc/src/core/ics24_host/identifier.rs | 66 ++++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/crates/ibc/src/core/ics24_host/identifier.rs b/crates/ibc/src/core/ics24_host/identifier.rs index 5023faade1..66e6fd76b3 100644 --- a/crates/ibc/src/core/ics24_host/identifier.rs +++ b/crates/ibc/src/core/ics24_host/identifier.rs @@ -65,16 +65,9 @@ impl ChainId { } pub fn from_string(id: &str) -> Self { - let version = if Self::is_epoch_format(id) { - Self::chain_version(id) - } else { - 0 - }; - - Self { - id: id.to_string(), - version, - } + let version = Self::chain_version(id); + let id = String::from(id); + Self { id, version } } /// Get a reference to the underlying string. @@ -98,15 +91,9 @@ impl ChainId { /// assert_eq!(ChainId::chain_version("testnet-helloworld-2"), 2); /// ``` pub fn chain_version(chain_id: &str) -> u64 { - if !ChainId::is_epoch_format(chain_id) { - return 0; - } - - let split: Vec<_> = chain_id.split('-').collect(); - split - .last() - .expect("get revision number from chain_id") - .parse() + split_chain_id(chain_id) + .ok() + .and_then(|(_, version)| u64::from_str(version).ok()) .unwrap_or(0) } @@ -120,8 +107,7 @@ impl ChainId { /// assert_eq!(ChainId::is_epoch_format("c-1"), true); /// ``` pub fn is_epoch_format(chain_id: &str) -> bool { - let re = safe_regex::regex!(br".*[^-]-[1-9][0-9]*"); - re.is_match(chain_id.as_bytes()) + split_chain_id(chain_id).is_ok() } /// with_version() checks if a chain_id is in the format required for parsing epochs, and if so @@ -131,19 +117,33 @@ impl ChainId { /// assert_eq!(ChainId::new("chainA", 1).with_version(2), ChainId::new("chainA", 2)); /// assert_eq!("chain1".parse::().unwrap().with_version(2), "chain1".parse::().unwrap()); /// ``` - pub fn with_version(mut self, version: u64) -> Self { - if Self::is_epoch_format(&self.id) { - self.id = { - let mut split: Vec<&str> = self.id.split('-').collect(); - let version = version.to_string(); - if let Some(last_elem) = split.last_mut() { - *last_elem = &version; - } - split.join("-") - }; - self.version = version; + pub fn with_version(self, version: u64) -> Self { + if self.version == 0 { + return self; } - self + // self.version ≠ 0 ⇒ self.id is in epoch format ⇒ split_chain_id can’t fail. + let (name, _) = split_chain_id(&self.id).unwrap(); + Self::new(name, version) + } +} + +/// Splits chain_id into name and version if the id includes epoch number. +/// +/// Chain id with epoch number has format `{name}-{version}` where name is +/// non-empty and version is a positive integer starting with a non-zero digit. +/// If `chain_id` is in that format, returns `(name, version)`; otherwise +/// returns an error. +fn split_chain_id(chain_id: &str) -> Result<(&str, &str), ()> { + let (name, version) = chain_id.rsplit_once('-').ok_or(())?; + // `-123` and `foo--123` are not in epoch format. + if name.is_empty() || name.as_bytes().last() == Some(&b'-') { + return Err(()); + } + let (car, cdr) = version.as_bytes().split_first().ok_or(())?; + if matches!(*car, b'1'..=b'9') && cdr.iter().all(u8::is_ascii_digit) { + Ok((name, version)) + } else { + Err(()) } }