pw_tokenizer: Add code size optimization to Rust implementation

* Marshals arguments into an `[Argument]` slice and calls a common
  encoding engine instead of emitting a separate function call for the
  token and each argument.
* Adds a `MessageWriter` interface for APIs like logging that use a
  shared/ambient resource to consume the message (sketched below).

Change-Id: I6e44fbd875228ee8757b684e967d6c5ec80ab90f
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/193504
Pigweed-Auto-Submit: Erik Gilling <[email protected]>
Commit-Queue: Auto-Submit <[email protected]>
Reviewed-by: Taylor Cramer <[email protected]>
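
The `MessageWriter` trait itself is defined in pw_tokenizer/lib.rs and is not
part of the diff below. A minimal sketch of its shape, inferred from the
`impl MessageWriter for CursorMessageWriter` in internal.rs (the real trait
definition and its documentation may differ):

use pw_status::Result;

// Sketch of the `MessageWriter` interface, inferred from the
// `CursorMessageWriter` implementation in this change.
pub trait MessageWriter {
    // Acquire the shared/ambient resource and start a new message.
    fn new() -> Self;

    // Append encoded bytes to the in-flight message.
    fn write(&mut self, data: &[u8]) -> Result<()>;

    // Report how many bytes of space remain; used to truncate long strings.
    fn remaining(&self) -> usize;

    // Complete the message and hand it off to the consumer.
    fn finalize(self) -> Result<()>;
}
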
konkers authored and CQ Bot Account committed Feb 24, 2024
1 parent 510845c commit dcbf65b
Showing 4 changed files with 441 additions and 77 deletions.
11 changes: 10 additions & 1 deletion pw_tokenizer/rust/BUILD.bazel
@@ -73,14 +73,19 @@ rust_library(
         "pw_tokenizer/internal.rs",
         "pw_tokenizer/lib.rs",
     ],
+    crate_features = select({
+        "@rust_crates//:std": ["std"],
+        "//conditions:default": [""],
+    }),
     proc_macro_deps = [":pw_tokenizer_macro"],
     visibility = ["//visibility:public"],
     deps = [
+        ":pw_tokenizer_core",
        "//pw_status/rust:pw_status",
         "//pw_stream/rust:pw_stream",
         "//pw_varint/rust:pw_varint",
     ] + select({
         "@rust_crates//:std": [
-            ":pw_tokenizer_core",  # Added for rustdoc linking support.
             "//pw_format/rust:pw_format",  # Added for rustdoc linking support.
         ],
         "//conditions:default": [],
@@ -90,6 +95,10 @@ rust_library(
 rust_test(
     name = "pw_tokenizer_test",
     crate = ":pw_tokenizer",
+    crate_features = select({
+        "@rust_crates//:std": ["std"],
+        "//conditions:default": [""],
+    }),
 )

 rust_doc_test(
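
The new crate_features select enables the crate's "std" feature when the build
targets a toolchain with the Rust standard library, and no features otherwise,
keeping the library usable in no_std embedded builds. As an illustration only
(the items actually gated behind the feature in pw_tokenizer are not shown in
this diff), a "std" crate feature typically guards standard-library-dependent
code like this:

// Hypothetical example of code gated behind a "std" crate feature; it is
// not taken from pw_tokenizer itself.
#[cfg(feature = "std")]
pub fn encoded_to_vec(encoded: &[u8]) -> Vec<u8> {
    encoded.to_vec() // Vec is available when building with std.
}
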
122 changes: 112 additions & 10 deletions pw_tokenizer/rust/pw_tokenizer/internal.rs
@@ -14,16 +14,56 @@

 use core::cmp::min;

-use pw_status::Result;
+use pw_status::{Error, Result};
 use pw_stream::{Cursor, Write};
+use pw_varint::VarintEncode;

-pub fn encode_string(cursor: &mut Cursor<&mut [u8]>, value: &str) -> Result<()> {
+use crate::MessageWriter;
+
+// The `Argument` enum is used to marshal arguments to pass to the tokenization
+// engine.
+pub enum Argument<'a> {
+    String(&'a str),
+    Varint(i32),
+    Char(u8),
+}
+
+// Wraps a `Cursor` so that `tokenize_to_buffer` and `tokenize_to_writer` can
+// share implementations. It is not meant to be used outside of
+// `tokenize_to_buffer`.
+struct CursorMessageWriter<'a> {
+    cursor: Cursor<&'a mut [u8]>,
+}
+
+impl MessageWriter for CursorMessageWriter<'_> {
+    fn new() -> Self {
+        // Ensure `tokenize_to_buffer` never calls `new()`.
+        unimplemented!();
+    }
+
+    fn write(&mut self, data: &[u8]) -> Result<()> {
+        self.cursor.write_all(data)
+    }
+
+    fn remaining(&self) -> usize {
+        self.cursor.remaining()
+    }
+
+    fn finalize(self) -> Result<()> {
+        // Ensure `tokenize_to_buffer` never calls `finalize()`.
+        unimplemented!();
+    }
+}
+
+// Encode a string in Tokenizer format: length byte + data with the high bit of
+// the length byte used to signal that the string was truncated.
+pub fn encode_string<W: MessageWriter>(writer: &mut W, value: &str) -> Result<()> {
     const MAX_STRING_LENGTH: usize = 0x7f;

     let string_bytes = value.as_bytes();

     // Limit the encoding to the lesser of 127 or the available space in the buffer.
-    let max_len = min(MAX_STRING_LENGTH, cursor.remaining() - 1);
+    let max_len = min(MAX_STRING_LENGTH, writer.remaining() - 1);
     let overflow = max_len < string_bytes.len();
     let len = min(max_len, string_bytes.len());

@@ -35,24 +75,86 @@ pub fn encode_string(cursor: &mut Cursor<&mut [u8]>, value: &str) -> Result<()>
     if overflow {
         header |= 0x80;
     }
-    cursor.write_all(&[header as u8])?;
+    writer.write(&[header as u8])?;

-    cursor.write_all(&string_bytes[..len])
+    writer.write(&string_bytes[..len])
 }

+// Write out a tokenized message to an already created `MessageWriter`.
+fn tokenize_engine<W: crate::MessageWriter>(
+    writer: &mut W,
+    token: u32,
+    args: &[Argument<'_>],
+) -> Result<()> {
+    writer.write(&token.to_le_bytes()[..])?;
+    for arg in args {
+        match arg {
+            Argument::String(s) => encode_string(writer, s)?,
+            Argument::Varint(i) => {
+                let mut encode_buffer = [0u8; 10];
+                let len = i.varint_encode(&mut encode_buffer)?;
+                writer.write(&encode_buffer[..len])?;
+            }
+            Argument::Char(c) => writer.write(&[*c])?,
+        }
+    }
+
+    Ok(())
+}
+
+#[inline(never)]
+pub fn tokenize_to_buffer(buffer: &mut [u8], token: u32, args: &[Argument<'_>]) -> Result<usize> {
+    let mut writer = CursorMessageWriter {
+        cursor: Cursor::new(buffer),
+    };
+    tokenize_engine(&mut writer, token, args)?;
+    Ok(writer.cursor.position())
+}
+
+#[inline(never)]
+pub fn tokenize_to_buffer_no_args(buffer: &mut [u8], token: u32) -> Result<usize> {
+    let token_bytes = &token.to_le_bytes()[..];
+    let token_len = token_bytes.len();
+    if buffer.len() < token_len {
+        return Err(Error::OutOfRange);
+    }
+    buffer[..token_len].copy_from_slice(token_bytes);
+
+    Ok(token_len)
+}
+
+#[inline(never)]
+pub fn tokenize_to_writer<W: crate::MessageWriter>(
+    token: u32,
+    args: &[Argument<'_>],
+) -> Result<()> {
+    let mut writer = W::new();
+    tokenize_engine(&mut writer, token, args)?;
+    writer.finalize()
+}
+
+#[inline(never)]
+pub fn tokenize_to_writer_no_args<W: crate::MessageWriter>(token: u32) -> Result<()> {
+    let mut writer = W::new();
+    writer.write(&token.to_le_bytes()[..])?;
+    writer.finalize()
+}
+
 #[cfg(test)]
 mod test {
     use pw_stream::{Cursor, Seek};

-    use super::encode_string;
+    use super::*;

     fn do_string_encode_test<const BUFFER_LEN: usize>(value: &str, expected: &[u8]) {
         let mut buffer = [0u8; BUFFER_LEN];
-        let mut cursor = Cursor::new(&mut buffer[..]);
-        encode_string(&mut cursor, value).unwrap();
+        let mut writer = CursorMessageWriter {
+            cursor: Cursor::new(&mut buffer),
+        };
+        encode_string(&mut writer, value).unwrap();

-        let len = cursor.stream_position().unwrap() as usize;
-        let buffer = cursor.into_inner();
+        let len = writer.cursor.stream_position().unwrap() as usize;
+        let buffer = writer.cursor.into_inner();

         assert_eq!(len, expected.len());
         assert_eq!(&buffer[..len], expected);
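
Taken together, the new entry points are used roughly as follows. This is a
usage sketch rather than code from the change: the token value is invented
(real tokens are compile-time hashes of the format string produced by the
tokenizer macros), and the pw_tokenizer::internal path assumes the module is
exported under that name:

use pw_status::Result;
use pw_tokenizer::internal::{tokenize_to_buffer, Argument};

fn encode_example() -> Result<()> {
    // Hypothetical token; in practice the macros derive it from the format string.
    const TOKEN: u32 = 0xDECAF123;

    let mut buffer = [0u8; 64];
    // Writes the 4-byte little-endian token, then each argument: strings as
    // length-prefixed bytes, integers as varints, chars as a single byte.
    let len = tokenize_to_buffer(
        &mut buffer,
        TOKEN,
        &[
            Argument::String("core0"),
            Argument::Varint(42),
            Argument::Char(b'!'),
        ],
    )?;
    let _encoded = &buffer[..len];
    Ok(())
}

Because every call site now just builds an `[Argument]` slice and makes a
single call into the shared #[inline(never)] encoding engine, per-call-site
code shrinks to slice construction plus one call, rather than a separate call
for the token and for every argument. That is the code size optimization named
in the commit title.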
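
For the logging-style use case called out in the commit message,
tokenize_to_writer drives a caller-supplied MessageWriter instead of a
caller-supplied buffer (the diff's `use crate::MessageWriter;` shows the trait
lives at the crate root). Below is a hypothetical implementation backed by an
ambient log sink; LogMessageWriter, its 64-byte capacity, and the log_bytes
platform hook are all invented for illustration:

use pw_status::{Error, Result};
use pw_tokenizer::MessageWriter;

// Invented platform hook standing in for the ambient log sink.
fn log_bytes(_bytes: &[u8]) { /* platform-specific transport */ }

// Accumulates one encoded message, then hands it to the platform log hook
// when the tokenization engine calls finalize().
struct LogMessageWriter {
    buffer: [u8; 64],
    len: usize,
}

impl MessageWriter for LogMessageWriter {
    fn new() -> Self {
        Self { buffer: [0; 64], len: 0 }
    }

    fn write(&mut self, data: &[u8]) -> Result<()> {
        let end = self.len + data.len();
        if end > self.buffer.len() {
            return Err(Error::OutOfRange);
        }
        self.buffer[self.len..end].copy_from_slice(data);
        self.len = end;
        Ok(())
    }

    fn remaining(&self) -> usize {
        self.buffer.len() - self.len
    }

    fn finalize(self) -> Result<()> {
        log_bytes(&self.buffer[..self.len]);
        Ok(())
    }
}

A message is then sent without the caller managing any buffer, for example:
tokenize_to_writer::<LogMessageWriter>(TOKEN, &[Argument::Varint(42)])?;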