diff --git a/Cargo.lock b/Cargo.lock index e06479d..89d980a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -120,15 +120,15 @@ checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" [[package]] name = "base64" -version = "0.13.1" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64" -version = "0.21.7" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" @@ -179,9 +179,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.1" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c764d619ca78fccbf3069b37bd7af92577f044bb15236036662d79b6559f25b7" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byteorder" @@ -428,13 +428,13 @@ dependencies = [ [[package]] name = "delegate" -version = "0.10.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee5df75c70b95bd3aacc8e2fd098797692fb1d54121019c4de481e42f04c8a1" +checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.50", ] [[package]] @@ -749,6 +749,12 @@ dependencies = [ "cc", ] +[[package]] +name = "ice_code" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6524844f553e8da5999f3000cf11d3f1ff926bb03fc087441c7b86dee4a7d48" + [[package]] name = "ident_case" version = "1.0.1" @@ -790,6 +796,7 @@ checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies 
= [ "equivalent", "hashbrown 0.14.3", + "serde", ] [[package]] @@ -833,16 +840,16 @@ dependencies = [ "convert_case", "flate2", "infer", - "ion-rs 1.0.0-rc.3", + "ion-rs 1.0.0-rc.5", "ion-schema", "matches", - "memmap", "pager", "rstest", "serde", "serde_json", "tempfile", "tera", + "termcolor", "thiserror", "zstd", ] @@ -869,18 +876,17 @@ dependencies = [ [[package]] name = "ion-rs" -version = "1.0.0-rc.3" +version = "1.0.0-rc.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4048cdda3ac98a729cdbac81a4dbcae988bdf01696da36244d65c1d234ee93a" +checksum = "ae6628b313b01f34e167393a688a78ce907ff7307cb9e4a93c9d3599cabb1b03" dependencies = [ "arrayvec", "base64 0.12.3", "bumpalo", - "bytes", "chrono", - "delegate 0.10.0", + "delegate 0.12.0", + "ice_code", "nom", - "num-bigint 0.4.4", "num-integer", "num-traits", "serde", @@ -987,16 +993,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "memmap" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1435,15 +1431,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.3.3" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" +checksum = "0ad483d2ab0149d5a5ebcd9972a3852711e0153d863bf5a5d0391d28883c4a20" dependencies = [ - "base64 0.13.1", + "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", + "indexmap 2.2.3", "serde", + "serde_derive", "serde_json", "serde_with_macros", "time", @@ -1451,9 +1449,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.3.3" +version = "3.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "881b6f881b17d13214e5d494c939ebab463d01264ce1811e9d4ac3a882e7695f" +checksum = "65569b702f41443e8bc8bbb1c5779bd0450bbe723b56198980e80ec45780bce2" dependencies = [ "darling", "proc-macro2", @@ -1572,6 +1570,15 @@ dependencies = [ "unic-segment", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "termtree" version = "0.4.1" diff --git a/Cargo.toml b/Cargo.toml index 2bc0e43..2e62e5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,18 +17,18 @@ clap = { version = "4.0.17", features = ["cargo"] } colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" -ion-rs = {version = "1.0.0-rc.2", features = ["experimental"]} -memmap = "0.7.0" +ion-rs = { version = "1.0.0-rc.5", features = ["experimental"] } tempfile = "3.2.0" ion-schema = "0.10.0" serde = { version = "1.0.163", features = ["derive"] } -serde_json = { version = "1.0.81", features = [ "arbitrary_precision", "preserve_order" ] } +serde_json = { version = "1.0.81", features = ["arbitrary_precision", "preserve_order"] } base64 = "0.21.1" -tera = { version = "1.18.1", optional = true } +tera = { version = "1.18.1", optional = true } convert_case = { version = "0.6.0", optional = true } matches = "0.1.10" thiserror = "1.0.50" zstd = "0.13.0" +termcolor = "1.4.1" [target.'cfg(not(target_os = "windows"))'.dependencies] pager = "0.16.1" diff --git a/src/bin/ion/commands/beta/count.rs b/src/bin/ion/commands/beta/count.rs index 7e857e9..ee71181 100644 --- a/src/bin/ion/commands/beta/count.rs +++ b/src/bin/ion/commands/beta/count.rs @@ -25,13 +25,13 @@ impl IonCliCommand for CountCommand { for input_file in input_file_iter { let file = File::open(input_file) .with_context(|| format!("Could not open file '{}'", input_file))?; - let mut reader = 
ReaderBuilder::new().build(file)?; + let mut reader = Reader::new(AnyEncoding, file)?; print_top_level_value_count(&mut reader)?; } } else { let input: StdinLock = stdin().lock(); let buf_reader = BufReader::new(input); - let mut reader = ReaderBuilder::new().build(buf_reader)?; + let mut reader = Reader::new(AnyEncoding, buf_reader)?; print_top_level_value_count(&mut reader)?; }; @@ -39,15 +39,9 @@ impl IonCliCommand for CountCommand { } } -fn print_top_level_value_count(reader: &mut Reader) -> Result<()> { +fn print_top_level_value_count(reader: &mut Reader) -> Result<()> { let mut count: usize = 0; - loop { - let item = reader - .next() - .with_context(|| "could not count values in Ion stream")?; - if item == StreamItem::Nothing { - break; - } + while let Some(_) = reader.next()? { count += 1; } println!("{}", count); diff --git a/src/bin/ion/commands/beta/generate/utils.rs b/src/bin/ion/commands/beta/generate/utils.rs index b2fdcae..b3852d9 100644 --- a/src/bin/ion/commands/beta/generate/utils.rs +++ b/src/bin/ion/commands/beta/generate/utils.rs @@ -5,7 +5,7 @@ use serde::Serialize; use std::fmt::{Display, Formatter}; /// Represents a field that will be added to generated data model. -/// This will be used by the template engine to fill properties of a struct/classs. +/// This will be used by the template engine to fill properties of a struct/class. 
#[derive(Serialize)] pub struct Field { pub(crate) name: String, diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index 04bd896..e96b907 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -1,19 +1,27 @@ -use std::cmp::min; -use std::fmt::{Display, Write}; +use std::fmt::Display; use std::fs::File; use std::io; -use std::io::BufWriter; -use std::ops::Range; -use std::str::{from_utf8_unchecked, FromStr}; +use std::io::Write; +use std::str::FromStr; -use crate::commands::{IonCliCommand, WithIonCliArgument}; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; -use colored::Colorize; +use ion_rs::v1_0::{LazyRawBinaryValue, RawValueRef}; use ion_rs::*; -use memmap::MmapOptions; -#[cfg(not(target_os = "windows"))] -use pager::Pager; + +use crate::commands::{IonCliCommand, WithIonCliArgument}; + +// The `inspect` command uses the `termcolor` crate to colorize its text when STDOUT is a TTY. +use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, StandardStreamLock, WriteColor}; +// When writing to a named file instead of STDOUT, `inspect` will use a `FileWriter` instead. +// `FileWriter` ignores all requests to emit TTY color escape codes. +use crate::file_writer::FileWriter; + +// * The output stream could be STDOUT or a file handle, so we use `dyn io::Write` to abstract +// over the two implementations. +// * The Drop implementation will ensure that the output stream is flushed when the last reference +// is dropped, so we don't need to do that manually. +type OutputRef<'a> = Box; pub struct InspectCommand; @@ -40,12 +48,11 @@ impl IonCliCommand for InspectCommand { .help("Do not display any user values for the first `n` bytes of Ion data.") .long_help( "When specified, the inspector will skip ahead `n` bytes before -beginning to display the contents of the stream. 
System values like -Ion version markers and symbol tables in the bytes being skipped will -still be displayed. If the requested number of bytes falls in the -middle of a value, the whole value (complete with field ID and -annotations if applicable) will be displayed. If the value is nested -in one or more containers, those containers will be displayed too.", +beginning to display the contents of the stream. If the requested number +of bytes falls in the middle of a scalar, the whole value (complete with +field ID and annotations if applicable) will be displayed. If the value +is nested in one or more containers, the opening delimiters of those +containers will be displayed.", ), ) .arg( @@ -58,24 +65,28 @@ in one or more containers, those containers will be displayed too.", .help("Only display the next 'n' bytes of Ion data.") .long_help( "When specified, the inspector will stop printing values after -processing `n` bytes of Ion data. If `n` falls within a value, the -complete value will be displayed.", +processing `n` bytes of Ion data. If `n` falls within a scalar, the +complete value will be displayed. If `n` falls within one or more containers, +the closing delimiters for those containers will be displayed. If this flag +is used with `--skip-bytes`, `n` is counted from the beginning of the first +value that starts after `--skip-bytes`. +", ), ) } - #[cfg(not(target_os = "windows"))] // TODO find a cross-platform pager implementation. - fn set_up_pager(&self) { - // Direct output to the pager specified by the PAGER environment variable, or "less -FIRX" - // if the environment variable is not set. Note: a pager is not used if the output is not - // a TTY. - Pager::with_default_pager("less -FIRX").setup(); - } - fn run(&self, _command_path: &mut Vec, args: &ArgMatches) -> Result<()> { - self.set_up_pager(); + // On macOS and Linux, the `inspect` command's output will automatically be rerouted to a paging + // utility like `less` when STDOUT is a TTY. 
+ // TODO find a cross-platform pager implementation. + #[cfg(not(target_os = "windows"))] + { + // If STDOUT is a TTY, direct output to the pager specified by the PAGER environment + // variable, or "less -FIRX" if the environment variable is not set. + pager::Pager::with_default_pager("less -FIRX").setup(); + } - // --skip-bytes has a default value, so we can unwrap this safely. + // `--skip-bytes` has a default value, so we can unwrap this safely. let skip_bytes_arg = args.get_one::("skip-bytes").unwrap().as_str(); let bytes_to_skip = usize::from_str(skip_bytes_arg) @@ -83,7 +94,7 @@ complete value will be displayed.", // will be displayed if it bubbles up to the end user. .with_context(|| format!("Invalid value for '--skip-bytes': '{}'", skip_bytes_arg))?; - // --limit-bytes has a default value, so we can unwrap this safely. + // `--limit-bytes` has a default value, so we can unwrap this safely. let limit_bytes_arg = args.get_one::("limit-bytes").unwrap().as_str(); let mut limit_bytes = usize::from_str(limit_bytes_arg) @@ -92,18 +103,28 @@ complete value will be displayed.", // If unset, --limit-bytes is effectively usize::MAX. However, it's easier on users if we let // them specify "0" on the command line to mean "no limit". if limit_bytes == 0 { - limit_bytes = usize::MAX + limit_bytes = usize::MAX; } + // These types are provided by the `termcolor` crate. They wrap the normal `io::Stdout` and + // `io::StdoutLock` types, making it possible to write colorful text to the output stream when + // it's a TTY that understands formatting escape codes. These variables are declared here so + // the lifetime will extend through the remainder of the function. Unlike `io::StdoutLock`, + // the `StandardStreamLock` does not have a static lifetime. + let stdout: StandardStream; + let stdout_lock: StandardStreamLock<'_>; + // If the user has specified an output file, use it. 
let mut output: OutputRef = if let Some(file_name) = args.get_one::("output") { let output_file = File::create(file_name) - .with_context(|| format!("Could not open '{}'", file_name))?; - let buf_writer = BufWriter::new(output_file); - Box::new(buf_writer) + .with_context(|| format!("Could not open output file '{file_name}' for writing"))?; + let file_writer = FileWriter::new(output_file); + Box::new(file_writer) } else { // Otherwise, write to STDOUT. - Box::new(io::stdout().lock()) + stdout = StandardStream::stdout(ColorChoice::Always); + stdout_lock = stdout.lock(); + Box::new(stdout_lock) }; // Run the inspector on each input file that was specified. @@ -111,7 +132,7 @@ complete value will be displayed.", for input_file_name in input_file_iter { let input_file = File::open(input_file_name) .with_context(|| format!("Could not open '{}'", input_file_name))?; - inspect_file( + inspect_input( input_file_name, input_file, &mut output, @@ -120,613 +141,1228 @@ complete value will be displayed.", )?; } } else { + let stdin_lock = io::stdin().lock(); // If no input file was specified, run the inspector on STDIN. - - // The inspector expects its input to be a byte array or mmap()ed file acting as a byte - // array. If the user wishes to provide data on STDIN, we'll need to copy those bytes to - // a temporary file and then read from that. - - // Create a temporary file that will delete itself when the program ends. - let mut input_file = tempfile::tempfile().with_context(|| { - concat!( - "Failed to create a temporary file to store STDIN.", - "Try passing an --input flag instead." - ) - })?; - - // Pipe the data from STDIN to the temporary file. 
- let mut writer = BufWriter::new(input_file); - io::copy(&mut io::stdin(), &mut writer) - .with_context(|| "Failed to copy STDIN to a temp file.")?; - // Get our file handle back from the BufWriter - input_file = writer - .into_inner() - .with_context(|| "Failed to read from temp file containing STDIN data.")?; - // Read from the now-populated temporary file. - inspect_file( - "STDIN temp file", - input_file, - &mut output, - bytes_to_skip, - limit_bytes, - )?; + inspect_input("STDIN", stdin_lock, &mut output, bytes_to_skip, limit_bytes)?; } Ok(()) } } -// Create a type alias to simplify working with a shared reference to our output stream. -type OutputRef = Box; -// * The output stream could be STDOUT or a file handle, so we use `dyn io::Write` to abstract -// over the two implementations. -// * The Drop implementation will ensure that the output stream is flushed when the last reference -// is dropped, so we don't need to do that manually. - -// Given a file, try to mmap() it and run the inspector over the resulting byte array. -fn inspect_file( - input_file_name: &str, - input_file: File, +/// Prints a table showing the offset, length, binary encoding, and text encoding of the Ion stream +/// contained in `input`. +fn inspect_input( + input_name: &str, + input: Input, output: &mut OutputRef, bytes_to_skip: usize, limit_bytes: usize, ) -> Result<()> { - // mmap involves operating system interactions that inherently place its usage outside of Rust's - // safety guarantees. If the file is unexpectedly truncated while it's being read, for example, - // problems could arise. - let mmap = unsafe { - MmapOptions::new() - .map(&input_file) - .with_context(|| format!("Could not mmap '{}'", input_file_name))? - }; - - // Treat the mmap as a byte array. - let ion_data: &[u8] = &mmap[..]; - // Confirm that the input data is binary Ion, then run the inspector. - match ion_data { - // Pattern match the byte array to verify it starts with an IVM - [0xE0, 0x01, 0x00, 0xEA, ..] 
=> { - write_header(output)?; - let mut inspector = IonInspector::new(ion_data, output, bytes_to_skip, limit_bytes)?; - // This inspects all values at the top level, recursing as necessary. - inspector.inspect_level()?; - } - _ => { - // bail! constructs an `anyhow::Result` with the given context and returns. - bail!( - "Input file '{}' does not appear to be binary Ion.", - input_file_name - ); - } - }; + let mut reader = SystemReader::new(AnyEncoding, input); + let mut inspector = IonInspector::new(output, bytes_to_skip, limit_bytes)?; + // This inspects all values at the top level, recursing as necessary. + inspector + .inspect_top_level(&mut reader) + .with_context(|| format!("input: {input_name}"))?; Ok(()) } -const IVM_HEX: &str = "e0 01 00 ea"; -const IVM_TEXT: &str = "// Ion 1.0 Version Marker"; -// System events (IVM, symtabs) are always at the top level. -const SYSTEM_EVENT_INDENTATION: &str = ""; -const LEVEL_INDENTATION: &str = " "; // 2 spaces per level -const TEXT_WRITER_INITIAL_BUFFER_SIZE: usize = 128; +// See the Wikipedia page for Unicode Box Drawing[1] for other potentially useful glyphs. 
+// [1] https://en.wikipedia.org/wiki/Box-drawing_characters#Unicode +const VERTICAL_LINE: &str = "│"; +const START_OF_HEADER: &str = + "┌──────────────┬──────────────┬─────────────────────────┬──────────────────────┐"; +const END_OF_HEADER: &str = + "├──────────────┼──────────────┼─────────────────────────┼──────────────────────┘"; +const ROW_SEPARATOR: &str = r#" +├──────────────┼──────────────┼─────────────────────────┤"#; +const END_OF_TABLE: &str = r#" +└──────────────┴──────────────┴─────────────────────────┘"#; -struct IonInspector<'a> { - output: &'a mut OutputRef, - reader: SystemReader>, +struct IonInspector<'a, 'b> { + output: &'a mut OutputRef<'b>, bytes_to_skip: usize, + skip_complete: bool, limit_bytes: usize, - // Reusable buffer for formatting bytes as hex - hex_buffer: String, - // Reusable buffer for formatting text - text_buffer: String, - // Reusable buffer for colorizing text - color_buffer: String, - // Reusable buffer for tracking indentation - indentation_buffer: String, // Text Ion writer for formatting scalar values - text_ion_writer: RawTextWriter>, + text_writer: v1_0::RawTextWriter>, +} + +// This buffer is used by the IonInspector's `text_writer` to format scalar values. +const TEXT_WRITER_INITIAL_BUFFER_SIZE: usize = 128; + +// The number of hex-encoded bytes to show in each row of the `Binary Ion` column. +const BYTES_PER_ROW: usize = 8; + +/// Friendly trait alias (by way of an empty extension) for a closure that takes an output reference +/// and a value and writes a comment for that value. Returns `true` if it wrote a comment, `false` +/// otherwise. +trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result {} + +impl<'x, F> CommentFn<'x> for F where + F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result +{ } -impl<'a> IonInspector<'a> { - fn new<'b>( - input: &'b [u8], - out: &'b mut OutputRef, +/// Returns a `CommentFn` implementation that does nothing. 
+fn no_comment<'x>() -> impl CommentFn<'x> { + |_, _| Ok(false) +} + +impl<'a, 'b> IonInspector<'a, 'b> { + fn new( + out: &'a mut OutputRef<'b>, bytes_to_skip: usize, limit_bytes: usize, - ) -> IonResult> { - let reader = SystemReader::new(RawBinaryReader::new(input)); - let text_ion_writer = RawTextWriterBuilder::new(TextKind::Compact) - .build(Vec::with_capacity(TEXT_WRITER_INITIAL_BUFFER_SIZE))?; + ) -> IonResult> { + let text_writer = WriteConfig::::new(TextFormat::Compact) + .build_raw_writer(Vec::with_capacity(TEXT_WRITER_INITIAL_BUFFER_SIZE))?; let inspector = IonInspector { output: out, - reader, bytes_to_skip, + skip_complete: bytes_to_skip == 0, limit_bytes, - hex_buffer: String::new(), - text_buffer: String::new(), - color_buffer: String::new(), - indentation_buffer: String::new(), - text_ion_writer, + text_writer, }; Ok(inspector) } - // Returns the offset of the first byte that pertains to the value on which the reader is - // currently parked. - fn first_value_byte_offset(&self) -> usize { - if let Some(offset) = self.reader.field_id_offset() { - return offset; + /// Iterates over the items in `reader`, printing a table section for each top level value. + fn inspect_top_level( + &mut self, + reader: &mut SystemReader, + ) -> Result<()> { + const TOP_LEVEL_DEPTH: usize = 0; + self.write_table_header()?; + let mut is_first_item = true; + let mut has_printed_skip_message = false; + loop { + // TODO: This does not account for shared symbol table imports. However, the CLI does not + // yet support specifying a catalog, so it's correct enough for the moment. + let mut next_symbol_id = reader.symbol_table().len(); + let item = reader.next_item()?; + let is_last_item = matches!(item, SystemStreamItem::EndOfStream(_)); + + match self.select_action( + TOP_LEVEL_DEPTH, + &mut has_printed_skip_message, + &item.raw_stream_item(), + "stream items", + "ending", + )? 
{ + InspectorAction::Skip => continue, + InspectorAction::Inspect => {} + InspectorAction::LimitReached => break, + } + + if !is_first_item && !is_last_item { + // If this item is neither the first nor last in the stream, print a row separator. + write!(self.output, "{ROW_SEPARATOR}")?; + } + + match item { + SystemStreamItem::SymbolTable(lazy_struct) => { + let is_append = lazy_struct.get("imports")? + == Some(ValueRef::Symbol(SymbolRef::with_text("$ion_symbol_table"))); + if !is_append { + next_symbol_id = 10; // First available SID after system symbols in Ion 1.0 + } + self.inspect_symbol_table(next_symbol_id, lazy_struct)?; + } + SystemStreamItem::Value(lazy_value) => { + self.inspect_value(0, "", lazy_value, no_comment())?; + } + SystemStreamItem::VersionMarker(marker) => { + self.inspect_ivm(marker)?; + } + SystemStreamItem::EndOfStream(_) => { + break; + } + // `SystemStreamItem` is marked `#[non_exhaustive]`, so this branch is needed. + // The arms above cover all of the existing variants at the time of writing. + _ => unimplemented!("a new SystemStreamItem variant was added"), + } + + is_first_item = false; } - if let Some(offset) = self.reader.annotations_offset() { - return offset; + self.output.write_all(END_OF_TABLE.as_bytes())?; + Ok(()) + } + + /// If `maybe_item` is: + /// * `Some(entity)`, checks to see if the entity's final byte offset is beyond the configured + /// number of bytes to skip. + /// * `None`, then there is no stream-level entity backing the item (that is: it was the result + /// of a macro expansion). Checks to see if the inspector has already completed its + /// skipping phase on an earlier item. + fn should_skip(&mut self, maybe_item: &Option) -> bool { + match maybe_item { + // If this item came from an input literal, see if the input literal ends after + // the requested number of bytes to skip. If not, we'll move to the next one. 
+ Some(item) => item.range().end <= self.bytes_to_skip, + // If this item came from a macro, there's no corresponding input literal. If we + // haven't finished skipping input literals, we'll skip this ephemeral value. + None => !self.skip_complete, } - self.reader.header_offset() } - // Returns the byte offset range containing the current value and its annotations/field ID if - // applicable. - fn complete_value_range(&self) -> Range { - let start = self.first_value_byte_offset(); - let end = self.reader.value_range().end; - start..end + /// If `maybe_item` is: + /// * `Some(entity)`, checks to see if the entity's final byte offset is beyond the configured + /// number of bytes to inspect. + /// * `None`, then there is no stream-level entity backing the item. These will always be + /// inspected; if the e-expression that produced the value was not beyond the limit, + /// none of the ephemeral values it produces are either. + fn is_past_limit(&self, maybe_item: &Option) -> bool { + let limit = self.bytes_to_skip.saturating_add(self.limit_bytes); + maybe_item + .as_ref() + .map(|item| item.range().start >= limit) + .unwrap_or(false) + } + + /// Convenience method to set the output stream to the specified color/style for the duration of `write_fn` + /// and then reset it upon completion. + fn with_style( + &mut self, + style: ColorSpec, + write_fn: impl FnOnce(&mut OutputRef) -> Result<()>, + ) -> Result<()> { + self.output.set_color(&style)?; + write_fn(&mut self.output)?; + self.output.reset()?; + Ok(()) } - // Displays all of the values (however deeply nested) at the current level. - fn inspect_level(&mut self) -> Result<()> { - self.increase_indentation(); + /// Convenience method to set the output stream to the specified color/style, write `text`, + /// and then reset the output stream's style again. 
+ fn write_with_style(&mut self, style: ColorSpec, text: &str) -> Result<()> { + self.with_style(style, |out| { + out.write_all(text.as_bytes())?; + Ok(()) + }) + } - // Per-level bytes skipped are tracked so we can add them to the text Ion comments that - // appear each time some number of values is skipped. - let mut bytes_skipped_this_level = 0; + /// Convenience method to move output to the next line. + fn newline(&mut self) -> Result<()> { + Ok(self.output.write_all(b"\n")?) + } - loop { - let ion_type = match self.reader.next()? { - SystemStreamItem::Nothing => break, - SystemStreamItem::VersionMarker(major, minor) => { - if major != 1 || minor != 0 { - bail!( - "Only Ion 1.0 is supported. Found IVM for v{}.{}", - major, - minor - ); - } - output( - self.output, - None, - Some(4), - SYSTEM_EVENT_INDENTATION, - IVM_HEX, - IVM_TEXT.dimmed(), - ) - .expect("output() failure from on_ivm()"); - continue; - } - // We don't care if this is a system or user-level value; that distinction - // is handled inside the SystemReader. - SystemStreamItem::SymbolTableValue(ion_type) - | SystemStreamItem::Value(ion_type) - | SystemStreamItem::SymbolTableNull(ion_type) - | SystemStreamItem::Null(ion_type) => ion_type, - }; - // See if we've already processed `bytes_to_skip` bytes; if not, move to the next value. - let complete_value_range = self.complete_value_range(); - if complete_value_range.end <= self.bytes_to_skip { - bytes_skipped_this_level += complete_value_range.len(); - continue; - } + /// Inspects an Ion Version Marker. 
+ fn inspect_ivm(&mut self, marker: LazyRawAnyVersionMarker<'_>) -> Result<()> { + const BINARY_IVM_LENGTH: usize = 4; + self.newline()?; + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![IonBytes::new( + BytesKind::VersionMarker, + marker.span().bytes(), + )], + ); + self.write_offset_length_and_bytes( + marker.range().start, + BINARY_IVM_LENGTH, + &mut formatter, + )?; + self.with_style(BytesKind::VersionMarker.style(), |out| { + let (major, minor) = marker.version(); + write!(out, "$ion_{major}_{minor}")?; + Ok(()) + })?; - // Saturating subtraction: if the result would underflow, the answer will be zero. - let bytes_processed = complete_value_range - .start - .saturating_sub(self.bytes_to_skip); - // See if we've already processed `limit_bytes`; if so, stop processing. - if bytes_processed >= self.limit_bytes { - let limit_message = if self.reader.depth() > 0 { - "// --limit-bytes reached, stepping out." - } else { - "// --limit-bytes reached, ending." - }; - output( - self.output, - None, - None, - &self.indentation_buffer, - "...", - limit_message.dimmed(), - )?; - self.decrease_indentation(); - return Ok(()); - } + self.with_style(comment_style(), |out| { + write!(out, " // Version marker")?; + Ok(()) + })?; + self.output.reset()?; + Ok(()) + } - // We're no longer skip-scanning to `bytes_to_skip`. If we skipped values at this depth - // to get to this point, make a note of it in the output. - if bytes_skipped_this_level > 0 { - self.text_buffer.clear(); - write!( - &mut self.text_buffer, - "// Skipped {} bytes of user-level data", - bytes_skipped_this_level - )?; - output( - self.output, - None, - None, - &self.indentation_buffer, - "...", - &self.text_buffer.dimmed(), - )?; - bytes_skipped_this_level = 0; + /// Inspects all values (however deeply nested) starting at the current level. 
+ fn inspect_value<'x>( + &mut self, + depth: usize, + delimiter: &str, + value: LazyValue<'x, AnyEncoding>, + comment_fn: impl CommentFn<'x>, + ) -> Result<()> { + use ValueRef::*; + self.newline()?; + if value.has_annotations() { + self.inspect_annotations(depth, value)?; + self.newline()?; + } + match value.read()? { + SExp(sexp) => self.inspect_sexp(depth, delimiter, sexp), + List(list) => self.inspect_list(depth, delimiter, list), + Struct(struct_) => self.inspect_struct(depth, delimiter, struct_), + _ => self.inspect_scalar(depth, delimiter, value, comment_fn), + } + } + + /// Inspects the scalar `value`. If this value appears in a list or struct, the caller can set + /// `delimiter` to a comma (`","`) and it will be appended to the value's text representation. + fn inspect_scalar<'x>( + &mut self, + depth: usize, + delimiter: &str, + value: LazyValue<'x, AnyEncoding>, + comment_fn: impl CommentFn<'x>, + ) -> Result<()> { + use ExpandedValueSource::*; + let value_literal = match value.expanded().source() { + ValueLiteral(value_literal) => value_literal, + // In Ion 1.0, there are no template values or constructed values so we can defer + // implementing these. + Template(_, _) => { + todo!("Ion 1.1 template values") } + Constructed(_, _) => { + todo!("Ion 1.1 constructed values") + } + }; - self.write_field_if_present()?; - self.write_annotations_if_present()?; - // Print the value or, if it's a container, its opening delimiter: {, (, or [ - self.write_value()?; - - // If the current value is a container, step into it and inspect its contents. 
- match ion_type { - IonType::List | IonType::SExp | IonType::Struct => { - self.reader.step_in()?; - self.inspect_level()?; - self.reader.step_out()?; - // Print the container's closing delimiter: }, ), or ] - self.text_buffer.clear(); - self.text_buffer.push_str(closing_delimiter_for(ion_type)); - if ion_type != IonType::SExp && self.reader.depth() > 0 { - self.text_buffer.push(','); - } - output( - self.output, - None, - None, - &self.indentation_buffer, - "", - &self.text_buffer, - )?; - } - _ => {} + use LazyRawValueKind::*; + // Check what encoding this is. At the moment, only binary Ion 1.0 is supported. + match value_literal.kind() { + Binary_1_0(bin_val) => { + self.inspect_binary_1_0_scalar(depth, delimiter, value, bin_val, comment_fn) } + Binary_1_1(_) => todo!("Binary Ion 1.1 scalars"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } + } - self.decrease_indentation(); - Ok(()) + /// Inspects the s-expression `sexp`, including all of its child values. If this sexp appears + /// in a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the sexp's text representation. + fn inspect_sexp<'x>( + &mut self, + depth: usize, + delimiter: &str, + sexp: LazySExp<'x, AnyEncoding>, + ) -> Result<()> { + use ExpandedSExpSource::*; + let raw_sexp = match sexp.expanded().source() { + ValueLiteral(raw_sexp) => raw_sexp, + Template(_, _, _, _) => todo!("Ion 1.1 template SExp"), + }; + + use LazyRawSExpKind::*; + match raw_sexp.kind() { + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), + Binary_1_0(v) => self.inspect_binary_1_0_sexp(depth, delimiter, sexp, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 SExp"), + } + } + + /// Inspects the list `list`, including all of its child values. If this list appears inside + /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the list's text representation. 
+ fn inspect_list<'x>( + &mut self, + depth: usize, + delimiter: &str, + list: LazyList<'x, AnyEncoding>, + ) -> Result<()> { + use ExpandedListSource::*; + let raw_list = match list.expanded().source() { + ValueLiteral(raw_list) => raw_list, + Template(_, _, _, _) => todo!("Ion 1.1 template List"), + }; + + use LazyRawListKind::*; + match raw_list.kind() { + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), + Binary_1_0(v) => self.inspect_binary_1_0_list(depth, delimiter, list, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 List"), + } } - fn increase_indentation(&mut self) { - // Add a level's worth of indentation to the buffer. - if self.reader.depth() > 0 { - self.indentation_buffer.push_str(LEVEL_INDENTATION); + /// Inspects the struct `struct_`, including all of its fields. If this struct appears inside + /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the struct's text representation. + fn inspect_struct( + &mut self, + depth: usize, + delimiter: &str, + struct_: LazyStruct<'_, AnyEncoding>, + ) -> Result<()> { + let raw_struct = match struct_.expanded().source() { + ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, + ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template Struct"), + }; + + use LazyRawValueKind::*; + match raw_struct.as_value().kind() { + Binary_1_0(v) => { + self.inspect_binary_1_0_struct(depth, delimiter, struct_, raw_struct, v) + } + Binary_1_1(_) => todo!("Binary Ion 1.1 Struct"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } } - fn decrease_indentation(&mut self) { - // Remove a level's worth of indentation from the buffer. 
- if self.reader.depth() > 0 { - let new_length = self.indentation_buffer.len() - LEVEL_INDENTATION.len(); - self.indentation_buffer.truncate(new_length); + fn inspect_symbol_table( + &mut self, + next_symbol_id: usize, + struct_: LazyStruct<'_, AnyEncoding>, + ) -> Result<()> { + let value = struct_.as_value(); + if value.has_annotations() { + self.newline()?; + self.inspect_annotations(0, value)?; + } + let raw_struct = match struct_.expanded().source() { + ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, + ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template symbol table"), + }; + + use LazyRawValueKind::*; + match raw_struct.as_value().kind() { + Binary_1_0(v) => { + self.inspect_binary_1_0_symbol_table(next_symbol_id, struct_, raw_struct, v) + } + Binary_1_1(_) => todo!("Binary Ion 1.1 symbol table"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } } - fn write_field_if_present(&mut self) -> Result<()> { - if self.reader.parent_type() != Some(IonType::Struct) { - // We're not in a struct; nothing to do. 
- return Ok(()); + fn inspect_annotations(&mut self, depth: usize, value: LazyValue) -> Result<()> { + let raw_value = match value.expanded().source() { + ExpandedValueSource::ValueLiteral(raw_value) => raw_value, + ExpandedValueSource::Template(_, _) => todo!("Ion 1.1 template value annotations"), + ExpandedValueSource::Constructed(_, _) => { + todo!("Ion 1.1 constructed value annotations") + } + }; + + use LazyRawValueKind::*; + match raw_value.kind() { + Binary_1_0(v) => self.inspect_binary_1_0_annotations(depth, value, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 annotations"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } - let field_token = self.reader.raw_field_name_token()?; - let field_id = field_token.local_sid().expect("No SID for field name."); - self.hex_buffer.clear(); - to_hex( - &mut self.hex_buffer, - self.reader.raw_field_id_bytes().unwrap(), + } + + // ===== Binary Ion 1.0 ====== + + // When inspecting a container, the container's header gets its own row in the output table. + // Unlike a scalar, the bytes of the container body do not begin immediately after the header + // bytes. + // This prints the container's offset, length, and header bytes, leaving the cursor positioned + // at the beginning of the `Text Ion` column. 
+ fn inspect_binary_1_0_container_header( + &mut self, + raw_value: v1_0::LazyRawBinaryValue, + ) -> Result<()> { + let encoding = raw_value.encoded_data(); + let range = encoding.range(); + + let opcode_bytes: &[u8] = raw_value.encoded_data().opcode_span().bytes(); + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![ + IonBytes::new(BytesKind::Opcode, opcode_bytes), + IonBytes::new( + BytesKind::TrailingLength, + raw_value.encoded_data().trailing_length_span().bytes(), + ), + ], ); - let field_name_result = self.reader.field_name(); - let field_name = field_name_result - .as_ref() - .ok() - .and_then(|name| name.text()) - .unwrap_or(""); + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter) + } - self.text_buffer.clear(); - write!(&mut self.text_buffer, "'{}':", field_name)?; + fn inspect_binary_1_0_sexp<'x>( + &mut self, + depth: usize, + delimiter: &str, + sexp: LazySExp<'x, AnyEncoding>, + raw_sexp: v1_0::LazyRawBinarySExp<'x>, + ) -> Result<()> { + self.inspect_binary_1_0_sequence( + depth, + "(", + "", + ")", + delimiter, + sexp.iter(), + raw_sexp, + raw_sexp.as_value(), + no_comment(), + ) + } - self.color_buffer.clear(); - write!(&mut self.color_buffer, " // ${}:", field_id)?; - write!(&mut self.text_buffer, "{}", &self.color_buffer.dimmed())?; - output( - self.output, - self.reader.field_id_offset(), - self.reader.field_id_length(), - &self.indentation_buffer, - &self.hex_buffer, - &self.text_buffer, - )?; + fn inspect_binary_1_0_list<'x>( + &mut self, + depth: usize, + delimiter: &str, + list: LazyList<'x, AnyEncoding>, + raw_list: v1_0::LazyRawBinaryList<'x>, + ) -> Result<()> { + self.inspect_binary_1_0_sequence( + depth, + "[", + ",", + "]", + delimiter, + list.iter(), + raw_list, + raw_list.as_value(), + no_comment(), + ) + } - if field_name_result.is_err() { - // If we had to write for the field name above, return a fatal error now. 
- bail!("Encountered a field ID (${}) with unknown text.", field_id); + fn inspect_binary_1_0_sequence<'x>( + &mut self, + depth: usize, + opening_delimiter: &str, + value_delimiter: &str, + closing_delimiter: &str, + trailing_delimiter: &str, + nested_values: impl IntoIterator>>, + nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, + raw_value: LazyRawBinaryValue, + mut value_comment_fn: impl CommentFn<'x>, + ) -> Result<()> { + self.inspect_binary_1_0_container_header(raw_value)?; + self.write_indentation(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{opening_delimiter}")?; + Ok(()) + })?; + + let mut has_printed_skip_message = false; + for (raw_value_res, value_res) in nested_raw_values.iter().zip(nested_values) { + let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); + match self.select_action( + depth + 1, + &mut has_printed_skip_message, + &Some(raw_nested_value), + "values", + "stepping out", + )? { + InspectorAction::Skip => continue, + InspectorAction::Inspect => {} + InspectorAction::LimitReached => break, + } + self.inspect_value(depth + 1, value_delimiter, nested_value, no_comment())?; + self.output.set_color(&comment_style())?; + value_comment_fn(self.output, nested_value)?; + self.output.reset()?; } - Ok(()) + self.newline()?; + self.write_blank_offset_length_and_bytes(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{closing_delimiter}{trailing_delimiter}")?; + Ok(()) + }) } - fn write_annotations_if_present(&mut self) -> IonResult<()> { - let num_annotations = self.reader.raw_annotations().count(); - if num_annotations > 0 { - self.hex_buffer.clear(); - to_hex( - &mut self.hex_buffer, - self.reader.raw_annotations_bytes().unwrap(), - ); - - self.text_buffer.clear(); - join_into(&mut self.text_buffer, "::", self.reader.annotations())?; - write!(&mut self.text_buffer, "::")?; - - self.color_buffer.clear(); - write!(&mut self.color_buffer, " // $")?; - join_into( - &mut self.color_buffer, 
- "::$", - self.reader - .raw_annotations() - .map(|a| a.map(|token| token.local_sid().unwrap())), - )?; - write!(&mut self.color_buffer, "::")?; - - write!(self.text_buffer, "{}", self.color_buffer.dimmed())?; - output( - self.output, - self.reader.annotations_offset(), - self.reader.annotations_length(), - &self.indentation_buffer, - &self.hex_buffer, - &self.text_buffer, - )?; + fn select_action( + &mut self, + depth: usize, + has_printed_skip_message: &mut bool, + maybe_item: &Option, + name_of_skipped_item: &str, + name_of_limit_action: &str, + ) -> Result { + if self.should_skip(maybe_item) { + if !*has_printed_skip_message { + self.write_skipping_message(depth, name_of_skipped_item)?; + *has_printed_skip_message = true; + } + return Ok(InspectorAction::Skip); + } + self.skip_complete = true; + + if self.is_past_limit(maybe_item) { + self.write_limiting_message(depth, name_of_limit_action)?; + return Ok(InspectorAction::LimitReached); } - Ok(()) - } - fn write_value(&mut self) -> IonResult<()> { - self.text_buffer.clear(); - // Populates `self.text_buffer` with the Ion text representation of the current value - // if it is a scalar. If the value is a container, format_value() will write the opening - // delimiter of that container instead. - self.format_value()?; + Ok(InspectorAction::Inspect) + } - self.hex_buffer.clear(); - to_hex( - &mut self.hex_buffer, - self.reader.raw_header_bytes().unwrap(), + fn inspect_binary_1_0_field_name( + &mut self, + depth: usize, + raw_name: LazyRawAnyFieldName, + name: SymbolRef, + ) -> Result<()> { + self.newline()?; + let range = raw_name.range(); + let raw_name_bytes = raw_name.span().bytes(); + let offset = range.start; + let length = range.len(); + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![IonBytes::new(BytesKind::FieldId, raw_name_bytes)], ); - // Only write the bytes representing the body of the value if it is a scalar. 
- // If it is a container, `inspect_level` will handle stepping into it and writing any - // nested values. - if !self.reader.ion_type().unwrap().is_container() { - self.hex_buffer.push(' '); - to_hex(&mut self.hex_buffer, self.reader.raw_value_bytes().unwrap()); - } + self.write_offset_length_and_bytes(offset, length, &mut formatter)?; + self.write_indentation(depth)?; + self.with_style(field_id_style(), |out| { + IoValueFormatter::new(out) + .value_formatter() + .format_symbol(name)?; + Ok(()) + })?; + write!(self.output, ": ")?; + // Print a text Ion comment showing how the field name was encoded, ($SID or text) + self.with_style(comment_style(), |out| { + match raw_name.read()? { + RawSymbolRef::SymbolId(sid) => { + write!(out, " // ${sid}") + } + RawSymbolRef::Text(_) => { + write!(out, " // ") + } + }?; + Ok(()) + }) + } - let length = self.reader.header_length() + self.reader.value_length(); - output( - self.output, - Some(self.reader.header_offset()), - Some(length), - &self.indentation_buffer, - &self.hex_buffer, - &self.text_buffer, - ) + /// Inspects all values (however deeply nested) starting at the current level. + fn inspect_binary_1_0_field( + &mut self, + depth: usize, + field: LazyField, + raw_field: LazyRawFieldExpr, + ) -> Result<()> { + let (raw_name, _raw_value) = raw_field.expect_name_value()?; + let name = field.name()?; + + self.inspect_binary_1_0_field_name(depth, raw_name, name)?; + self.inspect_value(depth, ",", field.value(), no_comment())?; + Ok(()) } - fn format_value(&mut self) -> IonResult<()> { - use ion_rs::IonType::*; - - // Destructure `self` to get multiple simultaneous mutable references to its constituent - // fields. This freezes `self`; it cannot be referred to for the rest of the function call. - let IonInspector { - ref mut reader, - ref mut text_ion_writer, - ref mut text_buffer, - ref mut color_buffer, - .. 
- } = self; - - // If we need to write comments alongside any of the values, we'll add them here so we can - // colorize them separately. - let comment_buffer = color_buffer; - comment_buffer.clear(); - - let writer = text_ion_writer; // Local alias for brevity. - let ion_type = reader - .ion_type() - .expect("format_value() called when reader was exhausted"); - if reader.is_null() { - writer.write_null(reader.ion_type().unwrap())?; - } else { - match ion_type { - Null => writer.write_null(ion_type), - Bool => writer.write_bool(reader.read_bool()?), - Int => writer.write_i64(reader.read_i64()?), - Float => writer.write_f64(reader.read_f64()?), - Decimal => writer.write_decimal(&reader.read_decimal()?), - Timestamp => writer.write_timestamp(&reader.read_timestamp()?), - Symbol => { - // TODO: Make this easier in the reader - let symbol_token = reader.read_raw_symbol()?; - let sid = symbol_token.local_sid().unwrap(); - let text = reader - .symbol_table() - .text_for(sid) - .unwrap_or_else(|| panic!("Could not resolve text for symbol ID ${}", sid)); - write!(comment_buffer, " // ${}", sid)?; - writer.write_symbol(text) + fn inspect_binary_1_0_struct( + &mut self, + depth: usize, + delimiter: &str, + struct_: LazyStruct, + raw_struct: LazyRawAnyStruct, + raw_value: LazyRawBinaryValue, + ) -> Result<()> { + self.inspect_binary_1_0_container_header(raw_value)?; + + self.write_indentation(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{{")?; + Ok(()) + })?; + let mut has_printed_skip_message = false; + for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { + let field = field_result?; + let raw_field = raw_field_result?; + match self.select_action( + depth + 1, + &mut has_printed_skip_message, + &Some(raw_field), + "fields", + "stepping out", + )? { + InspectorAction::Skip => continue, + InspectorAction::Inspect => { + self.inspect_binary_1_0_field(depth + 1, field, raw_field)? 
} - String => writer.write_string(reader.read_str()?), - Clob => writer.write_clob(reader.read_clob()?), - Blob => writer.write_blob(reader.read_blob()?), - // The containers don't use the RawTextWriter to format anything. They simply write - // the appropriate opening delimiter. - List => { - write!(text_buffer, "[")?; - return Ok(()); + InspectorAction::LimitReached => break, + } + } + // ===== Closing delimiter ===== + self.newline()?; + self.write_blank_offset_length_and_bytes(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "}}{delimiter}")?; + Ok(()) + }) + } + + fn inspect_binary_1_0_symbol_table( + &mut self, + next_symbol_id: usize, + struct_: LazyStruct, + raw_struct: LazyRawAnyStruct, + raw_value: LazyRawBinaryValue, + ) -> Result<()> { + // The processing for a symbol table is very similar to that of a regular struct, + // but with special handling defined for the `imports` and `symbols` fields when present. + // Because symbol tables are always at the top level, there is no need for indentation. + const TOP_LEVEL_DEPTH: usize = 0; + self.newline()?; + self.inspect_binary_1_0_container_header(raw_value)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{{")?; + Ok(()) + })?; + let mut has_printed_skip_message = false; + for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { + let field = field_result?; + let raw_field = raw_field_result?; + + match self.select_action( + TOP_LEVEL_DEPTH + 1, + &mut has_printed_skip_message, + &Some(raw_field), + "fields", + "stepping out", + )? { + InspectorAction::Skip => continue, + InspectorAction::Inspect if field.name()? == "symbols" => { + self.inspect_lst_symbols_field(next_symbol_id, field, raw_field)? } - SExp => { - write!(text_buffer, "(")?; - return Ok(()); + // TODO: if field.name()? == "imports" => {} + InspectorAction::Inspect => { + self.inspect_binary_1_0_field(TOP_LEVEL_DEPTH + 1, field, raw_field)? 
} - Struct => { - write!(text_buffer, "{{")?; - return Ok(()); + InspectorAction::LimitReached => break, + } + } + // ===== Closing delimiter ===== + self.newline()?; + self.write_blank_offset_length_and_bytes(TOP_LEVEL_DEPTH)?; + self.with_style(text_ion_style(), |out| { + write!(out, "}}")?; + Ok(()) + }) + } + + fn inspect_lst_symbols_field( + &mut self, + mut next_symbol_id: usize, + field: LazyField, + raw_field: LazyRawFieldExpr, + ) -> Result<()> { + const SYMBOL_LIST_DEPTH: usize = 1; + let (raw_name, raw_value) = raw_field.expect_name_value()?; + self.inspect_binary_1_0_field_name(SYMBOL_LIST_DEPTH, raw_name, field.name()?)?; + + let symbols_list = match field.value().read()? { + ValueRef::List(list) => list, + _ => { + return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { + out.write_all(b" // Invalid, ignored")?; + Ok(true) + }); + } + }; + + let raw_symbols_list = raw_value.read()?.expect_list()?; + let nested_raw_values = raw_symbols_list.iter(); + let nested_values = symbols_list.iter(); + + let LazyRawValueKind::Binary_1_0(raw_value) = raw_value.kind() else { + unreachable!("binary 1.0 encoding already confirmed"); + }; + + self.newline()?; + self.inspect_binary_1_0_container_header(raw_value)?; + self.write_indentation(SYMBOL_LIST_DEPTH)?; + self.with_style(text_ion_style(), |out| { + write!(out, "[")?; + Ok(()) + })?; + + let mut has_printed_skip_message = false; + for (raw_value_res, value_res) in nested_raw_values.zip(nested_values) { + let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); + match self.select_action( + SYMBOL_LIST_DEPTH + 1, + &mut has_printed_skip_message, + &Some(raw_nested_value), + "values", + "stepping out", + )? { + InspectorAction::Skip => continue, + InspectorAction::Inspect => {} + InspectorAction::LimitReached => break, + } + + self.output.set_color(&comment_style())?; + self.inspect_value(SYMBOL_LIST_DEPTH + 1, ",", nested_value, |out, value| { + match value.read()? 
{ + ValueRef::String(_s) => write!(out, " // -> ${next_symbol_id}"), + _other => write!(out, " // -> ${next_symbol_id} (no text)"), + }?; + next_symbol_id += 1; + Ok(true) + })?; + self.output.reset()?; + } + + self.newline()?; + self.write_blank_offset_length_and_bytes(SYMBOL_LIST_DEPTH)?; + self.with_style(text_ion_style(), |out| { + write!(out, "],")?; + Ok(()) + }) + } + + fn inspect_binary_1_0_annotations( + &mut self, + depth: usize, + value: LazyValue, + raw_value: LazyRawBinaryValue, + ) -> Result<()> { + let encoding = raw_value.encoded_annotations().unwrap(); + let range = encoding.range(); + + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![ + IonBytes::new(BytesKind::AnnotationsHeader, encoding.header_span().bytes()), + IonBytes::new( + BytesKind::AnnotationsSequence, + encoding.sequence_span().bytes(), + ), + ], + ); + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + + self.write_indentation(depth)?; + self.with_style(annotations_style(), |out| { + for annotation in value.annotations() { + IoValueFormatter::new(&mut *out) + .value_formatter() + .format_symbol(annotation?)?; + write!(out, "::")?; + } + Ok(()) + })?; + + self.with_style(comment_style(), |out| { + write!(out, " // ")?; + for (index, raw_annotation) in raw_value.annotations().enumerate() { + if index > 0 { + write!(out, ", ")?; } + match raw_annotation? 
{ + RawSymbolRef::SymbolId(sid) => write!(out, "${sid}"), + RawSymbolRef::Text(_) => write!(out, ""), + }?; + } + Ok(()) + })?; + + Ok(()) + } + + fn inspect_binary_1_0_scalar<'x>( + &mut self, + depth: usize, + delimiter: &str, + value: LazyValue<'x, AnyEncoding>, + raw_value: LazyRawBinaryValue, + mut comment_fn: impl CommentFn<'x>, + ) -> Result<()> { + let encoding = raw_value.encoded_data(); + let range = encoding.range(); + + let opcode_bytes = IonBytes::new(BytesKind::Opcode, encoding.opcode_span().bytes()); + let length_bytes = IonBytes::new( + BytesKind::TrailingLength, + encoding.trailing_length_span().bytes(), + ); + // TODO: There is a bug in the `body_span()` method that causes it fail when the value is annotated. + // When it's fixed, this can be: + // let body_bytes = IonBytes::new(BytesKind::ValueBody, body_span); + let body_len = raw_value.encoded_data().body_range().len(); + let total_len = raw_value.encoded_data().range().len(); + let body_bytes = IonBytes::new( + BytesKind::ValueBody, + &encoding.span().bytes()[total_len - body_len..], + ); + + let mut formatter = + BytesFormatter::new(BYTES_PER_ROW, vec![opcode_bytes, length_bytes, body_bytes]); + + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + self.write_indentation(depth)?; + + let style = text_ion_style(); + self.output.set_color(&style)?; + self.text_writer + .write(value.read()?) + .expect("failed to write text value to in-memory buffer") + .flush()?; + + let encoded = self.text_writer.output_mut(); + if encoded.ends_with(&[b' ']) { + let _ = encoded.pop(); + } + self.output + .write_all(self.text_writer.output().as_slice())?; + self.text_writer.output_mut().clear(); + self.output.write_all(delimiter.as_bytes())?; + self.output.reset()?; + + self.output.set_color(&comment_style())?; + let wrote_comment = comment_fn(self.output, value)?; + if let RawValueRef::Symbol(RawSymbolRef::SymbolId(symbol_id)) = raw_value.read()? 
{ + match wrote_comment { + true => write!(self.output, " (${symbol_id})"), + false => write!(self.output, " // ${symbol_id}"), }?; } - // This is writing to a Vec, so flush() will always succeed. - let _ = writer.flush(); - // The writer produces valid UTF-8, so there's no need to re-validate it. - let value_text = unsafe { from_utf8_unchecked(writer.output().as_slice()) }; - write!(text_buffer, "{}", value_text.trim_end())?; - // If we're in a container, add a delimiting comma. Text Ion allows trailing commas, so we - // don't need to treat the last value as a special case. - if self.reader.depth() > 0 { - write!(text_buffer, ",")?; - } - write!(text_buffer, "{}", comment_buffer.dimmed())?; - // Clear the writer's output Vec. We encode each scalar independently of one another. - writer.output_mut().clear(); + self.output.reset()?; + + while !formatter.is_empty() { + self.newline()?; + self.write_offset_length_and_bytes("", "", &mut formatter)?; + self.write_indentation(depth)?; + } + + Ok(()) + } + + // ===== Table-writing methods ===== + + /// Prints the header of the output table + fn write_table_header(&mut self) -> Result<()> { + self.output.write_all(START_OF_HEADER.as_bytes())?; + write!(self.output, "\n{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Offset ")?; + write!(self.output, "{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Length ")?; + write!(self.output, "{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Binary Ion ")?; + write!(self.output, "{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Text Ion ")?; + write!(self.output, "{VERTICAL_LINE}\n")?; + self.output.write_all(END_OF_HEADER.as_bytes())?; + Ok(()) + } + + /// Writes a spacing string `depth` times. + fn write_indentation(&mut self, depth: usize) -> Result<()> { + // This spacing string includes a unicode dot to make it easy to see what level of depth + // the current value is found at. 
This dot is displayed with a muted color; its appearance + // is subtle. + const INDENTATION_WITH_GUIDE: &'static str = "· "; + + let mut color_spec = ColorSpec::new(); + color_spec + .set_dimmed(false) + .set_intense(true) + .set_bold(true) + .set_fg(Some(Color::Rgb(100, 100, 100))); + self.with_style(color_spec, |out| { + for _ in 0..depth { + out.write_all(INDENTATION_WITH_GUIDE.as_bytes())?; + } + Ok(()) + }) + } + + /// Prints the given `offset` and `length` in the first and second table columns, then uses the + /// `formatter` to print a single row of hex-encoded bytes in the third column ("Binary Ion"). + /// The `offset` and `length` are typically `usize`, but can be anything that implements `Display`. + fn write_offset_length_and_bytes( + &mut self, + offset: impl Display, + length: impl Display, + formatter: &mut BytesFormatter, + ) -> Result<()> { + write!( + self.output, + "{VERTICAL_LINE} {offset:12} {VERTICAL_LINE} {length:12} {VERTICAL_LINE} " + )?; + formatter.write_row(self.output)?; + write!(self.output, "{VERTICAL_LINE} ")?; Ok(()) } + + /// Prints a row with blank fiends in the `Offset`, `Length`, and `Binary Ion` columns. This method + /// does not print a trailing newline, allowing the caller to populate the `Text Ion` column as needed. + fn write_blank_offset_length_and_bytes(&mut self, depth: usize) -> Result<()> { + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![]); + self.write_offset_length_and_bytes("", "", &mut formatter)?; + self.write_indentation(depth) + } + + /// Prints a row with an ellipsis (`...`) in the first three columns, and a text Ion comment in + /// the final column indicating what is being skipped over. 
+ fn write_skipping_message(&mut self, depth: usize, name_of_skipped_item: &str) -> Result<()> { + write!(self.output, "\n{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; + self.write_indentation(depth)?; + self.with_style(comment_style(), |out| { + write!(out, "// ...skipping {name_of_skipped_item}...")?; + Ok(()) + }) + } + + /// Prints a row with an ellipsis (`...`) in the first three columns, and a text Ion comment in + /// the final column indicating that we have reached the maximum number of bytes to process + /// as determined by the `--limit-bytes` flag. + fn write_limiting_message(&mut self, depth: usize, action: &str) -> Result<()> { + write!(self.output, "\n{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; + self.write_indentation(depth)?; + let limit_bytes = self.limit_bytes; + self.with_style(comment_style(), |out| { + write!(out, "// --limit-bytes {} reached, {action}.", limit_bytes)?; + Ok(()) + }) + } } -const COLUMN_DELIMITER: &str = " | "; -const CHARS_PER_HEX_BYTE: usize = 3; -const HEX_BYTES_PER_ROW: usize = 8; -const HEX_COLUMN_SIZE: usize = HEX_BYTES_PER_ROW * CHARS_PER_HEX_BYTE; - -fn write_header(output: &mut OutputRef) -> IonResult<()> { - let line = "-".repeat(24 + 24 + 9 + 9 + (COLUMN_DELIMITER.len() * 3)); - - writeln!(output, "{}", line)?; - write!( - output, - "{:^9}{}", - "Offset".bold().bright_white(), - COLUMN_DELIMITER - )?; - write!( - output, - "{:^9}{}", - "Length".bold().bright_white(), - COLUMN_DELIMITER - )?; - write!( - output, - "{:^24}{}", - "Binary Ion".bold().bright_white(), - COLUMN_DELIMITER - )?; - writeln!(output, "{:^24}", "Text Ion".bold().bright_white())?; - writeln!(output, "{}", line)?; - Ok(()) +pub enum InspectorAction { + /// The current value appears before the offset specified by `--skip-bytes`. Ignore it. 
+ Skip, + /// The current value appears after `--skip-bytes` and before `--limit-bytes`. Inspect it. + Inspect, + /// The current value appears after `--limit-bytes`, stop inspecting values. + LimitReached, } -// Accepting a `T` allows us to pass in `&str`, `&String`, `&ColoredString`, etc as out text_column -// TODO: This could be a method on IonInspector -fn output( - output: &mut OutputRef, - offset: Option, - length: Option, - indentation: &str, - hex_column: &str, - text_column: T, -) -> IonResult<()> { - // The current implementation always writes a single line of output for the offset, length, - // and text columns. Only the hex column can span multiple rows. - // TODO: It would be nice to allow important hex bytes (e.g. type descriptors or lengths) - // to be color-coded. This complicates the output function, however, as the length - // of a colored string is not the same as its display length. We would need to pass - // uncolored strings to the output function paired with the desired color/style so - // the output function could break the text into the necessary row lengths and then apply - // the provided colors just before writing. - - // Write the offset column - if let Some(offset) = offset { - write!(output, "{:9}{}", offset, COLUMN_DELIMITER)?; - } else { - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - } - - // Write the length column - if let Some(length) = length { - write!(output, "{:9}{}", length, COLUMN_DELIMITER)?; - } else { - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - } - - // If the hex string is short enough to fit in a single row... - if hex_column.len() < HEX_COLUMN_SIZE { - // ...print the hex string... - write!(output, "{}", hex_column)?; - // ...and then write enough padding spaces to fill the rest of the row. - for _ in 0..(HEX_COLUMN_SIZE - hex_column.len()) { - write!(output, " ")?; - } - } else { - // Otherwise, write the first row's worth of the hex string. 
- write!(output, "{}", &hex_column[..HEX_COLUMN_SIZE])?; - } - // Write a delimiter, the write the text Ion as the final column. - write!(output, "{}", COLUMN_DELIMITER)?; - write!(output, " ")?; - writeln!(output, "{}{}", indentation, text_column)?; - - // Revisit our hex column. Write as many additional rows as needed. - let mut col_1_written = HEX_COLUMN_SIZE; - while col_1_written < hex_column.len() { - // Padding for offset column - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - // Padding for length column - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - let remaining_bytes = hex_column.len() - col_1_written; - let bytes_to_write = min(remaining_bytes, HEX_COLUMN_SIZE); - let next_slice_to_write = &hex_column[col_1_written..(col_1_written + bytes_to_write)]; - write!(output, "{}", next_slice_to_write)?; - for _ in 0..(HEX_COLUMN_SIZE - bytes_to_write) { - write!(output, " ")?; - } - writeln!(output, "{}", COLUMN_DELIMITER)?; - col_1_written += HEX_COLUMN_SIZE; - // No need to write anything for the text column since it's the last one. +// ===== Named styles ===== + +fn header_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_bold(true).set_intense(true); + style +} + +fn comment_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_dimmed(true); + style +} + +fn text_ion_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_fg(Some(Color::Rgb(255, 255, 255))); + style +} + +fn field_id_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_fg(Some(Color::Cyan)).set_intense(true); + style +} + +fn annotations_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_fg(Some(Color::Magenta)); + style +} + +/// Kinds of encoding primitives found in a binary Ion stream. 
+#[derive(Copy, Clone, Debug)] +enum BytesKind { + FieldId, + Opcode, + TrailingLength, + ValueBody, + AnnotationsHeader, + AnnotationsSequence, + VersionMarker, +} + +impl BytesKind { + /// Returns a [`ColorSpec`] that should be used when printing bytes of the specified `BytesKind`. + fn style(&self) -> ColorSpec { + use BytesKind::*; + let mut color = ColorSpec::new(); + match self { + VersionMarker => color.set_fg(Some(Color::Yellow)).set_intense(true), + FieldId => color.set_fg(Some(Color::Cyan)).set_intense(true), + Opcode => color + .set_bold(true) + .set_fg(Some(Color::Rgb(0, 0, 0))) + .set_bg(Some(Color::Rgb(255, 255, 255))), + + TrailingLength => color + .set_bold(true) + .set_underline(true) + .set_fg(Some(Color::White)) + .set_intense(true), + ValueBody => color + .set_bold(false) + .set_fg(Some(Color::White)) + .set_intense(false), + AnnotationsHeader => color + .set_bold(false) + .set_fg(Some(Color::Black)) + .set_bg(Some(Color::Magenta)), + AnnotationsSequence => color.set_bold(false).set_fg(Some(Color::Magenta)), + }; + color } - Ok(()) } -fn closing_delimiter_for(container_type: IonType) -> &'static str { - match container_type { - IonType::List => "]", - IonType::SExp => ")", - IonType::Struct => "}", - _ => panic!("Attempted to close non-container type {:?}", container_type), +/// A slice of Ion bytes to be printed in the `Binary Ion` column. +/// +/// Each `IonBytes` has a `BytesKind` that maps to a display style as well as a counter tracking +/// how many of its bytes have been printed so far. +#[derive(Copy, Clone, Debug)] +struct IonBytes<'a> { + // The actual slice of bytes + pub bytes: &'a [u8], + // What the slice of bytes represents in Ion + pub kind: BytesKind, + // How many of this slice's bytes have been printed so far. 
+ pub bytes_written: usize, +} + +impl<'a> IonBytes<'a> { + fn new(kind: BytesKind, bytes: &'a [u8]) -> Self { + Self { + bytes, + kind, + bytes_written: 0, + } + } + + fn mark_bytes_written(&mut self, num_bytes: usize) { + self.bytes_written += num_bytes + } + + fn next_n_bytes(&self, num_bytes: usize) -> &[u8] { + &self.bytes[self.bytes_written..self.bytes_written + num_bytes] + } + + fn bytes_remaining(&self) -> usize { + self.bytes.len() - self.bytes_written + } + + fn is_empty(&self) -> bool { + self.bytes_remaining() == 0 + } + + fn style(&self) -> ColorSpec { + self.kind.style() } } -fn to_hex(buffer: &mut String, bytes: &[u8]) { - if bytes.is_empty() { - return; +/// Prints bytes as colorized, hex-encoded rows of a configurable size. +/// +/// Stores a sequence of [`IonBytes`] instances to display. Upon request, writes out the next `n` +/// colorized, hex-encoded bytes, remembering where to resume when the next row is needed. +struct BytesFormatter<'a> { + slices: Vec>, + slices_written: usize, + formatted_bytes_per_row: usize, +} + +impl<'a> BytesFormatter<'a> { + pub fn new(formatted_bytes_per_row: usize, slices: Vec>) -> Self { + Self { + slices, + slices_written: 0, + formatted_bytes_per_row, + } + } + + /// Writes a row of `n` hex-encoded, colorized bytes, where `n` is determined by the + /// `formatted_bytes_per_row` argument in [`BytesFormatter::new`]. + /// + /// If there are fewer than `n` bytes remaining, prints all remaining bytes. + pub fn write_row(&mut self, output: &mut impl WriteColor) -> Result<()> { + let num_bytes = self.formatted_bytes_per_row; + let bytes_written = self.write_bytes(num_bytes, output)?; + let bytes_remaining = num_bytes - bytes_written; + // If we printed fewer bytes than are needed to make a row, write out enough padding + // to keep the columns aligned. 
+ for _ in 0..bytes_remaining { + write!(output, " ")?; // Empty space the width of a formatted byte + } + Ok(()) + } + + /// Helper method to iterate over the remaining [`IonBytes`], printing their contents until + /// `num_bytes` is reached. + fn write_bytes(&mut self, num_bytes: usize, output: &mut impl WriteColor) -> Result { + let mut bytes_remaining = num_bytes; + while bytes_remaining > 0 && !self.is_empty() { + bytes_remaining -= self.write_bytes_from_current_slice(bytes_remaining, output)?; + if self.is_empty() { + // Even though `bytes_remaining` hasn't reached zero, we're out of data. + break; + } + } + + Ok(num_bytes - bytes_remaining) + } + + /// Helper method to print up to `num_bytes` bytes from the current [`IonBytes`]. + fn write_bytes_from_current_slice( + &mut self, + num_bytes: usize, + output: &mut impl WriteColor, + ) -> Result { + let Some(slice) = self.current_slice() else { + // No more to write + return Ok(0); + }; + + if slice.bytes.len() == 0 { + self.slices_written += 1; + return Ok(0); + } + + // We're going to write whichever is smaller: + // 1. the requested number of bytes from the current slice + // OR + // 2. the number of bytes remaining in the current slice + let bytes_to_write = num_bytes.min(slice.bytes_remaining()); + + // Set the appropriate style for this byte slice. + let style: ColorSpec = slice.style(); + output.set_color(&style)?; + write!( + output, + "{}", + hex_contents(slice.next_n_bytes(bytes_to_write)) + )?; + slice.mark_bytes_written(bytes_to_write); + output.reset()?; + + // If we completed the slice OR we finished writing all of the requested bytes + if slice.is_empty() || num_bytes == bytes_to_write { + write!(output, " ")?; + } + + if slice.is_empty() { + // This slice has been exhausted, we should resume from the beginning of the next one. + self.slices_written += 1; + } + + Ok(bytes_to_write) + } + + /// Returns a reference to the [`IonBytes`] from which the next bytes should be pulled. 
+ fn current_slice(&mut self) -> Option<&mut IonBytes<'a>> { + if self.is_empty() { + return None; + } + Some(&mut self.slices[self.slices_written]) } - write!(buffer, "{:02x}", bytes[0]).unwrap(); - for byte in &bytes[1..] { - write!(buffer, " {:02x}", *byte).unwrap(); + + /// Returns `true` if all of the slices have been exhausted. + fn is_empty(&self) -> bool { + self.slices_written == self.slices.len() } } -fn join_into( - buffer: &mut String, - delimiter: &str, - mut values: impl Iterator>, -) -> IonResult<()> { - if let Some(first) = values.next() { - write!(buffer, "{}", first?).unwrap(); +/// Converts the given byte slice to a string containing hex-encoded bytes +fn hex_contents(source: &[u8]) -> String { + if source.is_empty() { + return String::new(); } - for value in values { - write!(buffer, "{}{}", delimiter, value?).unwrap(); + use std::fmt::Write; + let mut buffer = String::new(); + let bytes = source.iter(); + + let mut is_first = true; + for byte in bytes { + if is_first { + write!(buffer, "{:02x?}", byte).unwrap(); + is_first = false; + continue; + } + write!(buffer, " {:02x?}", byte).unwrap(); } - Ok(()) + buffer } diff --git a/src/bin/ion/commands/beta/primitive.rs b/src/bin/ion/commands/beta/primitive.rs index 472a81a..591395f 100644 --- a/src/bin/ion/commands/beta/primitive.rs +++ b/src/bin/ion/commands/beta/primitive.rs @@ -1,7 +1,7 @@ use crate::commands::IonCliCommand; use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; -use ion_rs::{VarInt, VarUInt}; +use ion_rs::v1_0::{VarInt, VarUInt}; pub struct PrimitiveCommand; diff --git a/src/bin/ion/commands/beta/symtab/filter.rs b/src/bin/ion/commands/beta/symtab/filter.rs index 375673f..1006aee 100644 --- a/src/bin/ion/commands/beta/symtab/filter.rs +++ b/src/bin/ion/commands/beta/symtab/filter.rs @@ -1,10 +1,9 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{bail, Context, Result}; use clap::{Arg, ArgAction, ArgMatches, Command}; -use 
ion_rs::RawBinaryReader; -use ion_rs::{IonReader, IonResult, IonType, SystemReader, SystemStreamItem}; -use memmap::MmapOptions; +use ion_rs::*; use std::fs::File; +use std::io; use std::io::{stdout, BufWriter, Write}; pub struct SymtabFilterCommand; @@ -15,8 +14,7 @@ impl IonCliCommand for SymtabFilterCommand { } fn about(&self) -> &'static str { - // XXX Currently only supports binary input - "Filters user data out of a binary Ion stream, leaving only the symbol table(s) behind." + "Filters user data out of an Ion stream, leaving only the symbol table(s) behind." } fn configure_args(&self, command: Command) -> Command { @@ -52,21 +50,12 @@ impl IonCliCommand for SymtabFilterCommand { for input_file in input_file_names { let file = File::open(input_file.as_str()) .with_context(|| format!("Could not open file '{}'", &input_file))?; - - let mmap = unsafe { - MmapOptions::new() - .map(&file) - .with_context(|| format!("Could not mmap '{}'", input_file))? - }; - - // Treat the mmap as a byte array. - let ion_data: &[u8] = &mmap[..]; - let raw_reader = RawBinaryReader::new(ion_data); - let mut system_reader = SystemReader::new(raw_reader); - omit_user_data(ion_data, &mut system_reader, &mut output, lift_requested)?; + let mut system_reader = SystemReader::new(AnyEncoding, file); + filter_out_user_data(&mut system_reader, &mut output, lift_requested)?; } } else { - bail!("this command does not yet support reading from STDIN") + let mut system_reader = SystemReader::new(AnyEncoding, io::stdin().lock()); + filter_out_user_data(&mut system_reader, &mut output, lift_requested)?; } output.flush()?; @@ -74,28 +63,45 @@ impl IonCliCommand for SymtabFilterCommand { } } -pub fn omit_user_data( - ion_data: &[u8], - reader: &mut SystemReader>, +pub fn filter_out_user_data( + reader: &mut SystemReader, output: &mut Box, lift_requested: bool, -) -> IonResult<()> { +) -> Result<()> { loop { - match reader.next()? 
{ - SystemStreamItem::VersionMarker(major, minor) => { - output.write_all(&[0xE0, major, minor, 0xEA])?; + match reader.next_item()? { + SystemStreamItem::VersionMarker(marker) => { + output.write_all(marker.span().bytes())?; } - SystemStreamItem::SymbolTableValue(IonType::Struct) => { - if !lift_requested { - output.write_all(reader.raw_annotations_bytes().unwrap_or(&[]))?; + SystemStreamItem::SymbolTable(symtab) => { + let Some(raw_value) = symtab.as_value().raw() else { + // This symbol table came from a macro expansion; there are no encoded bytes + // to pass through. + bail!("found an ephemeral symbol table, which is not yet supported") + }; + if lift_requested { + // Only pass through the value portion of the symbol table, stripping off the + // `$ion_symbol_table` annotation. + output.write_all(raw_value.value_span().bytes())?; + } else { + // Pass through the complete symbol table, preserving the `$ion_symbol_table` + // annotation. + output.write_all(raw_value.span().bytes())?; } - output.write_all(reader.raw_header_bytes().unwrap())?; - let body_range = reader.value_range(); - let body_bytes = &ion_data[body_range]; - output.write_all(body_bytes)?; } - SystemStreamItem::Nothing => return Ok(()), - _ => {} + SystemStreamItem::Value(_) => continue, + SystemStreamItem::EndOfStream(_) => { + return Ok(()); + } + _ => unreachable!("#[non_exhaustive] enum, current variants covered"), + }; + // If this is a text encoding, then we need delimiting space to separate + // IVMs from their neighboring system stream items. 
Consider: + // $ion_1_0$ion_1_0 + // or + // $ion_symbol_table::{}$ion_1_0$ion_symbol_table::{} + if reader.detected_encoding().is_text() { + output.write_all(&[b'\n']).unwrap() } } } diff --git a/src/bin/ion/commands/beta/to/json.rs b/src/bin/ion/commands/beta/to/json.rs index ec0fa2a..89f5864 100644 --- a/src/bin/ion/commands/beta/to/json.rs +++ b/src/bin/ion/commands/beta/to/json.rs @@ -1,12 +1,12 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{ArgMatches, Command}; -use ion_rs::{Element, ElementReader}; -use ion_rs::{Reader, ReaderBuilder}; +use ion_rs::*; use serde_json::{Map, Number, Value as JsonValue}; use std::fs::File; use std::io::{stdin, stdout, BufWriter, Write}; use std::str::FromStr; +use zstd::zstd_safe::WriteBuf; pub struct ToJsonCommand; @@ -45,15 +45,13 @@ impl IonCliCommand for ToJsonCommand { for input_file in input_file_names { let file = File::open(input_file.as_str()) .with_context(|| format!("Could not open file '{}'", &input_file))?; - let mut reader = ReaderBuilder::new() - .build(file) + let mut reader = Reader::new(AnyEncoding, file) .with_context(|| format!("Input file {} was not valid Ion.", &input_file))?; convert(&mut reader, &mut output)?; } } else { // No input files were specified, run the converter on STDIN. 
- let mut reader = ReaderBuilder::new() - .build(stdin().lock()) + let mut reader = Reader::new(AnyEncoding, stdin().lock()) .with_context(|| "Input was not valid Ion.")?; convert(&mut reader, &mut output)?; } @@ -63,73 +61,77 @@ impl IonCliCommand for ToJsonCommand { } } -pub fn convert(reader: &mut Reader, output: &mut Box) -> Result<()> { +pub fn convert( + reader: &mut Reader, + output: &mut Box, +) -> Result<()> { const FLUSH_EVERY_N: usize = 100; - let mut element_count = 0usize; - for result in reader.elements() { - let element = result.with_context(|| "invalid input")?; - writeln!(output, "{}", to_json_value(&element)?)?; - element_count += 1; - if element_count % FLUSH_EVERY_N == 0 { + let mut value_count = 0usize; + while let Some(value) = reader.next()? { + writeln!(output, "{}", to_json_value(value)?)?; + value_count += 1; + if value_count % FLUSH_EVERY_N == 0 { output.flush()?; } } Ok(()) } -fn to_json_value(element: &Element) -> Result { - if element.is_null() { - Ok(JsonValue::Null) - } else { - use ion_rs::Value::*; - let value = match element.value() { - Null(_ion_type) => JsonValue::Null, - Bool(b) => JsonValue::Bool(*b), - Int(i) => JsonValue::Number( - Number::from_str(&(*i).to_string()) - .with_context(|| format!("{element} could not be turned into a Number"))?, - ), - Float(f) => { - let value = *f; - if value.is_finite() { - JsonValue::Number( - Number::from_f64(value).with_context(|| { - format!("{element} could not be turned into a Number") - })?, - ) - } else { - // +inf, -inf, and nan are not JSON numbers, and are written as null in - // accordance with Ion's JSON down-conversion guidelines. - JsonValue::Null - } +fn to_json_value(value: LazyValue) -> Result { + use ValueRef::*; + let value = match value.read()? 
{ + Null(_) => JsonValue::Null, + Bool(b) => JsonValue::Bool(b), + Int(i) => JsonValue::Number(Number::from(i.expect_i128()?)), + Float(f) if f.is_finite() => JsonValue::Number(Number::from_f64(f).expect("f64 is finite")), + // Special floats like +inf, -inf, and NaN are written as `null` in + // accordance with Ion's JSON down-conversion guidelines. + Float(_f) => JsonValue::Null, + Decimal(d) => { + let mut text = d.to_string().replace('d', "e"); + if text.ends_with(".") { + // If there's a trailing "." with no digits of precision, discard it. JSON's `Number` + // type does not do anything with this information. + let _ = text.pop(); } - Decimal(d) => JsonValue::Number( - Number::from_str(d.to_string().replace('d', "e").as_str()) - .with_context(|| format!("{element} could not be turned into a Number"))?, - ), - Timestamp(t) => JsonValue::String(t.to_string()), - Symbol(s) => s - .text() - .map(|text| JsonValue::String(text.to_owned())) - .unwrap_or_else(|| JsonValue::Null), - String(s) => JsonValue::String(s.text().to_owned()), - Blob(b) | Clob(b) => { - use base64::{engine::general_purpose as base64_encoder, Engine as _}; - let base64_text = base64_encoder::STANDARD.encode(b.as_ref()); - JsonValue::String(base64_text) - } - List(s) | SExp(s) => { - let result: Result> = s.elements().map(to_json_value).collect(); - JsonValue::Array(result?) - } - Struct(s) => { - let result: Result> = s - .fields() - .map(|(k, v)| to_json_value(v).map(|value| (k.text().unwrap().into(), value))) - .collect(); - JsonValue::Object(result?) 
+ JsonValue::Number( + Number::from_str(text.as_str()) + .with_context(|| format!("{d} could not be turned into a Number"))?, + ) + } + Timestamp(t) => JsonValue::String(t.to_string()), + Symbol(s) => s + .text() + .map(|text| JsonValue::String(text.to_owned())) + .unwrap_or_else(|| JsonValue::Null), + String(s) => JsonValue::String(s.text().to_owned()), + Blob(b) | Clob(b) => { + use base64::{engine::general_purpose as base64_encoder, Engine as _}; + let base64_text = base64_encoder::STANDARD.encode(b.as_slice()); + JsonValue::String(base64_text) + } + SExp(s) => to_json_array(s.iter())?, + List(l) => to_json_array(l.iter())?, + Struct(s) => { + let mut map = Map::new(); + for field in s { + let field = field?; + let name = field.name()?.text().unwrap_or("$0").to_owned(); + let value = to_json_value(field.value())?; + map.insert(name, value); } - }; - Ok(value) - } + JsonValue::Object(map) + } + }; + Ok(value) +} + +fn to_json_array<'a>( + ion_values: impl IntoIterator>>, +) -> Result { + let result: Result> = ion_values + .into_iter() + .flat_map(|v| v.map(to_json_value)) + .collect(); + Ok(JsonValue::Array(result?)) } diff --git a/src/bin/ion/commands/dump.rs b/src/bin/ion/commands/dump.rs index 4b7ce53..20afae7 100644 --- a/src/bin/ion/commands/dump.rs +++ b/src/bin/ion/commands/dump.rs @@ -69,24 +69,26 @@ impl IonCliCommand for DumpCommand { for input_file in input_file_iter { let file = File::open(input_file) .with_context(|| format!("Could not open file '{}'", input_file))?; - let mut reader = if let Some(true) = args.get_one::("no-auto-decompress") { - ReaderBuilder::new().build(file)? + if let Some(true) = args.get_one::("no-auto-decompress") { + let mut reader = Reader::new(AnyEncoding, file)?; + write_in_format(&mut reader, &mut output, format, values)?; } else { let bfile = BufReader::with_capacity(BUF_READER_CAPACITY, file); let zfile = auto_decompressing_reader(bfile, INFER_HEADER_LENGTH)?; - ReaderBuilder::new().build(zfile)? 
+ let mut reader = Reader::new(AnyEncoding, zfile)?; + write_in_format(&mut reader, &mut output, format, values)?; }; - write_in_format(&mut reader, &mut output, format, values)?; } } else { let input: StdinLock = stdin().lock(); - let mut reader = if let Some(true) = args.get_one::("no-auto-decompress") { - ReaderBuilder::new().build(input)? + if let Some(true) = args.get_one::("no-auto-decompress") { + let mut reader = Reader::new(AnyEncoding, input)?; + write_in_format(&mut reader, &mut output, format, values)?; } else { let zinput = auto_decompressing_reader(input, INFER_HEADER_LENGTH)?; - ReaderBuilder::new().build(zinput)? + let mut reader = Reader::new(AnyEncoding, zinput)?; + write_in_format(&mut reader, &mut output, format, values)?; }; - write_in_format(&mut reader, &mut output, format, values)?; } output.flush()?; @@ -104,39 +106,27 @@ pub(crate) fn run(_command: &str, args: &ArgMatches) -> Result<()> { /// Constructs the appropriate writer for the given format, then writes all values found in the /// Reader to the new Writer. If `count` is specified will write at most `count` values. -pub(crate) fn write_in_format( - reader: &mut Reader, +pub(crate) fn write_in_format( + reader: &mut Reader, output: &mut Box, format: &str, count: Option, ) -> IonResult { - // XXX: The text formats below each have additional logic to append a newline because the - // ion-rs writer doesn't handle this automatically like it should. 
- //TODO: Solve these newline issues, get rid of hack - // https://github.com/amazon-ion/ion-cli/issues/36 - // https://github.com/amazon-ion/ion-rust/issues/437 - const NEWLINE: u8 = 0x0A; let written = match format { "pretty" => { - let mut writer = TextWriterBuilder::pretty().build(output)?; - let values_written = transcribe_n_values(reader, &mut writer, count)?; - writer.output_mut().write_all(&[NEWLINE])?; - Ok(values_written) + let mut writer = Writer::new(v1_0::Text.with_format(TextFormat::Pretty), output)?; + transcribe_n_values(reader, &mut writer, count) } "text" => { - let mut writer = TextWriterBuilder::default().build(output)?; - let values_written = transcribe_n_values(reader, &mut writer, count)?; - writer.output_mut().write_all(&[NEWLINE])?; - Ok(values_written) + let mut writer = Writer::new(v1_0::Text.with_format(TextFormat::Compact), output)?; + transcribe_n_values(reader, &mut writer, count) } "lines" => { - let mut writer = TextWriterBuilder::lines().build(output)?; - let values_written = transcribe_n_values(reader, &mut writer, count)?; - writer.output_mut().write_all(&[NEWLINE])?; - Ok(values_written) + let mut writer = Writer::new(v1_0::Text.with_format(TextFormat::Lines), output)?; + transcribe_n_values(reader, &mut writer, count) } "binary" => { - let mut writer = BinaryWriterBuilder::new().build(output)?; + let mut writer = Writer::new(v1_0::Binary, output)?; transcribe_n_values(reader, &mut writer, count) } unrecognized => unreachable!( @@ -149,95 +139,36 @@ pub(crate) fn write_in_format( /// Writes each value encountered in the Reader to the provided IonWriter. If `count` is specified /// will write at most `count` values. 
-fn transcribe_n_values( - reader: &mut Reader, - writer: &mut W, +fn transcribe_n_values( + reader: &mut Reader, + writer: &mut Writer>, count: Option, ) -> IonResult { const FLUSH_EVERY_N: usize = 100; let mut values_since_flush: usize = 0; - let mut annotations = vec![]; - let mut index = 0; - loop { - // Could use Option::is_some_and if that reaches stable - if reader.depth() == 0 && matches!(count, Some(n) if n <= index) { + let max_items = count.unwrap_or(usize::MAX); + let mut index: usize = 0; + + while let Some(value) = reader.next()? { + if index >= max_items { break; } - match reader.next()? { - StreamItem::Value(ion_type) | StreamItem::Null(ion_type) => { - if reader.has_annotations() { - annotations.clear(); - for annotation in reader.annotations() { - annotations.push(annotation?); - } - writer.set_annotations(&annotations); - } + writer.write(value)?; - if reader.parent_type() == Some(IonType::Struct) { - writer.set_field_name(reader.field_name()?); - } - - if reader.is_null() { - writer.write_null(ion_type)?; - continue; - } - - use IonType::*; - match ion_type { - Null => unreachable!("null values are handled prior to this match"), - Bool => writer.write_bool(reader.read_bool()?)?, - Int => writer.write_int(&reader.read_int()?)?, - Float => { - let float64 = reader.read_f64()?; - let float32 = float64 as f32; - if float32 as f64 == float64 { - // No data lost during cast; write it as an f32 - writer.write_f32(float32)?; - } else { - writer.write_f64(float64)?; - } - } - Decimal => writer.write_decimal(&reader.read_decimal()?)?, - Timestamp => writer.write_timestamp(&reader.read_timestamp()?)?, - Symbol => writer.write_symbol(reader.read_symbol()?)?, - String => writer.write_string(reader.read_string()?)?, - Clob => writer.write_clob(reader.read_clob()?)?, - Blob => writer.write_blob(reader.read_blob()?)?, - List => { - reader.step_in()?; - writer.step_in(List)?; - } - SExp => { - reader.step_in()?; - writer.step_in(SExp)?; - } - Struct => { - 
reader.step_in()?; - writer.step_in(Struct)?; - } - } - } - StreamItem::Nothing if reader.depth() > 0 => { - reader.step_out()?; - writer.step_out()?; - } - StreamItem::Nothing => break, - } - if reader.depth() == 0 { - index += 1; - values_since_flush += 1; - if values_since_flush == FLUSH_EVERY_N { - writer.flush()?; - values_since_flush = 0; - } + index += 1; + values_since_flush += 1; + if values_since_flush == FLUSH_EVERY_N { + writer.flush()?; + values_since_flush = 0; } } + writer.flush()?; Ok(index) } -/// Autodetects a compressed byte stream and wraps the original reader +/// Auto-detects a compressed byte stream and wraps the original reader /// into a reader that transparently decompresses. /// /// To support non-seekable readers like `Stdin`, we could have used a diff --git a/src/bin/ion/commands/mod.rs b/src/bin/ion/commands/mod.rs index d55f250..4e84038 100644 --- a/src/bin/ion/commands/mod.rs +++ b/src/bin/ion/commands/mod.rs @@ -1,5 +1,6 @@ use anyhow::anyhow; use clap::{crate_authors, crate_version, Arg, ArgAction, ArgMatches, Command as ClapCommand}; + pub mod beta; pub mod dump; @@ -70,10 +71,6 @@ pub trait IonCliCommand { } } - /// Sets up the pager (e.g. `less`) to which long text output will be directed. The default - /// implementation does not configure a pager. - fn set_up_pager(&self) {} - /// The core logic of the command. /// /// The default implementation assumes this command is a namespace (i.e. a group of subcommands). diff --git a/src/bin/ion/file_writer.rs b/src/bin/ion/file_writer.rs new file mode 100644 index 0000000..d73e57f --- /dev/null +++ b/src/bin/ion/file_writer.rs @@ -0,0 +1,57 @@ +use std::fs::File; +use std::io; +use std::io::{BufWriter, Write}; +use termcolor::{ColorSpec, WriteColor}; + +/// A buffered `io::Write` implementation that implements [`WriteColor`] by reporting that it does +/// not support TTY escape sequences and treating all requests to change or reset the current color +/// as no-ops. 
+// +// When writing to a file instead of a TTY, we don't want to use `termcolor` escape sequences as +// they would be stored as literal bytes rather than being interpreted. To achieve this, we need an +// `io::Write` implementation that also implements `termcolor`'s `WriteColor` trait. `WriteColor` +// allows the type to specify whether it supports interpreting escape codes. +// +// We cannot implement `WriteColor` for `BufWriter` directly due to Rust's coherence rules. Our +// crate must own the trait, the implementing type, or both. The `FileWriter` type defined below +// is a simple wrapper around a `BufWriter` that implements both `io::Write` and `termcolor`'s +// `WriteColor` trait. +pub struct FileWriter { + inner: BufWriter, +} + +impl FileWriter { + pub fn new(file: File) -> Self { + Self { + inner: BufWriter::new(file), + } + } +} + +// Delegates all `io::Write` methods to the nested `BufWriter`. +impl Write for FileWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.inner.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +impl WriteColor for FileWriter { + fn supports_color(&self) -> bool { + // FileWriter is never used to write to a TTY, so it does not support escape codes. + false + } + + fn set_color(&mut self, _spec: &ColorSpec) -> io::Result<()> { + // When asked to change the color spec, do nothing. + Ok(()) + } + + fn reset(&mut self) -> io::Result<()> { + // When asked to reset the color spec to the default settings, do nothing.
+ Ok(()) + } +} diff --git a/src/bin/ion/main.rs b/src/bin/ion/main.rs index 048ec39..49dca52 100644 --- a/src/bin/ion/main.rs +++ b/src/bin/ion/main.rs @@ -1,4 +1,5 @@ mod commands; +mod file_writer; use crate::commands::beta::BetaNamespace; use anyhow::Result; @@ -18,7 +19,7 @@ fn main() -> Result<()> { // If `ion-cli` is being invoked as part of a pipeline we want to allow the pipeline to // to shut off without printing an error to stderr Some(IonError::Io(error)) if error.source().kind() == ErrorKind::BrokenPipe => { - return Ok(()) + return Ok(()); } _ => return Err(e), } diff --git a/tests/cli.rs b/tests/cli.rs index 44a5c71..e667e1f 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -2,7 +2,6 @@ use anyhow::Result; use assert_cmd::Command; use ion_rs::Element; use rstest::*; -use std::fs; use std::fs::File; use std::io::{Read, Write}; use std::time::Duration; @@ -213,14 +212,21 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> let command_assert = cmd.assert(); let output = command_assert.get_output(); let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout.trim_end(), expected_output); + assert_eq!( + Element::read_all(stdout.trim_end())?, + Element::read_all(expected_output)? 
+ ); Ok(()) } #[cfg(feature = "experimental-code-gen")] -#[rstest] -#[case::simple_struct( - r#" +mod code_gen_tests { + use super::*; + use std::fs; + + #[rstest] + #[case::simple_struct( + r#" type::{ name: simple_struct, fields: { @@ -229,32 +235,32 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> }, } "#, - &["id: i64", "name: String"], - &["pub fn name(&self) -> &String {", "pub fn id(&self) -> &i64 {"] -)] -#[case::value_struct( - r#" + & ["id: i64", "name: String"], + & ["pub fn name(&self) -> &String {", "pub fn id(&self) -> &i64 {"] + )] + #[case::value_struct( + r#" type::{ name: value_struct, type: int // this will be a field in struct } "#, - &["value: i64"], - &["pub fn value(&self) -> &i64 {"] -)] -#[case::sequence_struct( - r#" + & ["value: i64"], + & ["pub fn value(&self) -> &i64 {"] + )] + #[case::sequence_struct( + r#" type::{ name: sequence_struct, element: string, // this will be a sequence field in struct type: list } "#, - &["value: Vec"], - &["pub fn value(&self) -> &Vec {"] -)] -#[case::struct_with_reference_field( - r#" + & ["value: Vec"], + & ["pub fn value(&self) -> &Vec {"] + )] + #[case::struct_with_reference_field( + r#" type::{ name: struct_with_reference_field, fields: { @@ -267,11 +273,11 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> type: int } "#, - &["reference: OtherType"], - &["pub fn reference(&self) -> &OtherType {"] -)] -#[case::struct_with_nested_type( - r#" + & ["reference: OtherType"], + & ["pub fn reference(&self) -> &OtherType {"] + )] + #[case::struct_with_nested_type( + r#" type::{ name: struct_with_nested_type, fields: { @@ -279,56 +285,55 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> } } "#, - &["nested_type: NestedType1"], - &["pub fn nested_type(&self) -> &NestedType1 {"] -)] -/// Calls ion-cli beta generate with different schema file. 
Pass the test if the return value contains the expected properties and accessors. -fn test_code_generation_in_rust( - #[case] test_schema: &str, - #[case] expected_properties: &[&str], - #[case] expected_accessors: &[&str], -) -> Result<()> { - let mut cmd = Command::cargo_bin("ion")?; - let temp_dir = TempDir::new()?; - let input_schema_path = temp_dir.path().join("test_schema.isl"); - let mut input_schema_file = File::create(&input_schema_path)?; - input_schema_file.write(test_schema.as_bytes())?; - input_schema_file.flush()?; - cmd.args([ - "beta", - "generate", - "--schema", - "test_schema.isl", - "--output", - temp_dir.path().to_str().unwrap(), - "--language", - "rust", - "--directory", - temp_dir.path().to_str().unwrap(), - ]); - let command_assert = cmd.assert(); - let output_file_path = temp_dir.path().join("ion_generated_code.rs"); - command_assert.success(); - let contents = - fs::read_to_string(output_file_path).expect("Should have been able to read the file"); - for expected_property in expected_properties { - assert!(contents.contains(expected_property)); - } - for expected_accessor in expected_accessors { - assert!(contents.contains(expected_accessor)); + & ["nested_type: NestedType1"], + & ["pub fn nested_type(&self) -> &NestedType1 {"] + )] + /// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. 
+ fn test_code_generation_in_rust( + #[case] test_schema: &str, + #[case] expected_properties: &[&str], + #[case] expected_accessors: &[&str], + ) -> Result<()> { + let mut cmd = Command::cargo_bin("ion")?; + let temp_dir = TempDir::new()?; + let input_schema_path = temp_dir.path().join("test_schema.isl"); + let mut input_schema_file = File::create(&input_schema_path)?; + input_schema_file.write(test_schema.as_bytes())?; + input_schema_file.flush()?; + cmd.args([ + "beta", + "generate", + "--schema", + "test_schema.isl", + "--output", + temp_dir.path().to_str().unwrap(), + "--language", + "rust", + "--directory", + temp_dir.path().to_str().unwrap(), + ]); + let command_assert = cmd.assert(); + let output_file_path = temp_dir.path().join("ion_generated_code.rs"); + command_assert.success(); + let contents = + fs::read_to_string(output_file_path).expect("Should have been able to read the file"); + for expected_property in expected_properties { + assert!(contents.contains(expected_property)); + } + for expected_accessor in expected_accessors { + assert!(contents.contains(expected_accessor)); + } + // verify that it generates read-write APIs + assert!(contents.contains("pub fn read_from(reader: &mut Reader) -> SerdeResult {")); + assert!(contents + .contains("pub fn write_to(&self, writer: &mut W) -> SerdeResult<()> {")); + Ok(()) } - // verify that it generates read-write APIs - assert!(contents.contains("pub fn read_from(reader: &mut Reader) -> SerdeResult {")); - assert!(contents - .contains("pub fn write_to(&self, writer: &mut W) -> SerdeResult<()> {")); - Ok(()) -} -#[cfg(feature = "experimental-code-gen")] -#[rstest] -#[case( - "SimpleStruct", - r#" + #[rstest] + #[case( + "SimpleStruct", + r#" type::{ name: simple_struct, fields: { @@ -337,35 +342,35 @@ fn test_code_generation_in_rust( } } "#, - &["private int id;", "private String name;"], - &["public String getName() {", "public int getId() {"] -)] -#[case( - "ValueStruct", - r#" + & ["private int id;", 
"private String name;"], + & ["public String getName() {", "public int getId() {"] + )] + #[case( + "ValueStruct", + r#" type::{ name: value_struct, type: int // this will be a field in struct } "#, - &["private int value;"], - &["public int getValue() {"] -)] -#[case( - "SequenceStruct", - r#" + & ["private int value;"], + & ["public int getValue() {"] + )] + #[case( + "SequenceStruct", + r#" type::{ name: sequence_struct, element: string, // this will be a sequence field in struct type: list } "#, - &["private ArrayList value;"], - &["public ArrayList getValue() {"] -)] -#[case( - "StructWithReferenceField", - r#" + & ["private ArrayList value;"], + & ["public ArrayList getValue() {"] + )] + #[case( + "StructWithReferenceField", + r#" type::{ name: struct_with_reference_field, fields: { @@ -378,12 +383,12 @@ fn test_code_generation_in_rust( type: int } "#, - &["private OtherType reference;"], - &["public OtherType getReference() {"] -)] -#[case( - "StructWithNestedType", - r#" + & ["private OtherType reference;"], + & ["public OtherType getReference() {"] + )] + #[case( + "StructWithNestedType", + r#" type::{ name: struct_with_nested_type, fields: { @@ -391,45 +396,47 @@ fn test_code_generation_in_rust( } } "#, - &["private NestedType1 nestedType;"], - &["public NestedType1 getNestedType() {"] -)] -/// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. 
-fn test_code_generation_in_java( - #[case] test_name: &str, - #[case] test_schema: &str, - #[case] expected_properties: &[&str], - #[case] expected_accessors: &[&str], -) -> Result<()> { - let mut cmd = Command::cargo_bin("ion")?; - let temp_dir = TempDir::new()?; - let input_schema_path = temp_dir.path().join("test_schema.isl"); - let mut input_schema_file = File::create(&input_schema_path)?; - input_schema_file.write(test_schema.as_bytes())?; - input_schema_file.flush()?; - cmd.args([ - "beta", - "generate", - "--schema", - "test_schema.isl", - "--output", - temp_dir.path().to_str().unwrap(), - "--language", - "java", - "--namespace", - "org.example", - "--directory", - temp_dir.path().to_str().unwrap(), - ]); - let command_assert = cmd.assert(); - let output_file_path = temp_dir.path().join(format!("{}.java", test_name)); - command_assert.success(); - let contents = fs::read_to_string(output_file_path).expect("Can not read generated code file."); - for expected_property in expected_properties { - assert!(contents.contains(expected_property)); - } - for expected_accessor in expected_accessors { - assert!(contents.contains(expected_accessor)); + & ["private NestedType1 nestedType;"], + & ["public NestedType1 getNestedType() {"] + )] + /// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. 
+ fn test_code_generation_in_java( + #[case] test_name: &str, + #[case] test_schema: &str, + #[case] expected_properties: &[&str], + #[case] expected_accessors: &[&str], + ) -> Result<()> { + let mut cmd = Command::cargo_bin("ion")?; + let temp_dir = TempDir::new()?; + let input_schema_path = temp_dir.path().join("test_schema.isl"); + let mut input_schema_file = File::create(&input_schema_path)?; + input_schema_file.write(test_schema.as_bytes())?; + input_schema_file.flush()?; + cmd.args([ + "beta", + "generate", + "--schema", + "test_schema.isl", + "--output", + temp_dir.path().to_str().unwrap(), + "--language", + "java", + "--namespace", + "org.example", + "--directory", + temp_dir.path().to_str().unwrap(), + ]); + let command_assert = cmd.assert(); + let output_file_path = temp_dir.path().join(format!("{}.java", test_name)); + command_assert.success(); + let contents = + fs::read_to_string(output_file_path).expect("Can not read generated code file."); + for expected_property in expected_properties { + assert!(contents.contains(expected_property)); + } + for expected_accessor in expected_accessors { + assert!(contents.contains(expected_accessor)); + } + Ok(()) } - Ok(()) } diff --git a/tests/code-gen-tests.rs b/tests/code-gen-tests.rs index 3632d2e..77f9ed5 100644 --- a/tests/code-gen-tests.rs +++ b/tests/code-gen-tests.rs @@ -1,12 +1,12 @@ +#![cfg(feature = "experimental-code-gen")] + use anyhow::Result; use assert_cmd::Command; use rstest::rstest; -use std::fs; use std::fs::File; use std::io::Write; use tempfile::TempDir; -#[cfg(feature = "experimental-code-gen")] #[test] fn roundtrip_tests_for_generated_code_gradle() -> Result<()> { // run the gradle project defined under `code-gen-projects`, @@ -39,7 +39,6 @@ fn roundtrip_tests_for_generated_code_gradle() -> Result<()> { Ok(()) } -#[cfg(feature = "experimental-code-gen")] #[test] fn roundtrip_tests_for_generated_code_cargo() -> Result<()> { // run the cargo project defined under `code-gen-projects`, @@ -93,7 
+92,7 @@ fn roundtrip_tests_for_generated_code_cargo() -> Result<()> { #[cfg(feature = "experimental-code-gen")] #[rstest] #[case::any_element_list( - r#" +r#" type::{ name: any_element_list, type: list, // this doesn't specify the type for elements in the list with `element` constraint