From a448c887a3d4864a70d82fcd125c3340c5af13e2 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 30 Apr 2024 21:02:56 -0400 Subject: [PATCH 01/13] Adds new `ion beta inspect` --- Cargo.lock | 95 +- Cargo.toml | 9 +- src/bin/ion/commands/beta/inspect.rs | 1387 +++++++++++++++----------- src/bin/ion/commands/beta/mod.rs | 6 +- src/bin/ion/file_writer.rs | 55 + src/bin/ion/main.rs | 3 +- 6 files changed, 984 insertions(+), 571 deletions(-) create mode 100644 src/bin/ion/file_writer.rs diff --git a/Cargo.lock b/Cargo.lock index e06479d..8abd322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,6 +130,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bigdecimal" version = "0.3.1" @@ -179,9 +185,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.1" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c764d619ca78fccbf3069b37bd7af92577f044bb15236036662d79b6559f25b7" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byteorder" @@ -437,6 +443,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "delegate" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.50", +] + [[package]] name = "deranged" version = "0.3.11" @@ -749,6 +766,12 @@ dependencies = [ "cc", ] +[[package]] +name = "ice_code" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6524844f553e8da5999f3000cf11d3f1ff926bb03fc087441c7b86dee4a7d48" + 
[[package]] name = "ident_case" version = "1.0.1" @@ -790,6 +813,7 @@ checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies = [ "equivalent", "hashbrown 0.14.3", + "serde", ] [[package]] @@ -833,7 +857,8 @@ dependencies = [ "convert_case", "flate2", "infer", - "ion-rs 1.0.0-rc.3", + "ion-rs 1.0.0-rc.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ion-rs 1.0.0-rc.3 (git+https://github.com/amazon-ion/ion-rust.git)", "ion-schema", "matches", "memmap", @@ -843,6 +868,7 @@ dependencies = [ "serde_json", "tempfile", "tera", + "termcolor", "thiserror", "zstd", ] @@ -884,7 +910,27 @@ dependencies = [ "num-integer", "num-traits", "serde", - "serde_with", + "serde_with 2.3.3", + "smallvec", + "thiserror", +] + +[[package]] +name = "ion-rs" +version = "1.0.0-rc.3" +source = "git+https://github.com/amazon-ion/ion-rust.git#eab334e79e420d5a5928918ce7f1ac57741f9d3a" +dependencies = [ + "arrayvec", + "base64 0.12.3", + "bumpalo", + "chrono", + "delegate 0.12.0", + "ice_code", + "nom", + "num-integer", + "num-traits", + "serde", + "serde_with 3.8.1", "smallvec", "thiserror", ] @@ -1445,7 +1491,25 @@ dependencies = [ "indexmap 1.9.3", "serde", "serde_json", - "serde_with_macros", + "serde_with_macros 2.3.3", + "time", +] + +[[package]] +name = "serde_with" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad483d2ab0149d5a5ebcd9972a3852711e0153d863bf5a5d0391d28883c4a20" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.2.3", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros 3.8.1", "time", ] @@ -1461,6 +1525,18 @@ dependencies = [ "syn 2.0.50", ] +[[package]] +name = "serde_with_macros" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65569b702f41443e8bc8bbb1c5779bd0450bbe723b56198980e80ec45780bce2" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.50", +] + 
[[package]] name = "sha2" version = "0.10.8" @@ -1572,6 +1648,15 @@ dependencies = [ "unic-segment", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "termtree" version = "0.4.1" diff --git a/Cargo.toml b/Cargo.toml index 2bc0e43..f71dab3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,18 +17,21 @@ clap = { version = "4.0.17", features = ["cargo"] } colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" -ion-rs = {version = "1.0.0-rc.2", features = ["experimental"]} +ion-rs = { version = "1.0.0-rc.2", features = ["experimental"] } +#new-ion-rs = { package = "ion-rs", version = "1.0.0-rc.3", path = "../ion-rust", features = ["experimental"] } +new-ion-rs = { package = "ion-rs", git = "https://github.com/amazon-ion/ion-rust.git", features = ["experimental"] } memmap = "0.7.0" tempfile = "3.2.0" ion-schema = "0.10.0" serde = { version = "1.0.163", features = ["derive"] } -serde_json = { version = "1.0.81", features = [ "arbitrary_precision", "preserve_order" ] } +serde_json = { version = "1.0.81", features = ["arbitrary_precision", "preserve_order"] } base64 = "0.21.1" -tera = { version = "1.18.1", optional = true } +tera = { version = "1.18.1", optional = true } convert_case = { version = "0.6.0", optional = true } matches = "0.1.10" thiserror = "1.0.50" zstd = "0.13.0" +termcolor = "1.4.1" [target.'cfg(not(target_os = "windows"))'.dependencies] pager = "0.16.1" diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index 04bd896..a17666b 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -1,19 +1,27 @@ -use std::cmp::min; -use std::fmt::{Display, Write}; +use std::fmt::Display; use std::fs::File; use std::io; -use std::io::BufWriter; -use std::ops::Range; -use 
std::str::{from_utf8_unchecked, FromStr}; +use std::io::Write; +use std::str::FromStr; -use crate::commands::{IonCliCommand, WithIonCliArgument}; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; -use colored::Colorize; -use ion_rs::*; -use memmap::MmapOptions; -#[cfg(not(target_os = "windows"))] -use pager::Pager; +use new_ion_rs::*; + +use crate::commands::{IonCliCommand, WithIonCliArgument}; + +// The `inspect` command uses the `termcolor` crate to colorize its text when STDOUT is a TTY. +use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, StandardStreamLock, WriteColor}; +// When writing to a named file instead of STDOUT, `inspect` will use a `FileWriter` instead. +// `FileWriter` ignores all requests to emit TTY color escape codes. +use crate::file_writer::FileWriter; + + +// * The output stream could be STDOUT or a file handle, so we use `dyn io::Write` to abstract +// over the two implementations. +// * The Drop implementation will ensure that the output stream is flushed when the last reference +// is dropped, so we don't need to do that manually. +type OutputRef<'a> = Box; pub struct InspectCommand; @@ -40,12 +48,11 @@ impl IonCliCommand for InspectCommand { .help("Do not display any user values for the first `n` bytes of Ion data.") .long_help( "When specified, the inspector will skip ahead `n` bytes before -beginning to display the contents of the stream. System values like -Ion version markers and symbol tables in the bytes being skipped will -still be displayed. If the requested number of bytes falls in the -middle of a value, the whole value (complete with field ID and -annotations if applicable) will be displayed. If the value is nested -in one or more containers, those containers will be displayed too.", +beginning to display the contents of the stream. 
If the requested number +of bytes falls in the middle of a scalar, the whole value (complete with +field ID and annotations if applicable) will be displayed. If the value +is nested in one or more containers, the opening delimiters of those +containers will be displayed.", ), ) .arg( @@ -58,24 +65,28 @@ in one or more containers, those containers will be displayed too.", .help("Only display the next 'n' bytes of Ion data.") .long_help( "When specified, the inspector will stop printing values after -processing `n` bytes of Ion data. If `n` falls within a value, the -complete value will be displayed.", +processing `n` bytes of Ion data. If `n` falls within a scalar, the +complete value will be displayed. If `n` falls within one or more containers, +the closing delimiters for those containers will be displayed. If this flag +is used with `--skip-bytes`, `n` is counted from the beginning of the first +value that starts after `--skip-bytes`. +", ), ) } - #[cfg(not(target_os = "windows"))] // TODO find a cross-platform pager implementation. - fn set_up_pager(&self) { - // Direct output to the pager specified by the PAGER environment variable, or "less -FIRX" - // if the environment variable is not set. Note: a pager is not used if the output is not - // a TTY. - Pager::with_default_pager("less -FIRX").setup(); - } - fn run(&self, _command_path: &mut Vec, args: &ArgMatches) -> Result<()> { - self.set_up_pager(); + // On macOS and Linux, the `inspect` command's output will automatically be rerouted to a paging + // utility like `less` when STDOUT is a TTY. + // TODO find a cross-platform pager implementation. + #[cfg(not(target_os = "windows"))] + { + // If STDOUT is a TTY, direct output to the pager specified by the PAGER environment + // variable, or "less -FIRX" if the environment variable is not set. + pager::Pager::with_default_pager("less -FIRX").setup(); + } - // --skip-bytes has a default value, so we can unwrap this safely. 
+ // `--skip-bytes` has a default value, so we can unwrap this safely. let skip_bytes_arg = args.get_one::("skip-bytes").unwrap().as_str(); let bytes_to_skip = usize::from_str(skip_bytes_arg) @@ -83,7 +94,7 @@ complete value will be displayed.", // will be displayed if it bubbles up to the end user. .with_context(|| format!("Invalid value for '--skip-bytes': '{}'", skip_bytes_arg))?; - // --limit-bytes has a default value, so we can unwrap this safely. + // `--limit-bytes` has a default value, so we can unwrap this safely. let limit_bytes_arg = args.get_one::("limit-bytes").unwrap().as_str(); let mut limit_bytes = usize::from_str(limit_bytes_arg) @@ -92,18 +103,28 @@ complete value will be displayed.", // If unset, --limit-bytes is effectively usize::MAX. However, it's easier on users if we let // them specify "0" on the command line to mean "no limit". if limit_bytes == 0 { - limit_bytes = usize::MAX + limit_bytes = usize::MAX; } + // These types are provided by the `termcolor` crate. They wrap the normal `io::Stdout` and + // `io::StdOutLock` types, making it possible to write colorful text to the output stream when + // it's a TTY that understands formatting escape codes. These variables are declared here so + // the lifetime will extend through the remainder of the function. Unlike `io::StdoutLock`, + // the `StandardStreamLock` does not have a static lifetime. + let stdout: StandardStream; + let stdout_lock: StandardStreamLock<'_>; + // If the user has specified an output file, use it. let mut output: OutputRef = if let Some(file_name) = args.get_one::("output") { let output_file = File::create(file_name) - .with_context(|| format!("Could not open '{}'", file_name))?; - let buf_writer = BufWriter::new(output_file); - Box::new(buf_writer) + .with_context(|| format!("Could not open output file '{file_name}' for writing"))?; + let file_writer = FileWriter::new(output_file); + Box::new(file_writer) } else { // Otherwise, write to STDOUT. 
- Box::new(io::stdout().lock()) + stdout = StandardStream::stdout(ColorChoice::Always); + stdout_lock = stdout.lock(); + Box::new(stdout_lock) }; // Run the inspector on each input file that was specified. @@ -111,7 +132,7 @@ complete value will be displayed.", for input_file_name in input_file_iter { let input_file = File::open(input_file_name) .with_context(|| format!("Could not open '{}'", input_file_name))?; - inspect_file( + inspect_input( input_file_name, input_file, &mut output, @@ -120,32 +141,11 @@ complete value will be displayed.", )?; } } else { + let stdin_lock = io::stdin().lock(); // If no input file was specified, run the inspector on STDIN. - - // The inspector expects its input to be a byte array or mmap()ed file acting as a byte - // array. If the user wishes to provide data on STDIN, we'll need to copy those bytes to - // a temporary file and then read from that. - - // Create a temporary file that will delete itself when the program ends. - let mut input_file = tempfile::tempfile().with_context(|| { - concat!( - "Failed to create a temporary file to store STDIN.", - "Try passing an --input flag instead." - ) - })?; - - // Pipe the data from STDIN to the temporary file. - let mut writer = BufWriter::new(input_file); - io::copy(&mut io::stdin(), &mut writer) - .with_context(|| "Failed to copy STDIN to a temp file.")?; - // Get our file handle back from the BufWriter - input_file = writer - .into_inner() - .with_context(|| "Failed to read from temp file containing STDIN data.")?; - // Read from the now-populated temporary file. - inspect_file( - "STDIN temp file", - input_file, + inspect_input( + "STDIN", + stdin_lock, &mut output, bytes_to_skip, limit_bytes, @@ -155,578 +155,847 @@ complete value will be displayed.", } } -// Create a type alias to simplify working with a shared reference to our output stream. 
-type OutputRef = Box; -// * The output stream could be STDOUT or a file handle, so we use `dyn io::Write` to abstract -// over the two implementations. -// * The Drop implementation will ensure that the output stream is flushed when the last reference -// is dropped, so we don't need to do that manually. - -// Given a file, try to mmap() it and run the inspector over the resulting byte array. -fn inspect_file( - input_file_name: &str, - input_file: File, +/// Prints a table showing the offset, length, binary encoding, and text encoding of the Ion stream +/// contained in `input`. +fn inspect_input( + input_name: &str, + input: Input, output: &mut OutputRef, bytes_to_skip: usize, limit_bytes: usize, ) -> Result<()> { - // mmap involves operating system interactions that inherently place its usage outside of Rust's - // safety guarantees. If the file is unexpectedly truncated while it's being read, for example, - // problems could arise. - let mmap = unsafe { - MmapOptions::new() - .map(&input_file) - .with_context(|| format!("Could not mmap '{}'", input_file_name))? - }; - - // Treat the mmap as a byte array. - let ion_data: &[u8] = &mmap[..]; - // Confirm that the input data is binary Ion, then run the inspector. - match ion_data { - // Pattern match the byte array to verify it starts with an IVM - [0xE0, 0x01, 0x00, 0xEA, ..] => { - write_header(output)?; - let mut inspector = IonInspector::new(ion_data, output, bytes_to_skip, limit_bytes)?; - // This inspects all values at the top level, recursing as necessary. - inspector.inspect_level()?; - } - _ => { - // bail! constructs an `anyhow::Result` with the given context and returns. - bail!( - "Input file '{}' does not appear to be binary Ion.", - input_file_name - ); - } - }; + let mut reader = SystemReader::new(AnyEncoding, input); + let mut inspector = IonInspector::new(output, bytes_to_skip, limit_bytes)?; + // This inspects all values at the top level, recursing as necessary. 
+ inspector.inspect_top_level(&mut reader) + .with_context(|| format!("input: {input_name}"))?; Ok(()) } -const IVM_HEX: &str = "e0 01 00 ea"; -const IVM_TEXT: &str = "// Ion 1.0 Version Marker"; -// System events (IVM, symtabs) are always at the top level. -const SYSTEM_EVENT_INDENTATION: &str = ""; -const LEVEL_INDENTATION: &str = " "; // 2 spaces per level -const TEXT_WRITER_INITIAL_BUFFER_SIZE: usize = 128; - -struct IonInspector<'a> { - output: &'a mut OutputRef, - reader: SystemReader>, +// See the Wikipedia page for Unicode Box Drawing[1] for other potentially useful glyphs. +// [1] https://en.wikipedia.org/wiki/Box-drawing_characters#Unicode +const VERTICAL_LINE: &str = "│"; +const START_OF_HEADER: &str = "┌──────────────┬──────────────┬─────────────────────────┬──────────────────────┐"; +const END_OF_HEADER: &str = "├──────────────┼──────────────┼─────────────────────────┼──────────────────────┘"; +const ROW_SEPARATOR: &str = r#"├──────────────┼──────────────┼─────────────────────────┤ +"#; +const END_OF_TABLE: &str = r#"└──────────────┴──────────────┴─────────────────────────┘ +"#; + +struct IonInspector<'a, 'b> { + output: &'a mut OutputRef<'b>, bytes_to_skip: usize, + skip_complete: bool, limit_bytes: usize, - // Reusable buffer for formatting bytes as hex - hex_buffer: String, - // Reusable buffer for formatting text - text_buffer: String, - // Reusable buffer for colorizing text - color_buffer: String, - // Reusable buffer for tracking indentation - indentation_buffer: String, // Text Ion writer for formatting scalar values - text_ion_writer: RawTextWriter>, + text_writer: v1_0::RawTextWriter>, } -impl<'a> IonInspector<'a> { - fn new<'b>( - input: &'b [u8], - out: &'b mut OutputRef, +// This buffer is used by the IonInspector's `text_writer` to format scalar values. +const TEXT_WRITER_INITIAL_BUFFER_SIZE: usize = 128; + +// The number of hex-encoded bytes to show in each row of the `Binary Ion` column. 
+const BYTES_PER_ROW: usize = 8; + +impl<'a, 'b> IonInspector<'a, 'b> { + fn new( + out: &'a mut OutputRef<'b>, bytes_to_skip: usize, limit_bytes: usize, - ) -> IonResult> { - let reader = SystemReader::new(RawBinaryReader::new(input)); - let text_ion_writer = RawTextWriterBuilder::new(TextKind::Compact) - .build(Vec::with_capacity(TEXT_WRITER_INITIAL_BUFFER_SIZE))?; + ) -> IonResult> { + let text_writer = WriteConfig::::new(TextFormat::Compact) + .build_raw_writer(Vec::with_capacity(TEXT_WRITER_INITIAL_BUFFER_SIZE))?; let inspector = IonInspector { output: out, - reader, bytes_to_skip, + skip_complete: bytes_to_skip == 0, limit_bytes, - hex_buffer: String::new(), - text_buffer: String::new(), - color_buffer: String::new(), - indentation_buffer: String::new(), - text_ion_writer, + text_writer, }; Ok(inspector) } - // Returns the offset of the first byte that pertains to the value on which the reader is - // currently parked. - fn first_value_byte_offset(&self) -> usize { - if let Some(offset) = self.reader.field_id_offset() { - return offset; - } - if let Some(offset) = self.reader.annotations_offset() { - return offset; - } - self.reader.header_offset() - } - - // Returns the byte offset range containing the current value and its annotations/field ID if - // applicable. - fn complete_value_range(&self) -> Range { - let start = self.first_value_byte_offset(); - let end = self.reader.value_range().end; - start..end - } - - // Displays all of the values (however deeply nested) at the current level. - fn inspect_level(&mut self) -> Result<()> { - self.increase_indentation(); - - // Per-level bytes skipped are tracked so we can add them to the text Ion comments that - // appear each time some number of values is skipped. - let mut bytes_skipped_this_level = 0; - + /// Iterates over the items in `reader`, printing a table section for each top level value. 
+ fn inspect_top_level(&mut self, reader: &mut SystemReader) -> Result<()> { + self.write_table_header()?; + let mut is_first_item = true; + let mut has_printed_skip_message = false; loop { - let ion_type = match self.reader.next()? { - SystemStreamItem::Nothing => break, - SystemStreamItem::VersionMarker(major, minor) => { - if major != 1 || minor != 0 { - bail!( - "Only Ion 1.0 is supported. Found IVM for v{}.{}", - major, - minor - ); - } - output( - self.output, - None, - Some(4), - SYSTEM_EVENT_INDENTATION, - IVM_HEX, - IVM_TEXT.dimmed(), - ) - .expect("output() failure from on_ivm()"); - continue; + let item = reader.next_item()?; + // If the next item isn't `EndOfStream`, check to see whether its final byte offset is + // beyond the configured number of bytes to skip before printing can begin (the value of + // the `--skip-bytes` flag). + if !matches!(item, SystemStreamItem::EndOfStream(_)) && self.should_skip(item.raw_stream_item()) { + // If we need to skip it, print a message indicating that some number of items have been skipped. + if !has_printed_skip_message { + self.write_skipping_message(0, "stream items")?; + // We only print this message once, so remember that we've already done this. + has_printed_skip_message = true; } - // We don't care if this is a system or user-level value; that distinction - // is handled inside the SystemReader. - SystemStreamItem::SymbolTableValue(ion_type) - | SystemStreamItem::Value(ion_type) - | SystemStreamItem::SymbolTableNull(ion_type) - | SystemStreamItem::Null(ion_type) => ion_type, - }; - // See if we've already processed `bytes_to_skip` bytes; if not, move to the next value. - let complete_value_range = self.complete_value_range(); - if complete_value_range.end <= self.bytes_to_skip { - bytes_skipped_this_level += complete_value_range.len(); + // Skip ahead to the next stream item. continue; } - // Saturating subtraction: if the result would underflow, the answer will be zero. 
- let bytes_processed = complete_value_range - .start - .saturating_sub(self.bytes_to_skip); - // See if we've already processed `limit_bytes`; if so, stop processing. - if bytes_processed >= self.limit_bytes { - let limit_message = if self.reader.depth() > 0 { - "// --limit-bytes reached, stepping out." - } else { - "// --limit-bytes reached, ending." - }; - output( - self.output, - None, - None, - &self.indentation_buffer, - "...", - limit_message.dimmed(), - )?; - self.decrease_indentation(); + // Also check the final byte offset to see if it goes beyond the processing limit set by + // the `--limit-bytes` flag. + if self.is_past_limit(item.raw_stream_item()) { + self.write_limiting_message(0, "ending")?; + // If the limit is reached at the top level, there's nothing more to do. return Ok(()); } - // We're no longer skip-scanning to `bytes_to_skip`. If we skipped values at this depth - // to get to this point, make a note of it in the output. - if bytes_skipped_this_level > 0 { - self.text_buffer.clear(); - write!( - &mut self.text_buffer, - "// Skipped {} bytes of user-level data", - bytes_skipped_this_level - )?; - output( - self.output, - None, - None, - &self.indentation_buffer, - "...", - &self.text_buffer.dimmed(), - )?; - bytes_skipped_this_level = 0; + // In most cases, we would take this opportunity to print a row separator to create + // a new table section for this top-level item. However, there are two exceptions we + // need to check for: + // 1. The first stream item follows the header and so does not require a row separator. + if !is_first_item + // 2. The end of the stream prints the end of the table, not a row separator. + && !matches!(item, SystemStreamItem::EndOfStream(_)) { + // If this item is neither the first nor last in the stream, print a row separator. 
+ write!(self.output, "{ROW_SEPARATOR}")?; } - self.write_field_if_present()?; - self.write_annotations_if_present()?; - // Print the value or, if it's a container, its opening delimiter: {, (, or [ - self.write_value()?; - - // If the current value is a container, step into it and inspect its contents. - match ion_type { - IonType::List | IonType::SExp | IonType::Struct => { - self.reader.step_in()?; - self.inspect_level()?; - self.reader.step_out()?; - // Print the container's closing delimiter: }, ), or ] - self.text_buffer.clear(); - self.text_buffer.push_str(closing_delimiter_for(ion_type)); - if ion_type != IonType::SExp && self.reader.depth() > 0 { - self.text_buffer.push(','); - } - output( - self.output, - None, - None, - &self.indentation_buffer, - "", - &self.text_buffer, - )?; + match item { + SystemStreamItem::SymbolTable(lazy_struct) => { + let lazy_value = lazy_struct.as_value(); + self.inspect_value(0, "", lazy_value)?; } - _ => {} + SystemStreamItem::Value(lazy_value) => { + self.inspect_value(0, "", lazy_value)?; + } + SystemStreamItem::VersionMarker(marker) => { + self.inspect_ivm(marker)?; + } + SystemStreamItem::EndOfStream(_) => { + break; + } + // `SystemStreamItem` is marked `#[non_exhaustive]`, so this branch is needed. + // The arms above cover all of the existing variants at the time of writing. + _ => unimplemented!("a new SystemStreamItem variant was added") } - } - self.decrease_indentation(); + // Notice that we wait until _after_ the item has been inspected above to set the + // `skip_complete` flag. This is because the offset specified by `--skip-bytes` may + // have been located somewhere inside the item and the inspector needed to look for + // that point within its nested values. If this happens, the inspector will set the + // `skip_complete` flag when it reaches that offset at a deeper level of nesting. + // When it reaches this point, `skip_complete` will already be true. 
However, if the + // offset fell at the beginning of a top level value, the line below will set the flag + // for the first time. + self.skip_complete = true; + is_first_item = false; + } + self.output.write_all(END_OF_TABLE.as_bytes())?; Ok(()) } - fn increase_indentation(&mut self) { - // Add a level's worth of indentation to the buffer. - if self.reader.depth() > 0 { - self.indentation_buffer.push_str(LEVEL_INDENTATION); + /// If `maybe_item` is: + /// * `Some(entity)`, checks to see if the entity's final byte offset is beyond the configured + /// number of bytes to skip. + /// * `None`, then there is no stream-level entity backing the item (that is: it was the result + /// of a macro expansion). Checks to see if the inspector has already completed its + /// skipping phase on an earlier item. + fn should_skip(&mut self, maybe_item: Option) -> bool { + match maybe_item { + // If this item came from an input literal, see if the input literal ends after + // the requested number of bytes to skip. If not, we'll move to the next one. + Some(item) => item.range().end <= self.bytes_to_skip, + // If this item came from a macro, there's no corresponding input literal. If we + // haven't finished skipping input literals, we'll skip this ephemeral value. + None => !self.skip_complete } } - fn decrease_indentation(&mut self) { - // Remove a level's worth of indentation from the buffer. - if self.reader.depth() > 0 { - let new_length = self.indentation_buffer.len() - LEVEL_INDENTATION.len(); - self.indentation_buffer.truncate(new_length); + /// If `maybe_item` is: + /// * `Some(entity)`, checks to see if the entity's final byte offset is beyond the configured + /// number of bytes to inspect. + /// * `None`, then there is no stream-level entity backing the item. These will always be + /// inspected; if the e-expression that produced the value was not beyond the limit, + /// none of the ephemeral values it produces are either. 
+ fn is_past_limit(&mut self, maybe_item: Option) -> bool { + maybe_item.map(|item| item.range().start >= self.bytes_to_skip.saturating_add(self.limit_bytes)).unwrap_or(false) // `limit_bytes` is `usize::MAX` when no limit was requested, so this sum must saturate instead of overflowing. + } + + /// Convenience method to set the output stream to the specified color/style for the duration of `write_fn` + /// and then reset it upon completion. + fn with_style(&mut self, style: ColorSpec, write_fn: impl FnOnce(&mut OutputRef) -> Result<()>) -> Result<()> { + self.output.set_color(&style)?; + write_fn(&mut self.output)?; + self.output.reset()?; + Ok(()) + } + + /// Convenience method to set the output stream to the specified color/style, write `text`, + /// and then reset the output stream's style again. + fn write_with_style(&mut self, style: ColorSpec, text: &str) -> Result<()> { + self.with_style(style, |out| { + out.write_all(text.as_bytes())?; + Ok(()) + }) + } + + /// Inspects an Ion Version Marker. + fn inspect_ivm(&mut self, marker: LazyRawAnyVersionMarker<'_>) -> Result<()> { + const BINARY_IVM_LENGTH: usize = 4; + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![IonBytes::new(BytesKind::VersionMarker, marker.span().bytes())]); + self.write_offset_length_and_bytes(marker.range().start, BINARY_IVM_LENGTH, &mut formatter)?; + self.with_style(BytesKind::VersionMarker.style(), |out| { + let (major, minor) = marker.version(); + write!(out, "$ion_{major}_{minor}")?; + Ok(()) + })?; + + self.with_style(comment_style(), |out| { + write!(out, " // Version marker\n")?; + Ok(()) + })?; + self.output.reset()?; + Ok(()) + } + + /// Inspects all values (however deeply nested) starting at the current level. + fn inspect_value(&mut self, depth: usize, delimiter: &str, value: LazyValue<'_, AnyEncoding>) -> Result<()> { + use ValueRef::*; + if value.has_annotations() { + self.inspect_annotations(depth, value)?; + } + match value.read()? 
{ + SExp(sexp) => self.inspect_sexp(depth, delimiter, sexp), + List(list) => self.inspect_list(depth, delimiter, list), + Struct(struct_) => self.inspect_struct(depth, delimiter, struct_), + _ => self.inspect_scalar(depth, delimiter, value), } } - fn write_field_if_present(&mut self) -> Result<()> { - if self.reader.parent_type() != Some(IonType::Struct) { - // We're not in a struct; nothing to do. - return Ok(()); + /// Inspects the scalar `value`. If this value appears in a list or struct, the caller can set + /// `delimiter` to a comma (`","`) and it will be appended to the value's text representation. + fn inspect_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>) -> Result<()> { + use ExpandedValueSource::*; + let value_literal = match value.lower().source() { + ValueLiteral(value_literal) => value_literal, + // In Ion 1.0, there are no template values or constructed values so we can defer + // implementing these. + Template(_, _) => { todo!("Ion 1.1 template values") } + Constructed(_, _) => { todo!("Ion 1.1 constructed values") } + }; + + use LazyRawValueKind::*; + // Check what encoding this is. At the moment, only binary Ion 1.0 is supported. 
+ match value_literal.kind() { + Binary_1_0(bin_val) => { + self.inspect_binary_1_0_scalar(depth, delimiter, value, bin_val) + } + Binary_1_1(_) => todo!("Binary Ion 1.1 scalars"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value") } - let field_token = self.reader.raw_field_name_token()?; - let field_id = field_token.local_sid().expect("No SID for field name."); - self.hex_buffer.clear(); - to_hex( - &mut self.hex_buffer, - self.reader.raw_field_id_bytes().unwrap(), - ); - - let field_name_result = self.reader.field_name(); - let field_name = field_name_result - .as_ref() - .ok() - .and_then(|name| name.text()) - .unwrap_or(""); - - self.text_buffer.clear(); - write!(&mut self.text_buffer, "'{}':", field_name)?; - - self.color_buffer.clear(); - write!(&mut self.color_buffer, " // ${}:", field_id)?; - write!(&mut self.text_buffer, "{}", &self.color_buffer.dimmed())?; - output( - self.output, - self.reader.field_id_offset(), - self.reader.field_id_length(), - &self.indentation_buffer, - &self.hex_buffer, - &self.text_buffer, - )?; - - if field_name_result.is_err() { - // If we had to write for the field name above, return a fatal error now. - bail!("Encountered a field ID (${}) with unknown text.", field_id); + } + + /// Inspects the s-expression `sexp`, including all of its child values. If this sexp appears + /// in a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the sexp's text representation. 
+ fn inspect_sexp<'x>(&mut self, depth: usize, delimiter: &str, sexp: LazySExp<'x, AnyEncoding>) -> Result<()> { + use ExpandedSExpSource::*; + let raw_sexp = match sexp.lower().source() { + ValueLiteral(raw_sexp) => raw_sexp, + Template(_, _, _, _) => todo!("Ion 1.1 template SExp") + }; + + use LazyRawSExpKind::*; + match raw_sexp.kind() { + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), + Binary_1_0(v) => self.inspect_binary_1_0_sexp(depth, delimiter, sexp, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 SExp"), } + } - Ok(()) + /// Inspects the list `list`, including all of its child values. If this list appears inside + /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the list's text representation. + fn inspect_list<'x>(&mut self, depth: usize, delimiter: &str, list: LazyList<'x, AnyEncoding>) -> Result<()> { + use ExpandedListSource::*; + let raw_list = match list.lower().source() { + ValueLiteral(raw_list) => raw_list, + Template(_, _, _, _) => todo!("Ion 1.1 template List") + }; + + use LazyRawListKind::*; + match raw_list.kind() { + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), + Binary_1_0(v) => self.inspect_binary_1_0_list(depth, delimiter, list, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 List"), + } } - fn write_annotations_if_present(&mut self) -> IonResult<()> { - let num_annotations = self.reader.raw_annotations().count(); - if num_annotations > 0 { - self.hex_buffer.clear(); - to_hex( - &mut self.hex_buffer, - self.reader.raw_annotations_bytes().unwrap(), - ); - - self.text_buffer.clear(); - join_into(&mut self.text_buffer, "::", self.reader.annotations())?; - write!(&mut self.text_buffer, "::")?; - - self.color_buffer.clear(); - write!(&mut self.color_buffer, " // $")?; - join_into( - &mut self.color_buffer, - "::$", - self.reader - .raw_annotations() - .map(|a| a.map(|token| token.local_sid().unwrap())), - )?; - write!(&mut self.color_buffer, "::")?; - - 
write!(self.text_buffer, "{}", self.color_buffer.dimmed())?; - output( - self.output, - self.reader.annotations_offset(), - self.reader.annotations_length(), - &self.indentation_buffer, - &self.hex_buffer, - &self.text_buffer, - )?; + /// Inspects the struct `struct_`, including all of its fields. If this struct appears inside + /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the struct's text representation. + fn inspect_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { + let raw_struct = match struct_.lower().source() { + ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, + ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template Struct") + }; + + use LazyRawValueKind::*; + match raw_struct.as_value().kind() { + Binary_1_0(v) => self.inspect_binary_1_0_struct(depth, delimiter, struct_, raw_struct, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 Struct"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } - Ok(()) } - fn write_value(&mut self) -> IonResult<()> { - self.text_buffer.clear(); - // Populates `self.text_buffer` with the Ion text representation of the current value - // if it is a scalar. If the value is a container, format_value() will write the opening - // delimiter of that container instead. - self.format_value()?; - - self.hex_buffer.clear(); - to_hex( - &mut self.hex_buffer, - self.reader.raw_header_bytes().unwrap(), - ); - // Only write the bytes representing the body of the value if it is a scalar. - // If it is a container, `inspect_level` will handle stepping into it and writing any - // nested values. 
- if !self.reader.ion_type().unwrap().is_container() { - self.hex_buffer.push(' '); - to_hex(&mut self.hex_buffer, self.reader.raw_value_bytes().unwrap()); + fn inspect_annotations(&mut self, depth: usize, value: LazyValue) -> Result<()> { + let raw_value = match value.lower().source() { + ExpandedValueSource::ValueLiteral(raw_value) => raw_value, + ExpandedValueSource::Template(_, _) => todo!("Ion 1.1 template value annotations"), + ExpandedValueSource::Constructed(_, _) => todo!("Ion 1.1 constructed value annotations") + }; + + use LazyRawValueKind::*; + match raw_value.kind() { + Binary_1_0(v) => self.inspect_binary_1_0_annotations(depth, value, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 annotations"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } + } - let length = self.reader.header_length() + self.reader.value_length(); - output( - self.output, - Some(self.reader.header_offset()), - Some(length), - &self.indentation_buffer, - &self.hex_buffer, - &self.text_buffer, - ) - } - - fn format_value(&mut self) -> IonResult<()> { - use ion_rs::IonType::*; - - // Destructure `self` to get multiple simultaneous mutable references to its constituent - // fields. This freezes `self`; it cannot be referred to for the rest of the function call. - let IonInspector { - ref mut reader, - ref mut text_ion_writer, - ref mut text_buffer, - ref mut color_buffer, - .. - } = self; - - // If we need to write comments alongside any of the values, we'll add them here so we can - // colorize them separately. - let comment_buffer = color_buffer; - comment_buffer.clear(); - - let writer = text_ion_writer; // Local alias for brevity. 
- let ion_type = reader - .ion_type() - .expect("format_value() called when reader was exhausted"); - if reader.is_null() { - writer.write_null(reader.ion_type().unwrap())?; - } else { - match ion_type { - Null => writer.write_null(ion_type), - Bool => writer.write_bool(reader.read_bool()?), - Int => writer.write_i64(reader.read_i64()?), - Float => writer.write_f64(reader.read_f64()?), - Decimal => writer.write_decimal(&reader.read_decimal()?), - Timestamp => writer.write_timestamp(&reader.read_timestamp()?), - Symbol => { - // TODO: Make this easier in the reader - let symbol_token = reader.read_raw_symbol()?; - let sid = symbol_token.local_sid().unwrap(); - let text = reader - .symbol_table() - .text_for(sid) - .unwrap_or_else(|| panic!("Could not resolve text for symbol ID ${}", sid)); - write!(comment_buffer, " // ${}", sid)?; - writer.write_symbol(text) - } - String => writer.write_string(reader.read_str()?), - Clob => writer.write_clob(reader.read_clob()?), - Blob => writer.write_blob(reader.read_blob()?), - // The containers don't use the RawTextWriter to format anything. They simply write - // the appropriate opening delimiter. 
- List => { - write!(text_buffer, "[")?; - return Ok(()); + // ===== Binary Ion 1.0 ====== + + fn inspect_binary_1_0_sexp<'x>(&mut self, depth: usize, delimiter: &str, sexp: LazySExp<'x, AnyEncoding>, raw_sexp: v1_0::LazyRawBinarySExp<'x>) -> Result<()> { + self.inspect_binary_1_0_sequence(depth, "(", "", ")", delimiter, sexp.iter(), raw_sexp, raw_sexp.as_value()) + } + + fn inspect_binary_1_0_list<'x>(&mut self, depth: usize, delimiter: &str, list: LazyList<'x, AnyEncoding>, raw_list: v1_0::LazyRawBinaryList<'x>) -> Result<()> { + self.inspect_binary_1_0_sequence(depth, "[", ",", "]", delimiter, list.iter(), raw_list, raw_list.as_value()) + } + + fn inspect_binary_1_0_sequence<'x>(&mut self, + depth: usize, + opening_delimiter: &str, + value_delimiter: &str, + closing_delimiter: &str, + trailing_delimiter: &str, + nested_values: impl IntoIterator>>, + nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, + raw_value: v1_0::LazyRawBinaryValue, + ) -> Result<()> { + let encoding = raw_value.encoded_data(); + let range = encoding.range(); + + let opcode_bytes: &[u8] = raw_value.encoded_data().opcode_span().bytes(); + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ + IonBytes::new(BytesKind::Opcode, opcode_bytes), + IonBytes::new(BytesKind::TrailingLength, raw_value.encoded_data().trailing_length_span().bytes()), + ]); + + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + + self.write_indentation(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{opening_delimiter}\n")?; + Ok(()) + })?; + + let mut has_printed_skip_message = false; + for (raw_value_res, value_res) in nested_raw_values.iter().zip(nested_values) { + let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); + if self.should_skip(Some(raw_nested_value)) { + if !has_printed_skip_message { + self.write_skipping_message(depth + 1, "values")?; + has_printed_skip_message = true; } - SExp => { - write!(text_buffer, "(")?; - return 
Ok(()); + continue; + } + if self.is_past_limit(Some(raw_nested_value)) { + self.write_limiting_message(depth + 1, "stepping out")?; + break; + } + self.inspect_value(depth + 1, value_delimiter, nested_value)?; + self.skip_complete = true; + } + + self.write_blank_offset_length_and_bytes(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{closing_delimiter}{trailing_delimiter}\n")?; + Ok(()) + }) + } + + fn inspect_binary_1_0_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + let encoding = raw_value.encoded_data(); + let range = encoding.range(); + + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ + IonBytes::new(BytesKind::Opcode, encoding.opcode_span().bytes()), + IonBytes::new(BytesKind::TrailingLength, encoding.trailing_length_span().bytes()), + ]); + + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + + self.write_indentation(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{{\n")?; + Ok(()) + })?; + let mut has_printed_skip_message = false; + for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { + let (raw_field, field) = (raw_field_result?, field_result?); + let (raw_name, raw_value) = raw_field.expect_name_value()?; + let name = field.name()?; + + if self.should_skip(Some(raw_value)) { + if !has_printed_skip_message { + self.write_skipping_message(depth + 1, "fields")?; + has_printed_skip_message = true; } - Struct => { - write!(text_buffer, "{{")?; - return Ok(()); + continue; + } + self.skip_complete = true; + + if self.is_past_limit(Some(raw_field)) { + self.write_limiting_message(depth + 1, "stepping out")?; + break; + } + + // ===== Field name ===== + let range = raw_name.range(); + let raw_name_bytes = raw_name.span().bytes(); + let offset = range.start; + let length = range.len(); + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ 
+ IonBytes::new(BytesKind::FieldId, raw_name_bytes) + ]); + self.write_offset_length_and_bytes(offset, length, &mut formatter)?; + + self.write_indentation(depth + 1)?; + self.with_style(field_id_style(), |out| { + IoValueFormatter::new(out).value_formatter().format_symbol(name)?; + Ok(()) + })?; + write!(self.output, ": ")?; + // Print a text Ion comment showing how the field name was encoded, ($SID or text) + self.with_style(comment_style(), |out| { + match raw_name.read()? { + RawSymbolRef::SymbolId(sid) => { + write!(out, " // ${sid}\n") + } + RawSymbolRef::Text(_) => { + write!(out, " // \n") + } + }?; + Ok(()) + })?; + + // ===== Field value ===== + self.inspect_value(depth + 1, ",", field.value())?; + } + // ===== Closing delimiter ===== + self.write_blank_offset_length_and_bytes(depth)?; + self.with_style(text_ion_style(), |out| { + write!(out, "}}{delimiter}\n")?; + Ok(()) + }) + } + + fn inspect_binary_1_0_annotations(&mut self, depth: usize, value: LazyValue, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + let encoding = raw_value.encoded_annotations().unwrap(); + let range = encoding.range(); + + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ + IonBytes::new(BytesKind::AnnotationsHeader, encoding.header_span().bytes()), + IonBytes::new(BytesKind::AnnotationsSequence, encoding.sequence_span().bytes()), + ]); + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + + self.write_indentation(depth)?; + self.with_style(annotations_style(), |out| { + for annotation in value.annotations() { + IoValueFormatter::new(&mut *out).value_formatter().format_symbol(annotation?)?; + write!(out, "::")?; + } + Ok(()) + })?; + + self.with_style(comment_style(), |out| { + write!(out, " // ")?; + for (index, raw_annotation) in raw_value.annotations().enumerate() { + if index > 0 { + write!(out, ", ")?; } - }?; + match raw_annotation? 
{ + RawSymbolRef::SymbolId(sid) => write!(out, "${sid}"), + RawSymbolRef::Text(_) => write!(out, ""), + }?; + } + write!(out, "\n")?; + Ok(()) + })?; + + Ok(()) + } + + fn inspect_binary_1_0_scalar(&mut self, depth: usize, delimiter: &str, value: LazyValue, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + let encoding = raw_value.encoded_data(); + let range = encoding.range(); + + let opcode_bytes = IonBytes::new(BytesKind::Opcode, encoding.opcode_span().bytes()); + let length_bytes = IonBytes::new(BytesKind::TrailingLength, encoding.trailing_length_span().bytes()); + let body_bytes = IonBytes::new(BytesKind::ValueBody, encoding.body_span().bytes()); + + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![opcode_bytes, length_bytes, body_bytes]); + + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + self.write_indentation(depth)?; + + let style = text_ion_style(); + self.output.set_color(&style)?; + self.text_writer + .write(value.read()?) + .expect("failed to write text value to in-memory buffer") + .flush()?; + + let encoded = self.text_writer.output_mut(); + if encoded.ends_with(&[b' ']) { + let _ = encoded.pop(); } - // This is writing to a Vec, so flush() will always succeed. - let _ = writer.flush(); - // The writer produces valid UTF-8, so there's no need to re-validate it. - let value_text = unsafe { from_utf8_unchecked(writer.output().as_slice()) }; - write!(text_buffer, "{}", value_text.trim_end())?; - // If we're in a container, add a delimiting comma. Text Ion allows trailing commas, so we - // don't need to treat the last value as a special case. 
- if self.reader.depth() > 0 { - write!(text_buffer, ",")?; + self.output.write_all(self.text_writer.output().as_slice())?; + self.text_writer.output_mut().clear(); + self.output.write_all(delimiter.as_bytes())?; + self.output.reset()?; + write!(self.output, "\n")?; + + while !formatter.is_empty() { + self.write_offset_length_and_bytes("", "", &mut formatter)?; + self.write_indentation(depth)?; + write!(self.output, "\n")?; } - write!(text_buffer, "{}", comment_buffer.dimmed())?; - // Clear the writer's output Vec. We encode each scalar independently of one another. - writer.output_mut().clear(); + + Ok(()) + } + + // ===== Table-writing methods ===== + + /// Prints the header of the output table + fn write_table_header(&mut self) -> Result<()> { + self.output.write_all(START_OF_HEADER.as_bytes())?; + write!(self.output, "\n{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Offset ")?; + write!(self.output, "{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Length ")?; + write!(self.output, "{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Binary Ion ")?; + write!(self.output, "{VERTICAL_LINE}")?; + self.write_with_style(header_style(), " Text Ion ")?; + write!(self.output, "{VERTICAL_LINE}\n")?; + self.output.write_all(END_OF_HEADER.as_bytes())?; + write!(self.output, "\n")?; Ok(()) } + + /// Writes a spacing string `depth` times. + fn write_indentation(&mut self, depth: usize) -> Result<()> { + // This spacing string includes a unicode dot to make it easy to see what level of depth + // the current value is found at. This dot is displayed with a muted color; its appearance + // is subtle. 
+ const INDENTATION_WITH_GUIDE: &'static str = "· "; + + let mut color_spec = ColorSpec::new(); + color_spec.set_dimmed(false).set_intense(true).set_bold(true).set_fg(Some(Color::Rgb(100, 100, 100))); + self.with_style(color_spec, |out| { + for _ in 0..depth { + out.write_all(INDENTATION_WITH_GUIDE.as_bytes())?; + } + Ok(()) + }) + } + + /// Prints the given `offset` and `length` in the first and second table columns, then uses the + /// `formatter` to print a single row of hex-encoded bytes in the third column ("Binary Ion"). + /// The `offset` and `length` are typically `usize`, but can be anything that implements `Display`. + fn write_offset_length_and_bytes(&mut self, offset: impl Display, length: impl Display, formatter: &mut BytesFormatter) -> Result<()> { + write!(self.output, "{VERTICAL_LINE} {offset:12} {VERTICAL_LINE} {length:12} {VERTICAL_LINE} ")?; + formatter.write_row(self.output)?; + write!(self.output, "{VERTICAL_LINE} ")?; + Ok(()) + } + + /// Prints a row with blank fiends in the `Offset`, `Length`, and `Binary Ion` columns. This method + /// does not print a trailing newline, allowing the caller to populate the `Text Ion` column as needed. + fn write_blank_offset_length_and_bytes(&mut self, depth: usize) -> Result<()> { + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![]); + self.write_offset_length_and_bytes("", "", &mut formatter)?; + self.write_indentation(depth) + } + + /// Prints a row with an ellipsis (`...`) in the first three columns, and a text Ion comment in + /// the final column indicating what is being skipped over. 
+ fn write_skipping_message(&mut self, depth: usize, name_of_skipped_item: &str) -> Result<()> { + write!(self.output, "{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; + self.write_indentation(depth)?; + self.with_style(comment_style(), |out| { + write!(out, "// ...skipping {name_of_skipped_item}...\n")?; + Ok(()) + }) + } + + /// Prints a row with an ellipsis (`...`) in the first three columns, and a text Ion comment in + /// the final column indicating that we have reached the maximum number of bytes to process + /// as determined by the `--limit-bytes` flag. + fn write_limiting_message(&mut self, depth: usize, action: &str) -> Result<()> { + write!(self.output, "{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; + self.write_indentation(depth)?; + let limit_bytes = self.limit_bytes; + self.with_style(comment_style(), |out| { + write!(out, "// --limit-bytes {} reached, {action}.\n", limit_bytes)?; + Ok(()) + }) + } } -const COLUMN_DELIMITER: &str = " | "; -const CHARS_PER_HEX_BYTE: usize = 3; -const HEX_BYTES_PER_ROW: usize = 8; -const HEX_COLUMN_SIZE: usize = HEX_BYTES_PER_ROW * CHARS_PER_HEX_BYTE; - -fn write_header(output: &mut OutputRef) -> IonResult<()> { - let line = "-".repeat(24 + 24 + 9 + 9 + (COLUMN_DELIMITER.len() * 3)); - - writeln!(output, "{}", line)?; - write!( - output, - "{:^9}{}", - "Offset".bold().bright_white(), - COLUMN_DELIMITER - )?; - write!( - output, - "{:^9}{}", - "Length".bold().bright_white(), - COLUMN_DELIMITER - )?; - write!( - output, - "{:^24}{}", - "Binary Ion".bold().bright_white(), - COLUMN_DELIMITER - )?; - writeln!(output, "{:^24}", "Text Ion".bold().bright_white())?; - writeln!(output, "{}", line)?; - Ok(()) +// ===== Named styles ===== + +fn header_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_bold(true).set_intense(true); + style } -// Accepting a `T` allows us to pass in `&str`, 
`&String`, `&ColoredString`, etc as out text_column -// TODO: This could be a method on IonInspector -fn output( - output: &mut OutputRef, - offset: Option, - length: Option, - indentation: &str, - hex_column: &str, - text_column: T, -) -> IonResult<()> { - // The current implementation always writes a single line of output for the offset, length, - // and text columns. Only the hex column can span multiple rows. - // TODO: It would be nice to allow important hex bytes (e.g. type descriptors or lengths) - // to be color-coded. This complicates the output function, however, as the length - // of a colored string is not the same as its display length. We would need to pass - // uncolored strings to the output function paired with the desired color/style so - // the output function could break the text into the necessary row lengths and then apply - // the provided colors just before writing. - - // Write the offset column - if let Some(offset) = offset { - write!(output, "{:9}{}", offset, COLUMN_DELIMITER)?; - } else { - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - } - - // Write the length column - if let Some(length) = length { - write!(output, "{:9}{}", length, COLUMN_DELIMITER)?; - } else { - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - } - - // If the hex string is short enough to fit in a single row... - if hex_column.len() < HEX_COLUMN_SIZE { - // ...print the hex string... - write!(output, "{}", hex_column)?; - // ...and then write enough padding spaces to fill the rest of the row. - for _ in 0..(HEX_COLUMN_SIZE - hex_column.len()) { - write!(output, " ")?; - } - } else { - // Otherwise, write the first row's worth of the hex string. - write!(output, "{}", &hex_column[..HEX_COLUMN_SIZE])?; - } - // Write a delimiter, the write the text Ion as the final column. - write!(output, "{}", COLUMN_DELIMITER)?; - write!(output, " ")?; - writeln!(output, "{}{}", indentation, text_column)?; - - // Revisit our hex column. 
Write as many additional rows as needed. - let mut col_1_written = HEX_COLUMN_SIZE; - while col_1_written < hex_column.len() { - // Padding for offset column - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - // Padding for length column - write!(output, "{:9}{}", "", COLUMN_DELIMITER)?; - let remaining_bytes = hex_column.len() - col_1_written; - let bytes_to_write = min(remaining_bytes, HEX_COLUMN_SIZE); - let next_slice_to_write = &hex_column[col_1_written..(col_1_written + bytes_to_write)]; - write!(output, "{}", next_slice_to_write)?; - for _ in 0..(HEX_COLUMN_SIZE - bytes_to_write) { - write!(output, " ")?; - } - writeln!(output, "{}", COLUMN_DELIMITER)?; - col_1_written += HEX_COLUMN_SIZE; - // No need to write anything for the text column since it's the last one. +fn comment_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_dimmed(true); + style +} + +fn text_ion_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_fg(Some(Color::Rgb(255, 255, 255))); + style +} + +fn field_id_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_fg(Some(Color::Cyan)).set_intense(true); + style +} + +fn annotations_style() -> ColorSpec { + let mut style = ColorSpec::new(); + style.set_fg(Some(Color::Magenta)); + style +} + +/// Kinds of encoding primitives found in a binary Ion stream. +#[derive(Copy, Clone, Debug)] +enum BytesKind { + FieldId, + Opcode, + TrailingLength, + ValueBody, + AnnotationsHeader, + AnnotationsSequence, + VersionMarker, +} + +impl BytesKind { + /// Returns a [`ColorSpec`] that should be used when printing bytes of the specified `BytesKind`. 
+ fn style(&self) -> ColorSpec { + use BytesKind::*; + let mut color = ColorSpec::new(); + match self { + VersionMarker => + color + .set_fg(Some(Color::Yellow)) + .set_intense(true), + FieldId => + color + .set_fg(Some(Color::Cyan)) + .set_intense(true), + Opcode => + color + .set_bold(true) + .set_fg(Some(Color::Rgb(0, 0, 0))) + .set_bg(Some(Color::Rgb(255, 255, 255))), + + TrailingLength => + color + .set_bold(true) + .set_underline(true) + .set_fg(Some(Color::White)) + .set_intense(true), + ValueBody => + color.set_bold(false) + .set_fg(Some(Color::White)) + .set_intense(false), + AnnotationsHeader => + color.set_bold(false) + .set_fg(Some(Color::Black)) + .set_bg(Some(Color::Magenta)), + AnnotationsSequence => + color.set_bold(false) + .set_fg(Some(Color::Magenta)), + }; + color } - Ok(()) } -fn closing_delimiter_for(container_type: IonType) -> &'static str { - match container_type { - IonType::List => "]", - IonType::SExp => ")", - IonType::Struct => "}", - _ => panic!("Attempted to close non-container type {:?}", container_type), +/// A slice of Ion bytes to be printed in the `Binary Ion` column. +/// +/// Each `IonBytes` has a `BytesKind` that maps to a display style as well as a counter tracking +/// how many of its bytes have been printed so far. +#[derive(Copy, Clone, Debug)] +struct IonBytes<'a> { + // The actual slice of bytes + pub bytes: &'a [u8], + // What the slice of bytes represents in Ion + pub kind: BytesKind, + // How many of this slice's bytes have been printed so far. 
+ pub bytes_written: usize, +} + +impl<'a> IonBytes<'a> { + fn new(kind: BytesKind, bytes: &'a [u8]) -> Self { + Self { + bytes, + kind, + bytes_written: 0, + } + } + + fn mark_bytes_written(&mut self, num_bytes: usize) { + self.bytes_written += num_bytes } + + fn next_n_bytes(&self, num_bytes: usize) -> &[u8] { + &self.bytes[self.bytes_written..self.bytes_written + num_bytes] + } + + fn bytes_remaining(&self) -> usize { + self.bytes.len() - self.bytes_written + } + + fn is_empty(&self) -> bool { + self.bytes_remaining() == 0 + } + + fn style(&self) -> ColorSpec { + self.kind.style() + } +} + +/// Prints bytes as colorized, hex-encoded rows of a configurable size. +/// +/// Stores a sequence of [`IonBytes`] instances to display. Upon request, writes out the next `n` +/// colorized, hex-encoded bytes, remembering where to resume when the next row is needed. +struct BytesFormatter<'a> { + slices: Vec>, + slices_written: usize, + formatted_bytes_per_row: usize, } -fn to_hex(buffer: &mut String, bytes: &[u8]) { - if bytes.is_empty() { - return; +impl<'a> BytesFormatter<'a> { + pub fn new(formatted_bytes_per_row: usize, slices: Vec>) -> Self { + Self { slices, slices_written: 0, formatted_bytes_per_row } + } + + /// Writes a row of `n` hex-encoded, colorized bytes, where `n` is determined by the + /// `formatted_bytes_per_row` argument in [`BytesFormatter::new`]. + /// + /// If there are fewer than `n` bytes remaining, prints all remaining bytes. + pub fn write_row(&mut self, output: &mut impl WriteColor) -> Result<()> { + let num_bytes = self.formatted_bytes_per_row; + let bytes_written = self.write_bytes(num_bytes, output)?; + let bytes_remaining = num_bytes - bytes_written; + // If we printed fewer bytes than are needed to make a row, write out enough padding + // to keep the columns aligned. 
+ for _ in 0..bytes_remaining { + write!(output, " ")?; // Empty space the width of a formatted byte + } + Ok(()) + } + + /// Helper method to iterate over the remaining [`IonBytes`], printing their contents until + /// `num_bytes` is reached. + fn write_bytes(&mut self, num_bytes: usize, output: &mut impl WriteColor) -> Result { + let mut bytes_remaining = num_bytes; + while bytes_remaining > 0 && !self.is_empty() { + bytes_remaining -= self.write_bytes_from_current_slice(bytes_remaining, output)?; + if self.is_empty() { + // Even though `bytes_remaining` hasn't reached zero, we're out of data. + break; + } + } + + Ok(num_bytes - bytes_remaining) + } + + /// Helper method to print up to `num_bytes` bytes from the current [`IonBytes`]. + fn write_bytes_from_current_slice(&mut self, num_bytes: usize, output: &mut impl WriteColor) -> Result { + let Some(slice) = self.current_slice() else { + // No more to write + return Ok(0); + }; + + if slice.bytes.len() == 0 { + self.slices_written += 1; + return Ok(0); + } + + // We're going to write whichever is smaller: + // 1. the requested number of bytes from the current slice + // OR + // 2. the number of bytes remaining in the current slice + let bytes_to_write = num_bytes.min(slice.bytes_remaining()); + + // Set the appropriate style for this byte slice. + let style: ColorSpec = slice.style(); + output.set_color(&style)?; + write!(output, "{}", hex_contents(slice.next_n_bytes(bytes_to_write)))?; + slice.mark_bytes_written(bytes_to_write); + output.reset()?; + + // If we completed the slice OR we finished writing all of the requested bytes + if slice.is_empty() || num_bytes == bytes_to_write { + write!(output, " ")?; + } + + if slice.is_empty() { + // This slice has been exhausted, we should resume from the beginning of the next one. + self.slices_written += 1; + } + + Ok(bytes_to_write) } - write!(buffer, "{:02x}", bytes[0]).unwrap(); - for byte in &bytes[1..] 
{ - write!(buffer, " {:02x}", *byte).unwrap(); + + /// Returns a reference to the [`IonBytes`] from which the next bytes should be pulled. + fn current_slice(&mut self) -> Option<&mut IonBytes<'a>> { + if self.is_empty() { + return None; + } + Some(&mut self.slices[self.slices_written]) + } + + /// Returns `true` if all of the slices have been exhausted. + fn is_empty(&self) -> bool { + self.slices_written == self.slices.len() } } -fn join_into( - buffer: &mut String, - delimiter: &str, - mut values: impl Iterator>, -) -> IonResult<()> { - if let Some(first) = values.next() { - write!(buffer, "{}", first?).unwrap(); +/// Converts the given byte slice to a string containing hex-encoded bytes +fn hex_contents(source: &[u8]) -> String { + if source.is_empty() { + return String::new(); } - for value in values { - write!(buffer, "{}{}", delimiter, value?).unwrap(); + use std::fmt::Write; + let mut buffer = String::new(); + let bytes = source.iter(); + + let mut is_first = true; + for byte in bytes { + if is_first { + write!(buffer, "{:02X?}", byte).unwrap(); + is_first = false; + continue; + } + write!(buffer, " {:02X?}", byte).unwrap(); } - Ok(()) + buffer } diff --git a/src/bin/ion/commands/beta/mod.rs b/src/bin/ion/commands/beta/mod.rs index 5d9f5c4..b623797 100644 --- a/src/bin/ion/commands/beta/mod.rs +++ b/src/bin/ion/commands/beta/mod.rs @@ -4,18 +4,18 @@ pub mod from; #[cfg(feature = "experimental-code-gen")] pub mod generate; pub mod head; -pub mod inspect; pub mod primitive; pub mod schema; pub mod symtab; pub mod to; +pub mod inspect; use crate::commands::beta::count::CountCommand; +use crate::commands::beta::inspect::InspectCommand; use crate::commands::beta::from::FromNamespace; #[cfg(feature = "experimental-code-gen")] use crate::commands::beta::generate::GenerateCommand; use crate::commands::beta::head::HeadCommand; -use crate::commands::beta::inspect::InspectCommand; use crate::commands::beta::primitive::PrimitiveCommand; use 
crate::commands::beta::schema::SchemaNamespace; use crate::commands::beta::symtab::SymtabNamespace; @@ -44,7 +44,7 @@ impl IonCliCommand for BetaNamespace { Box::new(ToNamespace), Box::new(SymtabNamespace), #[cfg(feature = "experimental-code-gen")] - Box::new(GenerateCommand), + Box::new(GenerateCommand), ] } } diff --git a/src/bin/ion/file_writer.rs b/src/bin/ion/file_writer.rs new file mode 100644 index 0000000..f002b72 --- /dev/null +++ b/src/bin/ion/file_writer.rs @@ -0,0 +1,55 @@ +use termcolor::{ColorSpec, WriteColor}; +use std::io; +use std::io::{BufWriter, Write}; +use std::fs::File; + +/// A buffered `io::Write` implementation that implements [`WriteColor`] by reporting that it does +/// not support TTY escape sequences and treating all requests to change or reset the current color +/// as no-ops. +// +// When writing to a file instead of a TTY, we don't want to use `termcolor` escape sequences as +// they would be stored as literal bytes rather than being interpreted. To achieve this, we need an +// `io::Write` implementation that also implements `termcolor`'s `WriteColor` trait. `WriteColor` +// allows the type to specify to whether it supports interpreting escape codes. +// +// We cannot implement `WriteColor` for `BufWriter` directly due to Rust's coherence rules. Our +// crate must own the trait, the implementing type, or both. The `FileWriter` type defined below +// is a simple wrapper around a `BufWriter` that implements both `io::Write` and `termcolor`'s +// `WriteColor` trait. +pub struct FileWriter { + inner: BufWriter, +} + +impl FileWriter { + pub fn new(file: File) -> Self { + Self { inner: BufWriter::new(file) } + } +} + +// Delegates all `io::Write` methods to the nested `BufWriter`. 
+impl Write for FileWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.inner.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +impl WriteColor for FileWriter { + fn supports_color(&self) -> bool { + // FileWriter is never used to write to a TTY, so it does not support escape codes. + false + } + + fn set_color(&mut self, _spec: &ColorSpec) -> io::Result<()> { + // When asked to change the color spec, do nothing. + Ok(()) + } + + fn reset(&mut self) -> io::Result<()> { + // When asked to reset the color spec to the default settings, do nothing. + Ok(()) + } +} diff --git a/src/bin/ion/main.rs b/src/bin/ion/main.rs index 048ec39..49dca52 100644 --- a/src/bin/ion/main.rs +++ b/src/bin/ion/main.rs @@ -1,4 +1,5 @@ mod commands; +mod file_writer; use crate::commands::beta::BetaNamespace; use anyhow::Result; @@ -18,7 +19,7 @@ fn main() -> Result<()> { // If `ion-cli` is being invoked as part of a pipeline we want to allow the pipeline to // to shut off without printing an error to stderr Some(IonError::Io(error)) if error.source().kind() == ErrorKind::BrokenPipe => { - return Ok(()) + return Ok(()); } _ => return Err(e), } From 2cf1a0e855474313cb273b42bdfc31cb48f7b882 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 31 May 2024 11:29:27 -0400 Subject: [PATCH 02/13] updates the `dump` and `filter` commands --- Cargo.lock | 7 +- Cargo.toml | 4 +- src/bin/ion/commands/beta/count.rs | 16 +-- src/bin/ion/commands/beta/inspect.rs | 22 +++- src/bin/ion/commands/beta/symtab/filter.rs | 73 ++++++----- src/bin/ion/commands/dump.rs | 145 ++++++--------------- src/bin/ion/commands/mod.rs | 5 +- 7 files changed, 104 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8abd322..b859da5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -857,8 +857,8 @@ dependencies = [ "convert_case", "flate2", "infer", - "ion-rs 1.0.0-rc.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ion-rs 1.0.0-rc.3 
(git+https://github.com/amazon-ion/ion-rust.git)", + "ion-rs 1.0.0-rc.3", + "ion-rs 1.0.0-rc.4", "ion-schema", "matches", "memmap", @@ -917,8 +917,7 @@ dependencies = [ [[package]] name = "ion-rs" -version = "1.0.0-rc.3" -source = "git+https://github.com/amazon-ion/ion-rust.git#eab334e79e420d5a5928918ce7f1ac57741f9d3a" +version = "1.0.0-rc.4" dependencies = [ "arrayvec", "base64 0.12.3", diff --git a/Cargo.toml b/Cargo.toml index f71dab3..0cc2632 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,8 @@ colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" ion-rs = { version = "1.0.0-rc.2", features = ["experimental"] } -#new-ion-rs = { package = "ion-rs", version = "1.0.0-rc.3", path = "../ion-rust", features = ["experimental"] } -new-ion-rs = { package = "ion-rs", git = "https://github.com/amazon-ion/ion-rust.git", features = ["experimental"] } +new-ion-rs = { package = "ion-rs", version = "1.0.0-rc.4", path = "../ion-rust", features = ["experimental"] } +# new-ion-rs = { package = "ion-rs", git = "https://github.com/amazon-ion/ion-rust.git", features = ["experimental"] } memmap = "0.7.0" tempfile = "3.2.0" ion-schema = "0.10.0" diff --git a/src/bin/ion/commands/beta/count.rs b/src/bin/ion/commands/beta/count.rs index 7e857e9..777e413 100644 --- a/src/bin/ion/commands/beta/count.rs +++ b/src/bin/ion/commands/beta/count.rs @@ -1,7 +1,7 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{ArgMatches, Command}; -use ion_rs::*; +use new_ion_rs::*; use std::fs::File; use std::io::{stdin, BufReader, StdinLock}; @@ -25,13 +25,13 @@ impl IonCliCommand for CountCommand { for input_file in input_file_iter { let file = File::open(input_file) .with_context(|| format!("Could not open file '{}'", input_file))?; - let mut reader = ReaderBuilder::new().build(file)?; + let mut reader = Reader::new(AnyEncoding, file)?; print_top_level_value_count(&mut reader)?; } } else { let input: StdinLock = stdin().lock(); let buf_reader = 
BufReader::new(input); - let mut reader = ReaderBuilder::new().build(buf_reader)?; + let mut reader = Reader::new(AnyEncoding, buf_reader)?; print_top_level_value_count(&mut reader)?; }; @@ -39,15 +39,9 @@ impl IonCliCommand for CountCommand { } } -fn print_top_level_value_count(reader: &mut Reader) -> Result<()> { +fn print_top_level_value_count(reader: &mut Reader) -> Result<()> { let mut count: usize = 0; - loop { - let item = reader - .next() - .with_context(|| "could not count values in Ion stream")?; - if item == StreamItem::Nothing { - break; - } + while let Some(_) = reader.next()? { count += 1; } println!("{}", count); diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index a17666b..bdbd74a 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -371,7 +371,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// `delimiter` to a comma (`","`) and it will be appended to the value's text representation. fn inspect_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>) -> Result<()> { use ExpandedValueSource::*; - let value_literal = match value.lower().source() { + let value_literal = match value.expanded().source() { ValueLiteral(value_literal) => value_literal, // In Ion 1.0, there are no template values or constructed values so we can defer // implementing these. @@ -395,7 +395,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// to the sexp's text representation. fn inspect_sexp<'x>(&mut self, depth: usize, delimiter: &str, sexp: LazySExp<'x, AnyEncoding>) -> Result<()> { use ExpandedSExpSource::*; - let raw_sexp = match sexp.lower().source() { + let raw_sexp = match sexp.expanded().source() { ValueLiteral(raw_sexp) => raw_sexp, Template(_, _, _, _) => todo!("Ion 1.1 template SExp") }; @@ -413,7 +413,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// to the list's text representation. 
fn inspect_list<'x>(&mut self, depth: usize, delimiter: &str, list: LazyList<'x, AnyEncoding>) -> Result<()> { use ExpandedListSource::*; - let raw_list = match list.lower().source() { + let raw_list = match list.expanded().source() { ValueLiteral(raw_list) => raw_list, Template(_, _, _, _) => todo!("Ion 1.1 template List") }; @@ -430,7 +430,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended /// to the struct's text representation. fn inspect_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { - let raw_struct = match struct_.lower().source() { + let raw_struct = match struct_.expanded().source() { ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template Struct") }; @@ -444,7 +444,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { } fn inspect_annotations(&mut self, depth: usize, value: LazyValue) -> Result<()> { - let raw_value = match value.lower().source() { + let raw_value = match value.expanded().source() { ExpandedValueSource::ValueLiteral(raw_value) => raw_value, ExpandedValueSource::Template(_, _) => todo!("Ion 1.1 template value annotations"), ExpandedValueSource::Constructed(_, _) => todo!("Ion 1.1 constructed value annotations") @@ -991,11 +991,19 @@ fn hex_contents(source: &[u8]) -> String { let mut is_first = true; for byte in bytes { if is_first { - write!(buffer, "{:02X?}", byte).unwrap(); + write!(buffer, "{:02x?}", byte).unwrap(); is_first = false; continue; } - write!(buffer, " {:02X?}", byte).unwrap(); + write!(buffer, " {:02x?}", byte).unwrap(); } buffer } + +#[test] +fn do_it() { + let stdout = StandardStream::stdout(ColorChoice::Always); + let mut output: Box = Box::new(stdout.lock()); + let mut reader = SystemReader::new(AnyEncoding, File::open("/tmp/symbols").unwrap()); + IonInspector::new(&mut output, 0, 
usize::MAX).unwrap().inspect_top_level(&mut reader).unwrap() +} \ No newline at end of file diff --git a/src/bin/ion/commands/beta/symtab/filter.rs b/src/bin/ion/commands/beta/symtab/filter.rs index 375673f..76faab6 100644 --- a/src/bin/ion/commands/beta/symtab/filter.rs +++ b/src/bin/ion/commands/beta/symtab/filter.rs @@ -1,11 +1,10 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{bail, Context, Result}; use clap::{Arg, ArgAction, ArgMatches, Command}; -use ion_rs::RawBinaryReader; -use ion_rs::{IonReader, IonResult, IonType, SystemReader, SystemStreamItem}; -use memmap::MmapOptions; use std::fs::File; +use std::io; use std::io::{stdout, BufWriter, Write}; +use new_ion_rs::*; pub struct SymtabFilterCommand; @@ -16,7 +15,7 @@ impl IonCliCommand for SymtabFilterCommand { fn about(&self) -> &'static str { // XXX Currently only supports binary input - "Filters user data out of a binary Ion stream, leaving only the symbol table(s) behind." + "Filters user data out of an Ion stream, leaving only the symbol table(s) behind." } fn configure_args(&self, command: Command) -> Command { @@ -52,21 +51,12 @@ impl IonCliCommand for SymtabFilterCommand { for input_file in input_file_names { let file = File::open(input_file.as_str()) .with_context(|| format!("Could not open file '{}'", &input_file))?; - - let mmap = unsafe { - MmapOptions::new() - .map(&file) - .with_context(|| format!("Could not mmap '{}'", input_file))? - }; - - // Treat the mmap as a byte array. 
- let ion_data: &[u8] = &mmap[..]; - let raw_reader = RawBinaryReader::new(ion_data); - let mut system_reader = SystemReader::new(raw_reader); - omit_user_data(ion_data, &mut system_reader, &mut output, lift_requested)?; + let mut system_reader = SystemReader::new(AnyEncoding, file); + filter_out_user_data(&mut system_reader, &mut output, lift_requested)?; } } else { - bail!("this command does not yet support reading from STDIN") + let mut system_reader = SystemReader::new(AnyEncoding, io::stdin().lock()); + filter_out_user_data(&mut system_reader, &mut output, lift_requested)?; } output.flush()?; @@ -74,28 +64,45 @@ impl IonCliCommand for SymtabFilterCommand { } } -pub fn omit_user_data( - ion_data: &[u8], - reader: &mut SystemReader>, +pub fn filter_out_user_data( + reader: &mut SystemReader, output: &mut Box, lift_requested: bool, -) -> IonResult<()> { +) -> Result<()> { loop { - match reader.next()? { - SystemStreamItem::VersionMarker(major, minor) => { - output.write_all(&[0xE0, major, minor, 0xEA])?; + match reader.next_item()? { + SystemStreamItem::VersionMarker(marker) => { + output.write_all(marker.span().bytes())?; } - SystemStreamItem::SymbolTableValue(IonType::Struct) => { - if !lift_requested { - output.write_all(reader.raw_annotations_bytes().unwrap_or(&[]))?; + SystemStreamItem::SymbolTable(symtab) => { + let Some(raw_value) = symtab.as_value().raw() else { + // This symbol table came from a macro expansion; there are no encoded bytes + // to pass through. + bail!("found an ephemeral symbol table, which is not yet supported") + }; + if lift_requested { + // Only pass through the value portion of the symbol table, stripping off the + // `$ion_symbol_table` annotation. + output.write_all(raw_value.value_span().bytes())?; + } else { + // Pass through the complete symbol table, preserving the `$ion_symbol_table` + // annotation. 
+ output.write_all(raw_value.span().bytes())?; } - output.write_all(reader.raw_header_bytes().unwrap())?; - let body_range = reader.value_range(); - let body_bytes = &ion_data[body_range]; - output.write_all(body_bytes)?; } - SystemStreamItem::Nothing => return Ok(()), - _ => {} + SystemStreamItem::Value(_) => continue, + SystemStreamItem::EndOfStream(_) => { + return Ok(()); + } + _ => unreachable!("#[non_exhaustive] enum, current variants covered") + }; + // If this is a text encoding, then we need delimiting space to separate + // IVMs from their neighboring system stream items. Consider: + // $ion_1_0$ion_1_0 + // or + // $ion_symbol_table::{}$ion_1_0$ion_symbol_table::{} + if reader.detected_encoding().is_text() { + output.write_all(&[b'\n']).unwrap() } } } diff --git a/src/bin/ion/commands/dump.rs b/src/bin/ion/commands/dump.rs index 4b7ce53..750d4d9 100644 --- a/src/bin/ion/commands/dump.rs +++ b/src/bin/ion/commands/dump.rs @@ -1,7 +1,7 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; -use ion_rs::*; +use new_ion_rs::*; use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, Chain, Cursor, Read, StdinLock, Write}; @@ -69,24 +69,26 @@ impl IonCliCommand for DumpCommand { for input_file in input_file_iter { let file = File::open(input_file) .with_context(|| format!("Could not open file '{}'", input_file))?; - let mut reader = if let Some(true) = args.get_one::("no-auto-decompress") { - ReaderBuilder::new().build(file)? + if let Some(true) = args.get_one::("no-auto-decompress") { + let mut reader = Reader::new(AnyEncoding, file)?; + write_in_format(&mut reader, &mut output, format, values)?; } else { let bfile = BufReader::with_capacity(BUF_READER_CAPACITY, file); let zfile = auto_decompressing_reader(bfile, INFER_HEADER_LENGTH)?; - ReaderBuilder::new().build(zfile)? 
+ let mut reader = Reader::new(AnyEncoding, zfile)?; + write_in_format(&mut reader, &mut output, format, values)?; }; - write_in_format(&mut reader, &mut output, format, values)?; } } else { let input: StdinLock = stdin().lock(); - let mut reader = if let Some(true) = args.get_one::("no-auto-decompress") { - ReaderBuilder::new().build(input)? + if let Some(true) = args.get_one::("no-auto-decompress") { + let mut reader = Reader::new(AnyEncoding, input)?; + write_in_format(&mut reader, &mut output, format, values)?; } else { let zinput = auto_decompressing_reader(input, INFER_HEADER_LENGTH)?; - ReaderBuilder::new().build(zinput)? + let mut reader = Reader::new(AnyEncoding, zinput)?; + write_in_format(&mut reader, &mut output, format, values)?; }; - write_in_format(&mut reader, &mut output, format, values)?; } output.flush()?; @@ -104,39 +106,27 @@ pub(crate) fn run(_command: &str, args: &ArgMatches) -> Result<()> { /// Constructs the appropriate writer for the given format, then writes all values found in the /// Reader to the new Writer. If `count` is specified will write at most `count` values. -pub(crate) fn write_in_format( - reader: &mut Reader, +pub(crate) fn write_in_format( + reader: &mut Reader, output: &mut Box, format: &str, count: Option, ) -> IonResult { - // XXX: The text formats below each have additional logic to append a newline because the - // ion-rs writer doesn't handle this automatically like it should. 
- //TODO: Solve these newline issues, get rid of hack - // https://github.com/amazon-ion/ion-cli/issues/36 - // https://github.com/amazon-ion/ion-rust/issues/437 - const NEWLINE: u8 = 0x0A; let written = match format { "pretty" => { - let mut writer = TextWriterBuilder::pretty().build(output)?; - let values_written = transcribe_n_values(reader, &mut writer, count)?; - writer.output_mut().write_all(&[NEWLINE])?; - Ok(values_written) + let mut writer = Writer::new(v1_0::Text.with_format(TextFormat::Pretty), output)?; + transcribe_n_values(reader, &mut writer, count) } "text" => { - let mut writer = TextWriterBuilder::default().build(output)?; - let values_written = transcribe_n_values(reader, &mut writer, count)?; - writer.output_mut().write_all(&[NEWLINE])?; - Ok(values_written) + let mut writer = Writer::new(v1_0::Text.with_format(TextFormat::Compact), output)?; + transcribe_n_values(reader, &mut writer, count) } "lines" => { - let mut writer = TextWriterBuilder::lines().build(output)?; - let values_written = transcribe_n_values(reader, &mut writer, count)?; - writer.output_mut().write_all(&[NEWLINE])?; - Ok(values_written) + let mut writer = Writer::new(v1_0::Text.with_format(TextFormat::Lines), output)?; + transcribe_n_values(reader, &mut writer, count) } "binary" => { - let mut writer = BinaryWriterBuilder::new().build(output)?; + let mut writer = Writer::new(v1_0::Binary, output)?; transcribe_n_values(reader, &mut writer, count) } unrecognized => unreachable!( @@ -149,95 +139,36 @@ pub(crate) fn write_in_format( /// Writes each value encountered in the Reader to the provided IonWriter. If `count` is specified /// will write at most `count` values. 
-fn transcribe_n_values( - reader: &mut Reader, - writer: &mut W, +fn transcribe_n_values( + reader: &mut Reader, + writer: &mut Writer>, count: Option, ) -> IonResult { const FLUSH_EVERY_N: usize = 100; let mut values_since_flush: usize = 0; - let mut annotations = vec![]; - let mut index = 0; - loop { - // Could use Option::is_some_and if that reaches stable - if reader.depth() == 0 && matches!(count, Some(n) if n <= index) { + let max_items = count.unwrap_or(usize::MAX); + let mut index: usize = 0; + + while let Some(value) = reader.next()? { + if index >= max_items { break; } - match reader.next()? { - StreamItem::Value(ion_type) | StreamItem::Null(ion_type) => { - if reader.has_annotations() { - annotations.clear(); - for annotation in reader.annotations() { - annotations.push(annotation?); - } - writer.set_annotations(&annotations); - } + writer.write(value)?; - if reader.parent_type() == Some(IonType::Struct) { - writer.set_field_name(reader.field_name()?); - } - - if reader.is_null() { - writer.write_null(ion_type)?; - continue; - } - - use IonType::*; - match ion_type { - Null => unreachable!("null values are handled prior to this match"), - Bool => writer.write_bool(reader.read_bool()?)?, - Int => writer.write_int(&reader.read_int()?)?, - Float => { - let float64 = reader.read_f64()?; - let float32 = float64 as f32; - if float32 as f64 == float64 { - // No data lost during cast; write it as an f32 - writer.write_f32(float32)?; - } else { - writer.write_f64(float64)?; - } - } - Decimal => writer.write_decimal(&reader.read_decimal()?)?, - Timestamp => writer.write_timestamp(&reader.read_timestamp()?)?, - Symbol => writer.write_symbol(reader.read_symbol()?)?, - String => writer.write_string(reader.read_string()?)?, - Clob => writer.write_clob(reader.read_clob()?)?, - Blob => writer.write_blob(reader.read_blob()?)?, - List => { - reader.step_in()?; - writer.step_in(List)?; - } - SExp => { - reader.step_in()?; - writer.step_in(SExp)?; - } - Struct => { - 
reader.step_in()?; - writer.step_in(Struct)?; - } - } - } - StreamItem::Nothing if reader.depth() > 0 => { - reader.step_out()?; - writer.step_out()?; - } - StreamItem::Nothing => break, - } - if reader.depth() == 0 { - index += 1; - values_since_flush += 1; - if values_since_flush == FLUSH_EVERY_N { - writer.flush()?; - values_since_flush = 0; - } + index += 1; + values_since_flush += 1; + if values_since_flush == FLUSH_EVERY_N { + writer.flush()?; + values_since_flush = 0; } } + writer.flush()?; Ok(index) } -/// Autodetects a compressed byte stream and wraps the original reader +/// Auto-detects a compressed byte stream and wraps the original reader /// into a reader that transparently decompresses. /// /// To support non-seekable readers like `Stdin`, we could have used a @@ -254,8 +185,8 @@ fn auto_decompressing_reader( mut reader: R, header_len: usize, ) -> IonResult -where - R: BufRead + 'static, + where + R: BufRead + 'static, { // read header let mut header_bytes = vec![0; header_len]; diff --git a/src/bin/ion/commands/mod.rs b/src/bin/ion/commands/mod.rs index d55f250..4e84038 100644 --- a/src/bin/ion/commands/mod.rs +++ b/src/bin/ion/commands/mod.rs @@ -1,5 +1,6 @@ use anyhow::anyhow; use clap::{crate_authors, crate_version, Arg, ArgAction, ArgMatches, Command as ClapCommand}; + pub mod beta; pub mod dump; @@ -70,10 +71,6 @@ pub trait IonCliCommand { } } - /// Sets up the pager (e.g. `less`) to which long text output will be directed. The default - /// implementation does not configure a pager. - fn set_up_pager(&self) {} - /// The core logic of the command. /// /// The default implementation assumes this command is a namespace (i.e. a group of subcommands). 
From d4c617310ece77358e53fd924ab2dcd72c31d506 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 31 May 2024 12:20:26 -0400 Subject: [PATCH 03/13] update `to json` --- src/bin/ion/commands/beta/to/json.rs | 124 +++++++++++++-------------- 1 file changed, 58 insertions(+), 66 deletions(-) diff --git a/src/bin/ion/commands/beta/to/json.rs b/src/bin/ion/commands/beta/to/json.rs index ec0fa2a..5be8ef0 100644 --- a/src/bin/ion/commands/beta/to/json.rs +++ b/src/bin/ion/commands/beta/to/json.rs @@ -1,12 +1,11 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{ArgMatches, Command}; -use ion_rs::{Element, ElementReader}; -use ion_rs::{Reader, ReaderBuilder}; use serde_json::{Map, Number, Value as JsonValue}; use std::fs::File; use std::io::{stdin, stdout, BufWriter, Write}; use std::str::FromStr; +use new_ion_rs::*; pub struct ToJsonCommand; @@ -45,15 +44,13 @@ impl IonCliCommand for ToJsonCommand { for input_file in input_file_names { let file = File::open(input_file.as_str()) .with_context(|| format!("Could not open file '{}'", &input_file))?; - let mut reader = ReaderBuilder::new() - .build(file) + let mut reader = Reader::new(AnyEncoding, file) .with_context(|| format!("Input file {} was not valid Ion.", &input_file))?; convert(&mut reader, &mut output)?; } } else { // No input files were specified, run the converter on STDIN. 
- let mut reader = ReaderBuilder::new() - .build(stdin().lock()) + let mut reader = Reader::new(AnyEncoding, stdin().lock()) .with_context(|| "Input was not valid Ion.")?; convert(&mut reader, &mut output)?; } @@ -63,73 +60,68 @@ impl IonCliCommand for ToJsonCommand { } } -pub fn convert(reader: &mut Reader, output: &mut Box) -> Result<()> { +pub fn convert(reader: &mut Reader, output: &mut Box) -> Result<()> { const FLUSH_EVERY_N: usize = 100; - let mut element_count = 0usize; - for result in reader.elements() { - let element = result.with_context(|| "invalid input")?; - writeln!(output, "{}", to_json_value(&element)?)?; - element_count += 1; - if element_count % FLUSH_EVERY_N == 0 { + let mut value_count = 0usize; + while let Some(value) = reader.next()? { + writeln!(output, "{}", to_json_value(value)?)?; + value_count += 1; + if value_count % FLUSH_EVERY_N == 0 { output.flush()?; } } Ok(()) } -fn to_json_value(element: &Element) -> Result { - if element.is_null() { - Ok(JsonValue::Null) - } else { - use ion_rs::Value::*; - let value = match element.value() { - Null(_ion_type) => JsonValue::Null, - Bool(b) => JsonValue::Bool(*b), - Int(i) => JsonValue::Number( - Number::from_str(&(*i).to_string()) - .with_context(|| format!("{element} could not be turned into a Number"))?, - ), - Float(f) => { - let value = *f; - if value.is_finite() { - JsonValue::Number( - Number::from_f64(value).with_context(|| { - format!("{element} could not be turned into a Number") - })?, - ) - } else { - // +inf, -inf, and nan are not JSON numbers, and are written as null in - // accordance with Ion's JSON down-conversion guidelines. - JsonValue::Null - } +fn to_json_value(value: LazyValue) -> Result { + use ValueRef::*; + let value = match value.read()? 
{ + Null(_) => JsonValue::Null, + Bool(b) => JsonValue::Bool(b), + Int(i) => JsonValue::Number(Number::from(i.expect_i128()?)), + Float(f) if f.is_finite() => JsonValue::Number(Number::from_f64(f).expect("f64 is finite")), + // Special floats like +inf, -inf, and NaN are written as `null` in + // accordance with Ion's JSON down-conversion guidelines. + Float(_f) => JsonValue::Null, + Decimal(d) => { + let mut text = d.to_string().replace('d', "e"); + if text.ends_with(".") { + // If there's a trailing "." with no digits of precision, discard it. JSON's `Number` + // type does not do anything with this information. + let _ = text.pop(); } - Decimal(d) => JsonValue::Number( - Number::from_str(d.to_string().replace('d', "e").as_str()) - .with_context(|| format!("{element} could not be turned into a Number"))?, - ), - Timestamp(t) => JsonValue::String(t.to_string()), - Symbol(s) => s - .text() - .map(|text| JsonValue::String(text.to_owned())) - .unwrap_or_else(|| JsonValue::Null), - String(s) => JsonValue::String(s.text().to_owned()), - Blob(b) | Clob(b) => { - use base64::{engine::general_purpose as base64_encoder, Engine as _}; - let base64_text = base64_encoder::STANDARD.encode(b.as_ref()); - JsonValue::String(base64_text) - } - List(s) | SExp(s) => { - let result: Result> = s.elements().map(to_json_value).collect(); - JsonValue::Array(result?) - } - Struct(s) => { - let result: Result> = s - .fields() - .map(|(k, v)| to_json_value(v).map(|value| (k.text().unwrap().into(), value))) - .collect(); - JsonValue::Object(result?) 
+ JsonValue::Number( + Number::from_str(text.as_str()) + .with_context(|| format!("{d} could not be turned into a Number"))?, + ) + } + Timestamp(t) => JsonValue::String(t.to_string()), + Symbol(s) => s.text() + .map(|text| JsonValue::String(text.to_owned())) + .unwrap_or_else(|| JsonValue::Null), + String(s) => JsonValue::String(s.text().to_owned()), + Blob(b) | Clob(b) => { + use base64::{engine::general_purpose as base64_encoder, Engine as _}; + let base64_text = base64_encoder::STANDARD.encode(b.as_ref()); + JsonValue::String(base64_text) + } + SExp(s) => to_json_array(s.iter())?, + List(l) => to_json_array(l.iter())?, + Struct(s) => { + let mut map = Map::new(); + for field in s { + let field = field?; + let name = field.name()?.text().unwrap_or("$0").to_owned(); + let value = to_json_value(field.value())?; + map.insert(name, value); } - }; - Ok(value) - } + JsonValue::Object(map) + } + }; + Ok(value) +} + +fn to_json_array<'a>(ion_values: impl IntoIterator>>) -> Result { + let result: Result> = ion_values.into_iter().flat_map(|v| v.map(to_json_value)).collect(); + Ok(JsonValue::Array(result?)) } From 1cc0d6635c35aa12a4c2d94b1d373e094ac94aea Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 31 May 2024 12:24:55 -0400 Subject: [PATCH 04/13] Remove references to old version of ion_rs --- Cargo.lock | 72 +--------------------- Cargo.toml | 4 +- src/bin/ion/commands/beta/count.rs | 2 +- src/bin/ion/commands/beta/inspect.rs | 2 +- src/bin/ion/commands/beta/primitive.rs | 2 +- src/bin/ion/commands/beta/symtab/filter.rs | 2 +- src/bin/ion/commands/beta/to/json.rs | 2 +- src/bin/ion/commands/dump.rs | 2 +- 8 files changed, 10 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b859da5..2e6dfe3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,12 +118,6 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" -[[package]] -name = 
"base64" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - [[package]] name = "base64" version = "0.21.7" @@ -432,17 +426,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "delegate" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee5df75c70b95bd3aacc8e2fd098797692fb1d54121019c4de481e42f04c8a1" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "delegate" version = "0.12.0" @@ -857,7 +840,6 @@ dependencies = [ "convert_case", "flate2", "infer", - "ion-rs 1.0.0-rc.3", "ion-rs 1.0.0-rc.4", "ion-schema", "matches", @@ -893,28 +875,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "ion-rs" -version = "1.0.0-rc.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4048cdda3ac98a729cdbac81a4dbcae988bdf01696da36244d65c1d234ee93a" -dependencies = [ - "arrayvec", - "base64 0.12.3", - "bumpalo", - "bytes", - "chrono", - "delegate 0.10.0", - "nom", - "num-bigint 0.4.4", - "num-integer", - "num-traits", - "serde", - "serde_with 2.3.3", - "smallvec", - "thiserror", -] - [[package]] name = "ion-rs" version = "1.0.0-rc.4" @@ -929,7 +889,7 @@ dependencies = [ "num-integer", "num-traits", "serde", - "serde_with 3.8.1", + "serde_with", "smallvec", "thiserror", ] @@ -1478,22 +1438,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_with" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" -dependencies = [ - "base64 0.13.1", - "chrono", - "hex", - "indexmap 1.9.3", - "serde", - "serde_json", - "serde_with_macros 2.3.3", - "time", -] - [[package]] name = "serde_with" version = "3.8.1" @@ -1508,22 +1452,10 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "serde_with_macros 3.8.1", + "serde_with_macros", 
"time", ] -[[package]] -name = "serde_with_macros" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "881b6f881b17d13214e5d494c939ebab463d01264ce1811e9d4ac3a882e7695f" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 2.0.50", -] - [[package]] name = "serde_with_macros" version = "3.8.1" diff --git a/Cargo.toml b/Cargo.toml index 0cc2632..7bd8e8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,8 @@ clap = { version = "4.0.17", features = ["cargo"] } colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" -ion-rs = { version = "1.0.0-rc.2", features = ["experimental"] } -new-ion-rs = { package = "ion-rs", version = "1.0.0-rc.4", path = "../ion-rust", features = ["experimental"] } +#ion-rs = { version = "1.0.0-rc.2", features = ["experimental"] } +ion-rs = { package = "ion-rs", version = "1.0.0-rc.4", path = "../ion-rust", features = ["experimental"] } # new-ion-rs = { package = "ion-rs", git = "https://github.com/amazon-ion/ion-rust.git", features = ["experimental"] } memmap = "0.7.0" tempfile = "3.2.0" diff --git a/src/bin/ion/commands/beta/count.rs b/src/bin/ion/commands/beta/count.rs index 777e413..ee71181 100644 --- a/src/bin/ion/commands/beta/count.rs +++ b/src/bin/ion/commands/beta/count.rs @@ -1,7 +1,7 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{ArgMatches, Command}; -use new_ion_rs::*; +use ion_rs::*; use std::fs::File; use std::io::{stdin, BufReader, StdinLock}; diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index bdbd74a..a748fc7 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -6,7 +6,7 @@ use std::str::FromStr; use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; -use new_ion_rs::*; +use ion_rs::*; use crate::commands::{IonCliCommand, WithIonCliArgument}; diff --git a/src/bin/ion/commands/beta/primitive.rs 
b/src/bin/ion/commands/beta/primitive.rs index 472a81a..591395f 100644 --- a/src/bin/ion/commands/beta/primitive.rs +++ b/src/bin/ion/commands/beta/primitive.rs @@ -1,7 +1,7 @@ use crate::commands::IonCliCommand; use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; -use ion_rs::{VarInt, VarUInt}; +use ion_rs::v1_0::{VarInt, VarUInt}; pub struct PrimitiveCommand; diff --git a/src/bin/ion/commands/beta/symtab/filter.rs b/src/bin/ion/commands/beta/symtab/filter.rs index 76faab6..55b0765 100644 --- a/src/bin/ion/commands/beta/symtab/filter.rs +++ b/src/bin/ion/commands/beta/symtab/filter.rs @@ -4,7 +4,7 @@ use clap::{Arg, ArgAction, ArgMatches, Command}; use std::fs::File; use std::io; use std::io::{stdout, BufWriter, Write}; -use new_ion_rs::*; +use ion_rs::*; pub struct SymtabFilterCommand; diff --git a/src/bin/ion/commands/beta/to/json.rs b/src/bin/ion/commands/beta/to/json.rs index 5be8ef0..e0e7cff 100644 --- a/src/bin/ion/commands/beta/to/json.rs +++ b/src/bin/ion/commands/beta/to/json.rs @@ -5,7 +5,7 @@ use serde_json::{Map, Number, Value as JsonValue}; use std::fs::File; use std::io::{stdin, stdout, BufWriter, Write}; use std::str::FromStr; -use new_ion_rs::*; +use ion_rs::*; pub struct ToJsonCommand; diff --git a/src/bin/ion/commands/dump.rs b/src/bin/ion/commands/dump.rs index 750d4d9..3a4d5f2 100644 --- a/src/bin/ion/commands/dump.rs +++ b/src/bin/ion/commands/dump.rs @@ -1,7 +1,7 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; -use new_ion_rs::*; +use ion_rs::*; use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, Chain, Cursor, Read, StdinLock, Write}; From 1d73dc2ef2b026c0923fe69870ca88cff1f48bbe Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 1 Jun 2024 11:51:54 -0400 Subject: [PATCH 05/13] Refactoring to DRY things up --- Cargo.lock | 2 + Cargo.toml | 4 +- src/bin/ion/commands/beta/inspect.rs | 242 
+++++++++++++++------------ 3 files changed, 139 insertions(+), 109 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2e6dfe3..9b93dfe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -878,6 +878,8 @@ dependencies = [ [[package]] name = "ion-rs" version = "1.0.0-rc.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87db81c1c2e08fd2de74c8dc62fbc4d6f192a34a883dafe21a00cad7a037e3da" dependencies = [ "arrayvec", "base64 0.12.3", diff --git a/Cargo.toml b/Cargo.toml index 7bd8e8b..1788f3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,9 +17,7 @@ clap = { version = "4.0.17", features = ["cargo"] } colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" -#ion-rs = { version = "1.0.0-rc.2", features = ["experimental"] } -ion-rs = { package = "ion-rs", version = "1.0.0-rc.4", path = "../ion-rust", features = ["experimental"] } -# new-ion-rs = { package = "ion-rs", git = "https://github.com/amazon-ion/ion-rust.git", features = ["experimental"] } +ion-rs = { version = "1.0.0-rc.4", features = ["experimental"] } memmap = "0.7.0" tempfile = "3.2.0" ion-schema = "0.10.0" diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index a748fc7..1446833 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -2,6 +2,7 @@ use std::fmt::Display; use std::fs::File; use std::io; use std::io::Write; +use std::ops::ControlFlow; use std::str::FromStr; use anyhow::{Context, Result}; @@ -16,7 +17,6 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, StandardStreamLoc // `FileWriter` ignores all requests to emit TTY color escape codes. use crate::file_writer::FileWriter; - // * The output stream could be STDOUT or a file handle, so we use `dyn io::Write` to abstract // over the two implementations. 
// * The Drop implementation will ensure that the output stream is flushed when the last reference @@ -217,40 +217,21 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// Iterates over the items in `reader`, printing a table section for each top level value. fn inspect_top_level(&mut self, reader: &mut SystemReader) -> Result<()> { + const TOP_LEVEL_DEPTH: usize = 0; self.write_table_header()?; let mut is_first_item = true; let mut has_printed_skip_message = false; loop { let item = reader.next_item()?; - // If the next item isn't `EndOfStream`, check to see whether its final byte offset is - // beyond the configured number of bytes to skip before printing can begin (the value of - // the `--set-bytes` flag). - if !matches!(item, SystemStreamItem::EndOfStream(_)) && self.should_skip(item.raw_stream_item()) { - // If we need to skip it, print a message indicating that some number of items have been skipped. - if !has_printed_skip_message { - self.write_skipping_message(0, "stream items")?; - // We only print this message once, so remember that we've already done this. - has_printed_skip_message = true; - } - // Skip ahead to the next stream item. - continue; - } + let is_last_item = matches!(item, SystemStreamItem::EndOfStream(_)); - // Also check the final byte offset to see if it goes beyond the processing limit set by - // the `--limit-bytes` flag. - if self.is_past_limit(item.raw_stream_item()) { - self.write_limiting_message(0, "ending")?; - // If the limit is reached at the top level, there's nothing more to do. - return Ok(()); + match self.select_action(TOP_LEVEL_DEPTH, &mut has_printed_skip_message, &item.raw_stream_item(), "stream items", "ending")? { + InspectorAction::Skip => continue, + InspectorAction::Inspect => {} + InspectorAction::LimitReached => break, } - // In most cases, we would take this opportunity to print a row separator to create - // a new table section for this top-level item. However, there are two exceptions we - // need to check for: - // 1. 
The first stream item follows the header and so does not require a row separator. - if !is_first_item - // 2. The end of the stream prints the end of the table, not a row separator. - && !matches!(item, SystemStreamItem::EndOfStream(_)) { + if !is_first_item && !is_last_item { // If this item is neither the first nor last in the stream, print a row separator. write!(self.output, "{ROW_SEPARATOR}")?; } @@ -295,7 +276,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// * `None`, then there is no stream-level entity backing the item (that is: it was the result /// of a macro expansion). Checks to see if the inspector has already completed its /// skipping phase on an earlier item. - fn should_skip(&mut self, maybe_item: Option) -> bool { + fn should_skip(&mut self, maybe_item: &Option) -> bool { match maybe_item { // If this item came from an input literal, see if the input literal ends after // the requested number of bytes to skip. If not, we'll move to the next one. @@ -312,8 +293,9 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// * `None`, then there is no stream-level entity backing the item. These will always be /// inspected; if the e-expression that produced the value was not beyond the limit, /// none of the ephemeral values it produces are either. - fn is_past_limit(&mut self, maybe_item: Option) -> bool { - maybe_item.map(|item| item.range().start >= self.bytes_to_skip + self.limit_bytes).unwrap_or(false) + fn is_past_limit(&mut self, maybe_item: &Option) -> bool { + let limit = self.bytes_to_skip + self.limit_bytes; + maybe_item.as_ref().map(|item| item.range().start >= limit).unwrap_or(false) } /// Convenience method to set the output stream to the specified color/style for the duration of `write_fn` @@ -443,6 +425,23 @@ impl<'a, 'b> IonInspector<'a, 'b> { } } + /// Inspects the struct `struct_`, including all of its fields. 
If this struct appears inside + /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended + /// to the struct's text representation. + fn inspect_symbol_table(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { + let raw_struct = match struct_.expanded().source() { + ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, + ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template symbol table") + }; + + use LazyRawValueKind::*; + match raw_struct.as_value().kind() { + Binary_1_0(v) => self.inspect_binary_1_0_symbol_table(depth, delimiter, struct_, raw_struct, v), + Binary_1_1(_) => todo!("Binary Ion 1.1 symbol table"), + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), + } + } + fn inspect_annotations(&mut self, depth: usize, value: LazyValue) -> Result<()> { let raw_value = match value.expanded().source() { ExpandedValueSource::ValueLiteral(raw_value) => raw_value, @@ -460,6 +459,24 @@ impl<'a, 'b> IonInspector<'a, 'b> { // ===== Binary Ion 1.0 ====== + // When inspecting a container, the container's header gets its own row in the output table. + // Unlike a scalar, the bytes of the container body do not begin immediately after the header + // bytes. + // This prints the container's offset, length, and header bytes, leaving the cursor positioned + // at the beginning of the `Text Ion` column. 
+ fn inspect_binary_1_0_container_header(&mut self, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + let encoding = raw_value.encoded_data(); + let range = encoding.range(); + + let opcode_bytes: &[u8] = raw_value.encoded_data().opcode_span().bytes(); + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ + IonBytes::new(BytesKind::Opcode, opcode_bytes), + IonBytes::new(BytesKind::TrailingLength, raw_value.encoded_data().trailing_length_span().bytes()), + ]); + + self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter) + } + fn inspect_binary_1_0_sexp<'x>(&mut self, depth: usize, delimiter: &str, sexp: LazySExp<'x, AnyEncoding>, raw_sexp: v1_0::LazyRawBinarySExp<'x>) -> Result<()> { self.inspect_binary_1_0_sequence(depth, "(", "", ")", delimiter, sexp.iter(), raw_sexp, raw_sexp.as_value()) } @@ -478,17 +495,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, raw_value: v1_0::LazyRawBinaryValue, ) -> Result<()> { - let encoding = raw_value.encoded_data(); - let range = encoding.range(); - - let opcode_bytes: &[u8] = raw_value.encoded_data().opcode_span().bytes(); - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ - IonBytes::new(BytesKind::Opcode, opcode_bytes), - IonBytes::new(BytesKind::TrailingLength, raw_value.encoded_data().trailing_length_span().bytes()), - ]); - - self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; - + self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; self.with_style(text_ion_style(), |out| { write!(out, "{opening_delimiter}\n")?; @@ -498,19 +505,12 @@ impl<'a, 'b> IonInspector<'a, 'b> { let mut has_printed_skip_message = false; for (raw_value_res, value_res) in nested_raw_values.iter().zip(nested_values) { let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); - if self.should_skip(Some(raw_nested_value)) { - if !has_printed_skip_message { - 
self.write_skipping_message(depth + 1, "values")?; - has_printed_skip_message = true; - } - continue; - } - if self.is_past_limit(Some(raw_nested_value)) { - self.write_limiting_message(depth + 1, "stepping out")?; - break; + match self.select_action(depth + 1, &mut has_printed_skip_message, &Some(raw_nested_value), "values", "stepping out")? { + InspectorAction::Skip => continue, + InspectorAction::Inspect => {} + InspectorAction::LimitReached => break, } self.inspect_value(depth + 1, value_delimiter, nested_value)?; - self.skip_complete = true; } self.write_blank_offset_length_and_bytes(depth)?; @@ -520,16 +520,77 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } - fn inspect_binary_1_0_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { - let encoding = raw_value.encoded_data(); - let range = encoding.range(); + fn select_action(&mut self, + depth: usize, + has_printed_skip_message: &mut bool, + maybe_item: &Option, + name_of_skipped_item: &str, + name_of_limit_action: &str, + ) -> Result { + if self.should_skip(maybe_item) { + if !*has_printed_skip_message { + self.write_skipping_message(depth, name_of_skipped_item)?; + *has_printed_skip_message = true; + } + return Ok(InspectorAction::Skip); + } + self.skip_complete = true; + + if self.is_past_limit(maybe_item) { + self.write_limiting_message(depth, name_of_limit_action)?; + return Ok(InspectorAction::LimitReached); + } + + Ok(InspectorAction::Inspect) + } + /// Inspects all values (however deeply nested) starting at the current level. + fn inspect_binary_1_0_field(&mut self, depth: usize, has_printed_skip_message: &mut bool, raw_field: LazyRawFieldExpr, field: LazyField) -> Result> { + let (raw_name, _raw_value) = raw_field.expect_name_value()?; + let name = field.name()?; + + match self.select_action(depth, has_printed_skip_message, &Some(raw_field), "fields", "stepping out")? 
{ + InspectorAction::Skip => return Ok(ControlFlow::Continue(())), + InspectorAction::Inspect => {} + InspectorAction::LimitReached => return Ok(ControlFlow::Break(())), + } + + // ===== Field name ===== + let range = raw_name.range(); + let raw_name_bytes = raw_name.span().bytes(); + let offset = range.start; + let length = range.len(); let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ - IonBytes::new(BytesKind::Opcode, encoding.opcode_span().bytes()), - IonBytes::new(BytesKind::TrailingLength, encoding.trailing_length_span().bytes()), + IonBytes::new(BytesKind::FieldId, raw_name_bytes) ]); + self.write_offset_length_and_bytes(offset, length, &mut formatter)?; - self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; + self.write_indentation(depth)?; + self.with_style(field_id_style(), |out| { + IoValueFormatter::new(out).value_formatter().format_symbol(name)?; + Ok(()) + })?; + write!(self.output, ": ")?; + // Print a text Ion comment showing how the field name was encoded, ($SID or text) + self.with_style(comment_style(), |out| { + match raw_name.read()? 
{ + RawSymbolRef::SymbolId(sid) => { + write!(out, " // ${sid}\n") + } + RawSymbolRef::Text(_) => { + write!(out, " // \n") + } + }?; + Ok(()) + })?; + + // ===== Field value ===== + self.inspect_value(depth, ",", field.value())?; + Ok(ControlFlow::Continue(())) + } + + fn inspect_binary_1_0_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; self.with_style(text_ion_style(), |out| { @@ -538,55 +599,10 @@ impl<'a, 'b> IonInspector<'a, 'b> { })?; let mut has_printed_skip_message = false; for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { - let (raw_field, field) = (raw_field_result?, field_result?); - let (raw_name, raw_value) = raw_field.expect_name_value()?; - let name = field.name()?; - - if self.should_skip(Some(raw_value)) { - if !has_printed_skip_message { - self.write_skipping_message(depth + 1, "fields")?; - has_printed_skip_message = true; - } - continue; - } - self.skip_complete = true; - - if self.is_past_limit(Some(raw_field)) { - self.write_limiting_message(depth + 1, "stepping out")?; - break; + match self.inspect_binary_1_0_field(depth + 1, &mut has_printed_skip_message, raw_field_result?, field_result?)? 
{ + ControlFlow::Continue(_) => {} + ControlFlow::Break(_) => break, } - - // ===== Field name ===== - let range = raw_name.range(); - let raw_name_bytes = raw_name.span().bytes(); - let offset = range.start; - let length = range.len(); - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ - IonBytes::new(BytesKind::FieldId, raw_name_bytes) - ]); - self.write_offset_length_and_bytes(offset, length, &mut formatter)?; - - self.write_indentation(depth + 1)?; - self.with_style(field_id_style(), |out| { - IoValueFormatter::new(out).value_formatter().format_symbol(name)?; - Ok(()) - })?; - write!(self.output, ": ")?; - // Print a text Ion comment showing how the field name was encoded, ($SID or text) - self.with_style(comment_style(), |out| { - match raw_name.read()? { - RawSymbolRef::SymbolId(sid) => { - write!(out, " // ${sid}\n") - } - RawSymbolRef::Text(_) => { - write!(out, " // \n") - } - }?; - Ok(()) - })?; - - // ===== Field value ===== - self.inspect_value(depth + 1, ",", field.value())?; } // ===== Closing delimiter ===== self.write_blank_offset_length_and_bytes(depth)?; @@ -596,6 +612,11 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } + + fn inspect_binary_1_0_symbol_table(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + todo!() + } + fn inspect_binary_1_0_annotations(&mut self, depth: usize, value: LazyValue, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { let encoding = raw_value.encoded_annotations().unwrap(); let range = encoding.range(); @@ -751,6 +772,15 @@ impl<'a, 'b> IonInspector<'a, 'b> { } } +pub enum InspectorAction { + /// The current value appears before the offset specified by `--skip-bytes`. Ignore it. + Skip, + /// The current value appears after `--skip-bytes` and before `--limit-bytes`. Inspect it. + Inspect, + /// The current value appears after `--limit-bytes`, stop inspecting values. 
+ LimitReached, +} + // ===== Named styles ===== fn header_style() -> ColorSpec { From e07f24fe9d874e859b7a0be8896c7e75326e18b7 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 1 Jun 2024 14:50:39 -0400 Subject: [PATCH 06/13] `inspect` now shows symbol IDs assigned to LST symbols --- src/bin/ion/commands/beta/inspect.rs | 273 ++++++++++++++++++++------- 1 file changed, 210 insertions(+), 63 deletions(-) diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index 1446833..e93554d 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -2,12 +2,12 @@ use std::fmt::Display; use std::fs::File; use std::io; use std::io::Write; -use std::ops::ControlFlow; use std::str::FromStr; use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; use ion_rs::*; +use ion_rs::v1_0::LazyRawBinaryValue; use crate::commands::{IonCliCommand, WithIonCliArgument}; @@ -177,10 +177,10 @@ fn inspect_input( const VERTICAL_LINE: &str = "│"; const START_OF_HEADER: &str = "┌──────────────┬──────────────┬─────────────────────────┬──────────────────────┐"; const END_OF_HEADER: &str = "├──────────────┼──────────────┼─────────────────────────┼──────────────────────┘"; -const ROW_SEPARATOR: &str = r#"├──────────────┼──────────────┼─────────────────────────┤ -"#; -const END_OF_TABLE: &str = r#"└──────────────┴──────────────┴─────────────────────────┘ -"#; +const ROW_SEPARATOR: &str = r#" +├──────────────┼──────────────┼─────────────────────────┤"#; +const END_OF_TABLE: &str = r#" +└──────────────┴──────────────┴─────────────────────────┘"#; struct IonInspector<'a, 'b> { output: &'a mut OutputRef<'b>, @@ -197,6 +197,17 @@ const TEXT_WRITER_INITIAL_BUFFER_SIZE: usize = 128; // The number of hex-encoded bytes to show in each row of the `Binary Ion` column. 
const BYTES_PER_ROW: usize = 8; +// Friendly trait alias (by way of an empty extension) for a closure that takes an output reference +// and a value and writes a comment for that value. +trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result<()> {} + +impl<'x, F> CommentFn<'x> for F where F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result<()> {} + +/// Returns a `CommentFn` implementation that does nothing. +fn no_comment<'x>() -> impl CommentFn<'x> { + |_, _| Ok(()) +} + impl<'a, 'b> IonInspector<'a, 'b> { fn new( out: &'a mut OutputRef<'b>, @@ -222,6 +233,9 @@ impl<'a, 'b> IonInspector<'a, 'b> { let mut is_first_item = true; let mut has_printed_skip_message = false; loop { + // TODO: This does not account for shared symbol table imports. However, the CLI does not + // yet support specifying a catalog, so it's correct enough for the moment. + let next_symbol_id = reader.symbol_table().len(); let item = reader.next_item()?; let is_last_item = matches!(item, SystemStreamItem::EndOfStream(_)); @@ -238,11 +252,10 @@ impl<'a, 'b> IonInspector<'a, 'b> { match item { SystemStreamItem::SymbolTable(lazy_struct) => { - let lazy_value = lazy_struct.as_value(); - self.inspect_value(0, "", lazy_value)?; + self.inspect_symbol_table(next_symbol_id, lazy_struct)?; } SystemStreamItem::Value(lazy_value) => { - self.inspect_value(0, "", lazy_value)?; + self.inspect_value(0, "", lazy_value, no_comment())?; } SystemStreamItem::VersionMarker(marker) => { self.inspect_ivm(marker)?; @@ -316,9 +329,15 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } + /// Convenience method to move output to the next line. + fn newline(&mut self) -> Result<()> { + Ok(self.output.write_all(b"\n")?) + } + /// Inspects an Ion Version Marker. 
fn inspect_ivm(&mut self, marker: LazyRawAnyVersionMarker<'_>) -> Result<()> { const BINARY_IVM_LENGTH: usize = 4; + self.newline()?; let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![IonBytes::new(BytesKind::VersionMarker, marker.span().bytes())]); self.write_offset_length_and_bytes(marker.range().start, BINARY_IVM_LENGTH, &mut formatter)?; self.with_style(BytesKind::VersionMarker.style(), |out| { @@ -328,7 +347,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { })?; self.with_style(comment_style(), |out| { - write!(out, " // Version marker\n")?; + write!(out, " // Version marker")?; Ok(()) })?; self.output.reset()?; @@ -336,22 +355,24 @@ impl<'a, 'b> IonInspector<'a, 'b> { } /// Inspects all values (however deeply nested) starting at the current level. - fn inspect_value(&mut self, depth: usize, delimiter: &str, value: LazyValue<'_, AnyEncoding>) -> Result<()> { + fn inspect_value<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>, comment_fn: impl CommentFn<'x>) -> Result<()> { use ValueRef::*; + self.newline()?; if value.has_annotations() { self.inspect_annotations(depth, value)?; + self.newline()?; } match value.read()? { SExp(sexp) => self.inspect_sexp(depth, delimiter, sexp), List(list) => self.inspect_list(depth, delimiter, list), Struct(struct_) => self.inspect_struct(depth, delimiter, struct_), - _ => self.inspect_scalar(depth, delimiter, value), + _ => self.inspect_scalar(depth, delimiter, value, comment_fn), } } /// Inspects the scalar `value`. If this value appears in a list or struct, the caller can set /// `delimiter` to a comma (`","`) and it will be appended to the value's text representation. 
- fn inspect_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>) -> Result<()> { + fn inspect_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>, comment_fn: impl CommentFn<'x>) -> Result<()> { use ExpandedValueSource::*; let value_literal = match value.expanded().source() { ValueLiteral(value_literal) => value_literal, @@ -365,7 +386,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { // Check what encoding this is. At the moment, only binary Ion 1.0 is supported. match value_literal.kind() { Binary_1_0(bin_val) => { - self.inspect_binary_1_0_scalar(depth, delimiter, value, bin_val) + self.inspect_binary_1_0_scalar(depth, delimiter, value, bin_val, comment_fn) } Binary_1_1(_) => todo!("Binary Ion 1.1 scalars"), Text_1_0(_) | Text_1_1(_) => unreachable!("text value") @@ -425,10 +446,12 @@ impl<'a, 'b> IonInspector<'a, 'b> { } } - /// Inspects the struct `struct_`, including all of its fields. If this struct appears inside - /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended - /// to the struct's text representation. 
- fn inspect_symbol_table(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { + fn inspect_symbol_table(&mut self, next_symbol_id: usize, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { + let value = struct_.as_value(); + if value.has_annotations() { + self.newline()?; + self.inspect_annotations(0, value)?; + } let raw_struct = match struct_.expanded().source() { ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template symbol table") @@ -436,7 +459,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { use LazyRawValueKind::*; match raw_struct.as_value().kind() { - Binary_1_0(v) => self.inspect_binary_1_0_symbol_table(depth, delimiter, struct_, raw_struct, v), + Binary_1_0(v) => self.inspect_binary_1_0_symbol_table(next_symbol_id, struct_, raw_struct, v), Binary_1_1(_) => todo!("Binary Ion 1.1 symbol table"), Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } @@ -477,12 +500,42 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter) } - fn inspect_binary_1_0_sexp<'x>(&mut self, depth: usize, delimiter: &str, sexp: LazySExp<'x, AnyEncoding>, raw_sexp: v1_0::LazyRawBinarySExp<'x>) -> Result<()> { - self.inspect_binary_1_0_sequence(depth, "(", "", ")", delimiter, sexp.iter(), raw_sexp, raw_sexp.as_value()) + fn inspect_binary_1_0_sexp<'x>(&mut self, + depth: usize, + delimiter: &str, + sexp: LazySExp<'x, AnyEncoding>, + raw_sexp: v1_0::LazyRawBinarySExp<'x>, + ) -> Result<()> { + self.inspect_binary_1_0_sequence( + depth, + "(", + "", + ")", + delimiter, + sexp.iter(), + raw_sexp, + raw_sexp.as_value(), + no_comment(), + ) } - fn inspect_binary_1_0_list<'x>(&mut self, depth: usize, delimiter: &str, list: LazyList<'x, AnyEncoding>, raw_list: v1_0::LazyRawBinaryList<'x>) -> Result<()> { - self.inspect_binary_1_0_sequence(depth, "[", ",", "]", delimiter, list.iter(), raw_list, 
raw_list.as_value()) + fn inspect_binary_1_0_list<'x>(&mut self, + depth: usize, + delimiter: &str, + list: LazyList<'x, AnyEncoding>, + raw_list: v1_0::LazyRawBinaryList<'x>, + ) -> Result<()> { + self.inspect_binary_1_0_sequence( + depth, + "[", + ",", + "]", + delimiter, + list.iter(), + raw_list, + raw_list.as_value(), + no_comment(), + ) } fn inspect_binary_1_0_sequence<'x>(&mut self, @@ -493,7 +546,8 @@ impl<'a, 'b> IonInspector<'a, 'b> { trailing_delimiter: &str, nested_values: impl IntoIterator>>, nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, - raw_value: v1_0::LazyRawBinaryValue, + raw_value: LazyRawBinaryValue, + mut value_comment_fn: impl CommentFn<'x>, ) -> Result<()> { self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; @@ -510,12 +564,16 @@ impl<'a, 'b> IonInspector<'a, 'b> { InspectorAction::Inspect => {} InspectorAction::LimitReached => break, } - self.inspect_value(depth + 1, value_delimiter, nested_value)?; + self.inspect_value(depth + 1, value_delimiter, nested_value, no_comment())?; + self.output.set_color(&comment_style())?; + value_comment_fn(self.output, nested_value)?; + self.output.reset()?; } + self.newline()?; self.write_blank_offset_length_and_bytes(depth)?; self.with_style(text_ion_style(), |out| { - write!(out, "{closing_delimiter}{trailing_delimiter}\n")?; + write!(out, "{closing_delimiter}{trailing_delimiter}")?; Ok(()) }) } @@ -544,18 +602,8 @@ impl<'a, 'b> IonInspector<'a, 'b> { Ok(InspectorAction::Inspect) } - /// Inspects all values (however deeply nested) starting at the current level. - fn inspect_binary_1_0_field(&mut self, depth: usize, has_printed_skip_message: &mut bool, raw_field: LazyRawFieldExpr, field: LazyField) -> Result> { - let (raw_name, _raw_value) = raw_field.expect_name_value()?; - let name = field.name()?; - - match self.select_action(depth, has_printed_skip_message, &Some(raw_field), "fields", "stepping out")? 
{ - InspectorAction::Skip => return Ok(ControlFlow::Continue(())), - InspectorAction::Inspect => {} - InspectorAction::LimitReached => return Ok(ControlFlow::Break(())), - } - - // ===== Field name ===== + fn inspect_binary_1_0_field_name(&mut self, depth: usize, raw_name: LazyRawAnyFieldName, name: SymbolRef) -> Result<()> { + self.newline()?; let range = raw_name.range(); let raw_name_bytes = raw_name.span().bytes(); let offset = range.start; @@ -564,7 +612,6 @@ impl<'a, 'b> IonInspector<'a, 'b> { IonBytes::new(BytesKind::FieldId, raw_name_bytes) ]); self.write_offset_length_and_bytes(offset, length, &mut formatter)?; - self.write_indentation(depth)?; self.with_style(field_id_style(), |out| { IoValueFormatter::new(out).value_formatter().format_symbol(name)?; @@ -575,49 +622,142 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.with_style(comment_style(), |out| { match raw_name.read()? { RawSymbolRef::SymbolId(sid) => { - write!(out, " // ${sid}\n") + write!(out, " // ${sid}") } RawSymbolRef::Text(_) => { - write!(out, " // \n") + write!(out, " // ") } }?; Ok(()) - })?; + }) + } - // ===== Field value ===== - self.inspect_value(depth, ",", field.value())?; - Ok(ControlFlow::Continue(())) + /// Inspects all values (however deeply nested) starting at the current level. 
+ fn inspect_binary_1_0_field(&mut self, depth: usize, field: LazyField, raw_field: LazyRawFieldExpr) -> Result<()> { + let (raw_name, _raw_value) = raw_field.expect_name_value()?; + let name = field.name()?; + + self.inspect_binary_1_0_field_name(depth, raw_name, name)?; + self.inspect_value(depth, ",", field.value(), no_comment())?; + Ok(()) } - fn inspect_binary_1_0_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: LazyRawBinaryValue) -> Result<()> { self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; self.with_style(text_ion_style(), |out| { - write!(out, "{{\n")?; + write!(out, "{{")?; Ok(()) })?; let mut has_printed_skip_message = false; for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { - match self.inspect_binary_1_0_field(depth + 1, &mut has_printed_skip_message, raw_field_result?, field_result?)? { - ControlFlow::Continue(_) => {} - ControlFlow::Break(_) => break, + let field = field_result?; + let raw_field = raw_field_result?; + match self.select_action(depth + 1, &mut has_printed_skip_message, &Some(raw_field), "fields", "stepping out")? 
{ + InspectorAction::Skip => continue, + InspectorAction::Inspect => self.inspect_binary_1_0_field(depth + 1, field, raw_field)?, + InspectorAction::LimitReached => break, } } // ===== Closing delimiter ===== + self.newline()?; self.write_blank_offset_length_and_bytes(depth)?; self.with_style(text_ion_style(), |out| { - write!(out, "}}{delimiter}\n")?; + write!(out, "}}{delimiter}")?; Ok(()) }) } + fn inspect_binary_1_0_symbol_table(&mut self, next_symbol_id: usize, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: LazyRawBinaryValue) -> Result<()> { + // The processing for a symbol table is very similar to that of a regular struct, + // but with special handling defined for the `imports` and `symbols` fields when present. + // Because symbol tables are always at the top level, there is no need for indentation. + const TOP_LEVEL_DEPTH: usize = 0; + self.newline()?; + self.inspect_binary_1_0_container_header(raw_value)?; + self.with_style(text_ion_style(), |out| { + write!(out, "{{")?; + Ok(()) + })?; + let mut has_printed_skip_message = false; + for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { + let field = field_result?; + let raw_field = raw_field_result?; - fn inspect_binary_1_0_symbol_table(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { - todo!() + match self.select_action(TOP_LEVEL_DEPTH + 1, &mut has_printed_skip_message, &Some(raw_field), "fields", "stepping out")? { + InspectorAction::Skip => continue, + InspectorAction::Inspect if field.name()? == "symbols" => self.inspect_lst_symbols_field(next_symbol_id, field, raw_field)?, + // TODO: if field.name()? 
== "imports" => {} + InspectorAction::Inspect => self.inspect_binary_1_0_field(TOP_LEVEL_DEPTH + 1, field, raw_field)?, + InspectorAction::LimitReached => break, + } + } + // ===== Closing delimiter ===== + self.newline()?; + self.write_blank_offset_length_and_bytes(TOP_LEVEL_DEPTH)?; + self.with_style(text_ion_style(), |out| { + write!(out, "}}")?; + Ok(()) + }) + } + + fn inspect_lst_symbols_field(&mut self, mut next_symbol_id: usize, field: LazyField, raw_field: LazyRawFieldExpr) -> Result<()> { + const SYMBOL_LIST_DEPTH: usize = 1; + let (raw_name, raw_value) = raw_field.expect_name_value()?; + self.inspect_binary_1_0_field_name(SYMBOL_LIST_DEPTH, raw_name, field.name()?)?; + let symbols_list = match field.value().read()? { + ValueRef::Symbol(s) if s == "$ion_symbol_table" => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| Ok(out.write_all(b" // Append new symbols")?)), + ValueRef::List(list) => list, + _ => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| Ok(out.write_all(b" // Invalid, ignored")?)), + }; + + let raw_symbols_list = raw_value.read()?.expect_list()?; + let nested_raw_values = raw_symbols_list.iter(); + let nested_values = symbols_list.iter(); + + let LazyRawValueKind::Binary_1_0(raw_value) = raw_value.kind() else { + unreachable!("binary 1.0 encoding already confirmed"); + }; + + self.newline()?; + self.inspect_binary_1_0_container_header(raw_value)?; + self.write_indentation(SYMBOL_LIST_DEPTH)?; + self.with_style(text_ion_style(), |out| { + write!(out, "[")?; + Ok(()) + })?; + + let mut has_printed_skip_message = false; + for (raw_value_res, value_res) in nested_raw_values.zip(nested_values) { + let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); + match self.select_action(SYMBOL_LIST_DEPTH + 1, &mut has_printed_skip_message, &Some(raw_nested_value), "values", "stepping out")? 
{ + InspectorAction::Skip => continue, + InspectorAction::Inspect => {} + InspectorAction::LimitReached => break, + } + + self.output.set_color(&comment_style())?; + self.inspect_value(SYMBOL_LIST_DEPTH + 1, ",", nested_value, |out, value| { + match value.read()? { + ValueRef::String(_s) => write!(out, " // -> ${next_symbol_id}"), + _other => write!(out, " // -> ${next_symbol_id} (no text)"), + }?; + next_symbol_id += 1; + Ok(()) + })?; + self.output.reset()?; + } + + self.newline()?; + self.write_blank_offset_length_and_bytes(SYMBOL_LIST_DEPTH)?; + self.with_style(text_ion_style(), |out| { + write!(out, "],")?; + Ok(()) + }) } - fn inspect_binary_1_0_annotations(&mut self, depth: usize, value: LazyValue, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_annotations(&mut self, depth: usize, value: LazyValue, raw_value: LazyRawBinaryValue) -> Result<()> { let encoding = raw_value.encoded_annotations().unwrap(); let range = encoding.range(); @@ -647,20 +787,25 @@ impl<'a, 'b> IonInspector<'a, 'b> { RawSymbolRef::Text(_) => write!(out, ""), }?; } - write!(out, "\n")?; Ok(()) })?; Ok(()) } - fn inspect_binary_1_0_scalar(&mut self, depth: usize, delimiter: &str, value: LazyValue, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>, raw_value: LazyRawBinaryValue, mut comment_fn: impl CommentFn<'x>) -> Result<()> { let encoding = raw_value.encoded_data(); let range = encoding.range(); let opcode_bytes = IonBytes::new(BytesKind::Opcode, encoding.opcode_span().bytes()); let length_bytes = IonBytes::new(BytesKind::TrailingLength, encoding.trailing_length_span().bytes()); - let body_bytes = IonBytes::new(BytesKind::ValueBody, encoding.body_span().bytes()); + // TODO: There is a bug in the `body_span()` method that causes it fail when the value is annotated. 
+ // When it's fixed, this can be: + // let body_bytes = IonBytes::new(BytesKind::ValueBody, body_span); + let body_len = raw_value.encoded_data().body_range().len(); + let total_len = raw_value.encoded_data().range().len(); + let body_bytes = IonBytes::new(BytesKind::ValueBody, &encoding.span().bytes()[total_len - body_len..]); + let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![opcode_bytes, length_bytes, body_bytes]); @@ -682,12 +827,15 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.text_writer.output_mut().clear(); self.output.write_all(delimiter.as_bytes())?; self.output.reset()?; - write!(self.output, "\n")?; + + self.output.set_color(&comment_style())?; + comment_fn(self.output, value)?; + self.output.reset()?; while !formatter.is_empty() { + self.newline()?; self.write_offset_length_and_bytes("", "", &mut formatter)?; self.write_indentation(depth)?; - write!(self.output, "\n")?; } Ok(()) @@ -708,7 +856,6 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.write_with_style(header_style(), " Text Ion ")?; write!(self.output, "{VERTICAL_LINE}\n")?; self.output.write_all(END_OF_HEADER.as_bytes())?; - write!(self.output, "\n")?; Ok(()) } @@ -750,10 +897,10 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// Prints a row with an ellipsis (`...`) in the first three columns, and a text Ion comment in /// the final column indicating what is being skipped over. 
fn write_skipping_message(&mut self, depth: usize, name_of_skipped_item: &str) -> Result<()> { - write!(self.output, "{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; + write!(self.output, "\n{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; self.write_indentation(depth)?; self.with_style(comment_style(), |out| { - write!(out, "// ...skipping {name_of_skipped_item}...\n")?; + write!(out, "// ...skipping {name_of_skipped_item}...")?; Ok(()) }) } @@ -762,11 +909,11 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// the final column indicating that we have reached the maximum number of bytes to process /// as determined by the `--limit-bytes` flag. fn write_limiting_message(&mut self, depth: usize, action: &str) -> Result<()> { - write!(self.output, "{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; + write!(self.output, "\n{VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:>12} {VERTICAL_LINE} {:23} {VERTICAL_LINE} ", "...", "...", "...")?; self.write_indentation(depth)?; let limit_bytes = self.limit_bytes; self.with_style(comment_style(), |out| { - write!(out, "// --limit-bytes {} reached, {action}.\n", limit_bytes)?; + write!(out, "// --limit-bytes {} reached, {action}.", limit_bytes)?; Ok(()) }) } From 73e3c91c06ec48bacdde3eeb752f7d2265b4d722 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 1 Jun 2024 15:07:46 -0400 Subject: [PATCH 07/13] Prints SID used to encode Symbol values. 
--- src/bin/ion/commands/beta/inspect.rs | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index e93554d..b4e4aa1 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -7,7 +7,7 @@ use std::str::FromStr; use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; use ion_rs::*; -use ion_rs::v1_0::LazyRawBinaryValue; +use ion_rs::v1_0::{LazyRawBinaryValue, RawValueRef}; use crate::commands::{IonCliCommand, WithIonCliArgument}; @@ -197,15 +197,16 @@ const TEXT_WRITER_INITIAL_BUFFER_SIZE: usize = 128; // The number of hex-encoded bytes to show in each row of the `Binary Ion` column. const BYTES_PER_ROW: usize = 8; -// Friendly trait alias (by way of an empty extension) for a closure that takes an output reference -// and a value and writes a comment for that value. -trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result<()> {} +/// Friendly trait alias (by way of an empty extension) for a closure that takes an output reference +/// and a value and writes a comment for that value. Returns `true` if it wrote a comment, `false` +/// otherwise. +trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result {} -impl<'x, F> CommentFn<'x> for F where F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result<()> {} +impl<'x, F> CommentFn<'x> for F where F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result {} /// Returns a `CommentFn` implementation that does nothing. fn no_comment<'x>() -> impl CommentFn<'x> { - |_, _| Ok(()) + |_, _| Ok(false) } impl<'a, 'b> IonInspector<'a, 'b> { @@ -707,9 +708,15 @@ impl<'a, 'b> IonInspector<'a, 'b> { let (raw_name, raw_value) = raw_field.expect_name_value()?; self.inspect_binary_1_0_field_name(SYMBOL_LIST_DEPTH, raw_name, field.name()?)?; let symbols_list = match field.value().read()? 
{ - ValueRef::Symbol(s) if s == "$ion_symbol_table" => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| Ok(out.write_all(b" // Append new symbols")?)), + ValueRef::Symbol(s) if s == "$ion_symbol_table" => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { + out.write_all(b" // Appends new symbols")?; + Ok(true) + }), ValueRef::List(list) => list, - _ => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| Ok(out.write_all(b" // Invalid, ignored")?)), + _ => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { + out.write_all(b" // Invalid, ignored")?; + Ok(true) + }), }; let raw_symbols_list = raw_value.read()?.expect_list()?; @@ -744,7 +751,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { _other => write!(out, " // -> ${next_symbol_id} (no text)"), }?; next_symbol_id += 1; - Ok(()) + Ok(true) })?; self.output.reset()?; } @@ -829,7 +836,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.output.reset()?; self.output.set_color(&comment_style())?; - comment_fn(self.output, value)?; + let wrote_comment = comment_fn(self.output, value)?; + if let RawValueRef::Symbol(RawSymbolRef::SymbolId(symbol_id)) = raw_value.read()? 
{ + match wrote_comment { + true => write!(self.output, " (${symbol_id})"), + false => write!(self.output, " // ${symbol_id}"), + }?; + } self.output.reset()?; while !formatter.is_empty() { From 592c27f6deaef7d81fb7b13388f8dec4addd8ce8 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 1 Jun 2024 15:11:12 -0400 Subject: [PATCH 08/13] Removes dependency on `memmap` --- Cargo.lock | 11 ----------- Cargo.toml | 1 - 2 files changed, 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9b93dfe..b44c121 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -843,7 +843,6 @@ dependencies = [ "ion-rs 1.0.0-rc.4", "ion-schema", "matches", - "memmap", "pager", "rstest", "serde", @@ -994,16 +993,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "memmap" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "minimal-lexical" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 1788f3a..719607b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,6 @@ colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" ion-rs = { version = "1.0.0-rc.4", features = ["experimental"] } -memmap = "0.7.0" tempfile = "3.2.0" ion-schema = "0.10.0" serde = { version = "1.0.163", features = ["derive"] } From e59676dccf908656a17ca2dfb39a89ae8c875297 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 1 Jun 2024 20:56:05 -0400 Subject: [PATCH 09/13] formatting cleanup, set ion_rs dep to v1.0.0-rc.5 --- Cargo.lock | 6 +- Cargo.toml | 2 +- src/bin/ion/commands/beta/generate/utils.rs | 2 +- src/bin/ion/commands/beta/inspect.rs | 480 ++++++++++++++------ src/bin/ion/commands/beta/mod.rs | 6 +- src/bin/ion/commands/beta/symtab/filter.rs | 4 +- src/bin/ion/commands/beta/to/json.rs | 22 +- 
src/bin/ion/commands/dump.rs | 4 +- src/bin/ion/file_writer.rs | 8 +- tests/cli.rs | 281 ++++++------ tests/code-gen-tests.rs | 7 +- 11 files changed, 510 insertions(+), 312 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b44c121..89d980a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -840,7 +840,7 @@ dependencies = [ "convert_case", "flate2", "infer", - "ion-rs 1.0.0-rc.4", + "ion-rs 1.0.0-rc.5", "ion-schema", "matches", "pager", @@ -876,9 +876,9 @@ dependencies = [ [[package]] name = "ion-rs" -version = "1.0.0-rc.4" +version = "1.0.0-rc.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87db81c1c2e08fd2de74c8dc62fbc4d6f192a34a883dafe21a00cad7a037e3da" +checksum = "ae6628b313b01f34e167393a688a78ce907ff7307cb9e4a93c9d3599cabb1b03" dependencies = [ "arrayvec", "base64 0.12.3", diff --git a/Cargo.toml b/Cargo.toml index 719607b..2e62e5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ clap = { version = "4.0.17", features = ["cargo"] } colored = "2.0.0" flate2 = "1.0" infer = "0.15.0" -ion-rs = { version = "1.0.0-rc.4", features = ["experimental"] } +ion-rs = { version = "1.0.0-rc.5", features = ["experimental"] } tempfile = "3.2.0" ion-schema = "0.10.0" serde = { version = "1.0.163", features = ["derive"] } diff --git a/src/bin/ion/commands/beta/generate/utils.rs b/src/bin/ion/commands/beta/generate/utils.rs index b2fdcae..b3852d9 100644 --- a/src/bin/ion/commands/beta/generate/utils.rs +++ b/src/bin/ion/commands/beta/generate/utils.rs @@ -5,7 +5,7 @@ use serde::Serialize; use std::fmt::{Display, Formatter}; /// Represents a field that will be added to generated data model. -/// This will be used by the template engine to fill properties of a struct/classs. +/// This will be used by the template engine to fill properties of a struct/class. 
#[derive(Serialize)] pub struct Field { pub(crate) name: String, diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index b4e4aa1..a55de7a 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -6,8 +6,8 @@ use std::str::FromStr; use anyhow::{Context, Result}; use clap::{Arg, ArgMatches, Command}; -use ion_rs::*; use ion_rs::v1_0::{LazyRawBinaryValue, RawValueRef}; +use ion_rs::*; use crate::commands::{IonCliCommand, WithIonCliArgument}; @@ -143,13 +143,7 @@ value start after `--skip-bytes`. } else { let stdin_lock = io::stdin().lock(); // If no input file was specified, run the inspector on STDIN. - inspect_input( - "STDIN", - stdin_lock, - &mut output, - bytes_to_skip, - limit_bytes, - )?; + inspect_input("STDIN", stdin_lock, &mut output, bytes_to_skip, limit_bytes)?; } Ok(()) } @@ -167,7 +161,8 @@ fn inspect_input( let mut reader = SystemReader::new(AnyEncoding, input); let mut inspector = IonInspector::new(output, bytes_to_skip, limit_bytes)?; // This inspects all values at the top level, recursing as necessary. - inspector.inspect_top_level(&mut reader) + inspector + .inspect_top_level(&mut reader) .with_context(|| format!("input: {input_name}"))?; Ok(()) } @@ -175,8 +170,10 @@ fn inspect_input( // See the Wikipedia page for Unicode Box Drawing[1] for other potentially useful glyphs. 
// [1] https://en.wikipedia.org/wiki/Box-drawing_characters#Unicode const VERTICAL_LINE: &str = "│"; -const START_OF_HEADER: &str = "┌──────────────┬──────────────┬─────────────────────────┬──────────────────────┐"; -const END_OF_HEADER: &str = "├──────────────┼──────────────┼─────────────────────────┼──────────────────────┘"; +const START_OF_HEADER: &str = + "┌──────────────┬──────────────┬─────────────────────────┬──────────────────────┐"; +const END_OF_HEADER: &str = + "├──────────────┼──────────────┼─────────────────────────┼──────────────────────┘"; const ROW_SEPARATOR: &str = r#" ├──────────────┼──────────────┼─────────────────────────┤"#; const END_OF_TABLE: &str = r#" @@ -202,7 +199,10 @@ const BYTES_PER_ROW: usize = 8; /// otherwise. trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result {} -impl<'x, F> CommentFn<'x> for F where F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result {} +impl<'x, F> CommentFn<'x> for F where + F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result +{ +} /// Returns a `CommentFn` implementation that does nothing. fn no_comment<'x>() -> impl CommentFn<'x> { @@ -228,7 +228,10 @@ impl<'a, 'b> IonInspector<'a, 'b> { } /// Iterates over the items in `reader`, printing a table section for each top level value. - fn inspect_top_level(&mut self, reader: &mut SystemReader) -> Result<()> { + fn inspect_top_level( + &mut self, + reader: &mut SystemReader, + ) -> Result<()> { const TOP_LEVEL_DEPTH: usize = 0; self.write_table_header()?; let mut is_first_item = true; @@ -236,11 +239,17 @@ impl<'a, 'b> IonInspector<'a, 'b> { loop { // TODO: This does not account for shared symbol table imports. However, the CLI does not // yet support specifying a catalog, so it's correct enough for the moment. 
- let next_symbol_id = reader.symbol_table().len(); + let mut next_symbol_id = reader.symbol_table().len(); let item = reader.next_item()?; let is_last_item = matches!(item, SystemStreamItem::EndOfStream(_)); - match self.select_action(TOP_LEVEL_DEPTH, &mut has_printed_skip_message, &item.raw_stream_item(), "stream items", "ending")? { + match self.select_action( + TOP_LEVEL_DEPTH, + &mut has_printed_skip_message, + &item.raw_stream_item(), + "stream items", + "ending", + )? { InspectorAction::Skip => continue, InspectorAction::Inspect => {} InspectorAction::LimitReached => break, @@ -253,6 +262,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { match item { SystemStreamItem::SymbolTable(lazy_struct) => { + let is_append = lazy_struct.get("imports")? + == Some(ValueRef::Symbol(SymbolRef::with_text( + "$ion_symbol_table", + ))); + if !is_append { + next_symbol_id = 10; // First available SID after system symbols in Ion 1.0 + } self.inspect_symbol_table(next_symbol_id, lazy_struct)?; } SystemStreamItem::Value(lazy_value) => { @@ -266,7 +282,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { } // `SystemStreamItem` is marked `#[non_exhaustive]`, so this branch is needed. // The arms above cover all of the existing variants at the time of writing. - _ => unimplemented!("a new SystemStreamItem variant was added") + _ => unimplemented!("a new SystemStreamItem variant was added"), } // Notice that we wait until _after_ the item has been inspected above to set the @@ -297,7 +313,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { Some(item) => item.range().end <= self.bytes_to_skip, // If this item came from a macro, there's no corresponding input literal. If we // haven't finished skipping input literals, we'll skip this ephemeral value. - None => !self.skip_complete + None => !self.skip_complete, } } @@ -308,13 +324,20 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// inspected; if the e-expression that produced the value was not beyond the limit, /// none of the ephemeral values it produces are either.
fn is_past_limit(&mut self, maybe_item: &Option) -> bool { - let limit = self.bytes_to_skip + self.limit_bytes; - maybe_item.as_ref().map(|item| item.range().start >= limit).unwrap_or(false) + let limit = self.bytes_to_skip.saturating_add(self.limit_bytes); + maybe_item + .as_ref() + .map(|item| item.range().start >= limit) + .unwrap_or(false) } /// Convenience method to set the output stream to the specified color/style for the duration of `write_fn` /// and then reset it upon completion. - fn with_style(&mut self, style: ColorSpec, write_fn: impl FnOnce(&mut OutputRef) -> Result<()>) -> Result<()> { + fn with_style( + &mut self, + style: ColorSpec, + write_fn: impl FnOnce(&mut OutputRef) -> Result<()>, + ) -> Result<()> { self.output.set_color(&style)?; write_fn(&mut self.output)?; self.output.reset()?; @@ -339,8 +362,18 @@ impl<'a, 'b> IonInspector<'a, 'b> { fn inspect_ivm(&mut self, marker: LazyRawAnyVersionMarker<'_>) -> Result<()> { const BINARY_IVM_LENGTH: usize = 4; self.newline()?; - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![IonBytes::new(BytesKind::VersionMarker, marker.span().bytes())]); - self.write_offset_length_and_bytes(marker.range().start, BINARY_IVM_LENGTH, &mut formatter)?; + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![IonBytes::new( + BytesKind::VersionMarker, + marker.span().bytes(), + )], + ); + self.write_offset_length_and_bytes( + marker.range().start, + BINARY_IVM_LENGTH, + &mut formatter, + )?; self.with_style(BytesKind::VersionMarker.style(), |out| { let (major, minor) = marker.version(); write!(out, "$ion_{major}_{minor}")?; @@ -356,7 +389,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { } /// Inspects all values (however deeply nested) starting at the current level. 
- fn inspect_value<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>, comment_fn: impl CommentFn<'x>) -> Result<()> { + fn inspect_value<'x>( + &mut self, + depth: usize, + delimiter: &str, + value: LazyValue<'x, AnyEncoding>, + comment_fn: impl CommentFn<'x>, + ) -> Result<()> { use ValueRef::*; self.newline()?; if value.has_annotations() { @@ -373,14 +412,24 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// Inspects the scalar `value`. If this value appears in a list or struct, the caller can set /// `delimiter` to a comma (`","`) and it will be appended to the value's text representation. - fn inspect_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>, comment_fn: impl CommentFn<'x>) -> Result<()> { + fn inspect_scalar<'x>( + &mut self, + depth: usize, + delimiter: &str, + value: LazyValue<'x, AnyEncoding>, + comment_fn: impl CommentFn<'x>, + ) -> Result<()> { use ExpandedValueSource::*; let value_literal = match value.expanded().source() { ValueLiteral(value_literal) => value_literal, // In Ion 1.0, there are no template values or constructed values so we can defer // implementing these. - Template(_, _) => { todo!("Ion 1.1 template values") } - Constructed(_, _) => { todo!("Ion 1.1 constructed values") } + Template(_, _) => { + todo!("Ion 1.1 template values") + } + Constructed(_, _) => { + todo!("Ion 1.1 constructed values") + } }; use LazyRawValueKind::*; @@ -390,18 +439,23 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.inspect_binary_1_0_scalar(depth, delimiter, value, bin_val, comment_fn) } Binary_1_1(_) => todo!("Binary Ion 1.1 scalars"), - Text_1_0(_) | Text_1_1(_) => unreachable!("text value") + Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } } /// Inspects the s-expression `sexp`, including all of its child values. If this sexp appears /// in a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended /// to the sexp's text representation. 
- fn inspect_sexp<'x>(&mut self, depth: usize, delimiter: &str, sexp: LazySExp<'x, AnyEncoding>) -> Result<()> { + fn inspect_sexp<'x>( + &mut self, + depth: usize, + delimiter: &str, + sexp: LazySExp<'x, AnyEncoding>, + ) -> Result<()> { use ExpandedSExpSource::*; let raw_sexp = match sexp.expanded().source() { ValueLiteral(raw_sexp) => raw_sexp, - Template(_, _, _, _) => todo!("Ion 1.1 template SExp") + Template(_, _, _, _) => todo!("Ion 1.1 template SExp"), }; use LazyRawSExpKind::*; @@ -415,11 +469,16 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// Inspects the list `list`, including all of its child values. If this list appears inside /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended /// to the list's text representation. - fn inspect_list<'x>(&mut self, depth: usize, delimiter: &str, list: LazyList<'x, AnyEncoding>) -> Result<()> { + fn inspect_list<'x>( + &mut self, + depth: usize, + delimiter: &str, + list: LazyList<'x, AnyEncoding>, + ) -> Result<()> { use ExpandedListSource::*; let raw_list = match list.expanded().source() { ValueLiteral(raw_list) => raw_list, - Template(_, _, _, _) => todo!("Ion 1.1 template List") + Template(_, _, _, _) => todo!("Ion 1.1 template List"), }; use LazyRawListKind::*; @@ -433,21 +492,32 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// Inspects the struct `struct_`, including all of its fields. If this struct appears inside /// a list or struct, the caller can set `delimiter` to a comma (`","`) and it will be appended /// to the struct's text representation. 
- fn inspect_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { + fn inspect_struct( + &mut self, + depth: usize, + delimiter: &str, + struct_: LazyStruct<'_, AnyEncoding>, + ) -> Result<()> { let raw_struct = match struct_.expanded().source() { ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, - ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template Struct") + ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template Struct"), }; use LazyRawValueKind::*; match raw_struct.as_value().kind() { - Binary_1_0(v) => self.inspect_binary_1_0_struct(depth, delimiter, struct_, raw_struct, v), + Binary_1_0(v) => { + self.inspect_binary_1_0_struct(depth, delimiter, struct_, raw_struct, v) + } Binary_1_1(_) => todo!("Binary Ion 1.1 Struct"), Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } } - fn inspect_symbol_table(&mut self, next_symbol_id: usize, struct_: LazyStruct<'_, AnyEncoding>) -> Result<()> { + fn inspect_symbol_table( + &mut self, + next_symbol_id: usize, + struct_: LazyStruct<'_, AnyEncoding>, + ) -> Result<()> { let value = struct_.as_value(); if value.has_annotations() { self.newline()?; @@ -455,12 +525,14 @@ impl<'a, 'b> IonInspector<'a, 'b> { } let raw_struct = match struct_.expanded().source() { ExpandedStructSource::ValueLiteral(raw_struct) => raw_struct, - ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template symbol table") + ExpandedStructSource::Template(_, _, _, _, _) => todo!("Ion 1.1 template symbol table"), }; use LazyRawValueKind::*; match raw_struct.as_value().kind() { - Binary_1_0(v) => self.inspect_binary_1_0_symbol_table(next_symbol_id, struct_, raw_struct, v), + Binary_1_0(v) => { + self.inspect_binary_1_0_symbol_table(next_symbol_id, struct_, raw_struct, v) + } Binary_1_1(_) => todo!("Binary Ion 1.1 symbol table"), Text_1_0(_) | Text_1_1(_) => unreachable!("text value"), } @@ -470,7 +542,9 @@ impl<'a, 'b> IonInspector<'a, 
'b> { let raw_value = match value.expanded().source() { ExpandedValueSource::ValueLiteral(raw_value) => raw_value, ExpandedValueSource::Template(_, _) => todo!("Ion 1.1 template value annotations"), - ExpandedValueSource::Constructed(_, _) => todo!("Ion 1.1 constructed value annotations") + ExpandedValueSource::Constructed(_, _) => { + todo!("Ion 1.1 constructed value annotations") + } }; use LazyRawValueKind::*; @@ -488,24 +562,34 @@ impl<'a, 'b> IonInspector<'a, 'b> { // bytes. // This prints the container's offset, length, and header bytes, leaving the cursor positioned // at the beginning of the `Text Ion` column. - fn inspect_binary_1_0_container_header(&mut self, raw_value: v1_0::LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_container_header( + &mut self, + raw_value: v1_0::LazyRawBinaryValue, + ) -> Result<()> { let encoding = raw_value.encoded_data(); let range = encoding.range(); let opcode_bytes: &[u8] = raw_value.encoded_data().opcode_span().bytes(); - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ - IonBytes::new(BytesKind::Opcode, opcode_bytes), - IonBytes::new(BytesKind::TrailingLength, raw_value.encoded_data().trailing_length_span().bytes()), - ]); + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![ + IonBytes::new(BytesKind::Opcode, opcode_bytes), + IonBytes::new( + BytesKind::TrailingLength, + raw_value.encoded_data().trailing_length_span().bytes(), + ), + ], + ); self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter) } - fn inspect_binary_1_0_sexp<'x>(&mut self, - depth: usize, - delimiter: &str, - sexp: LazySExp<'x, AnyEncoding>, - raw_sexp: v1_0::LazyRawBinarySExp<'x>, + fn inspect_binary_1_0_sexp<'x>( + &mut self, + depth: usize, + delimiter: &str, + sexp: LazySExp<'x, AnyEncoding>, + raw_sexp: v1_0::LazyRawBinarySExp<'x>, ) -> Result<()> { self.inspect_binary_1_0_sequence( depth, @@ -520,11 +604,12 @@ impl<'a, 'b> IonInspector<'a, 'b> { ) } - fn inspect_binary_1_0_list<'x>(&mut 
self, - depth: usize, - delimiter: &str, - list: LazyList<'x, AnyEncoding>, - raw_list: v1_0::LazyRawBinaryList<'x>, + fn inspect_binary_1_0_list<'x>( + &mut self, + depth: usize, + delimiter: &str, + list: LazyList<'x, AnyEncoding>, + raw_list: v1_0::LazyRawBinaryList<'x>, ) -> Result<()> { self.inspect_binary_1_0_sequence( depth, @@ -539,16 +624,17 @@ impl<'a, 'b> IonInspector<'a, 'b> { ) } - fn inspect_binary_1_0_sequence<'x>(&mut self, - depth: usize, - opening_delimiter: &str, - value_delimiter: &str, - closing_delimiter: &str, - trailing_delimiter: &str, - nested_values: impl IntoIterator>>, - nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, - raw_value: LazyRawBinaryValue, - mut value_comment_fn: impl CommentFn<'x>, + fn inspect_binary_1_0_sequence<'x>( + &mut self, + depth: usize, + opening_delimiter: &str, + value_delimiter: &str, + closing_delimiter: &str, + trailing_delimiter: &str, + nested_values: impl IntoIterator>>, + nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, + raw_value: LazyRawBinaryValue, + mut value_comment_fn: impl CommentFn<'x>, ) -> Result<()> { self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; @@ -560,7 +646,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { let mut has_printed_skip_message = false; for (raw_value_res, value_res) in nested_raw_values.iter().zip(nested_values) { let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); - match self.select_action(depth + 1, &mut has_printed_skip_message, &Some(raw_nested_value), "values", "stepping out")? { + match self.select_action( + depth + 1, + &mut has_printed_skip_message, + &Some(raw_nested_value), + "values", + "stepping out", + )? 
{ InspectorAction::Skip => continue, InspectorAction::Inspect => {} InspectorAction::LimitReached => break, @@ -579,12 +671,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } - fn select_action(&mut self, - depth: usize, - has_printed_skip_message: &mut bool, - maybe_item: &Option, - name_of_skipped_item: &str, - name_of_limit_action: &str, + fn select_action( + &mut self, + depth: usize, + has_printed_skip_message: &mut bool, + maybe_item: &Option, + name_of_skipped_item: &str, + name_of_limit_action: &str, ) -> Result { if self.should_skip(maybe_item) { if !*has_printed_skip_message { @@ -603,19 +696,27 @@ impl<'a, 'b> IonInspector<'a, 'b> { Ok(InspectorAction::Inspect) } - fn inspect_binary_1_0_field_name(&mut self, depth: usize, raw_name: LazyRawAnyFieldName, name: SymbolRef) -> Result<()> { + fn inspect_binary_1_0_field_name( + &mut self, + depth: usize, + raw_name: LazyRawAnyFieldName, + name: SymbolRef, + ) -> Result<()> { self.newline()?; let range = raw_name.range(); let raw_name_bytes = raw_name.span().bytes(); let offset = range.start; let length = range.len(); - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ - IonBytes::new(BytesKind::FieldId, raw_name_bytes) - ]); + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![IonBytes::new(BytesKind::FieldId, raw_name_bytes)], + ); self.write_offset_length_and_bytes(offset, length, &mut formatter)?; self.write_indentation(depth)?; self.with_style(field_id_style(), |out| { - IoValueFormatter::new(out).value_formatter().format_symbol(name)?; + IoValueFormatter::new(out) + .value_formatter() + .format_symbol(name)?; Ok(()) })?; write!(self.output, ": ")?; @@ -634,7 +735,12 @@ impl<'a, 'b> IonInspector<'a, 'b> { } /// Inspects all values (however deeply nested) starting at the current level. 
- fn inspect_binary_1_0_field(&mut self, depth: usize, field: LazyField, raw_field: LazyRawFieldExpr) -> Result<()> { + fn inspect_binary_1_0_field( + &mut self, + depth: usize, + field: LazyField, + raw_field: LazyRawFieldExpr, + ) -> Result<()> { let (raw_name, _raw_value) = raw_field.expect_name_value()?; let name = field.name()?; @@ -643,7 +749,14 @@ impl<'a, 'b> IonInspector<'a, 'b> { Ok(()) } - fn inspect_binary_1_0_struct(&mut self, depth: usize, delimiter: &str, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_struct( + &mut self, + depth: usize, + delimiter: &str, + struct_: LazyStruct, + raw_struct: LazyRawAnyStruct, + raw_value: LazyRawBinaryValue, + ) -> Result<()> { self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; @@ -655,9 +768,17 @@ impl<'a, 'b> IonInspector<'a, 'b> { for (raw_field_result, field_result) in raw_struct.iter().zip(struct_.iter()) { let field = field_result?; let raw_field = raw_field_result?; - match self.select_action(depth + 1, &mut has_printed_skip_message, &Some(raw_field), "fields", "stepping out")? { + match self.select_action( + depth + 1, + &mut has_printed_skip_message, + &Some(raw_field), + "fields", + "stepping out", + )? { InspectorAction::Skip => continue, - InspectorAction::Inspect => self.inspect_binary_1_0_field(depth + 1, field, raw_field)?, + InspectorAction::Inspect => { + self.inspect_binary_1_0_field(depth + 1, field, raw_field)? 
+ } InspectorAction::LimitReached => break, } } @@ -670,7 +791,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } - fn inspect_binary_1_0_symbol_table(&mut self, next_symbol_id: usize, struct_: LazyStruct, raw_struct: LazyRawAnyStruct, raw_value: LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_symbol_table( + &mut self, + next_symbol_id: usize, + struct_: LazyStruct, + raw_struct: LazyRawAnyStruct, + raw_value: LazyRawBinaryValue, + ) -> Result<()> { // The processing for a symbol table is very similar to that of a regular struct, // but with special handling defined for the `imports` and `symbols` fields when present. // Because symbol tables are always at the top level, there is no need for indentation. @@ -686,11 +813,21 @@ impl<'a, 'b> IonInspector<'a, 'b> { let field = field_result?; let raw_field = raw_field_result?; - match self.select_action(TOP_LEVEL_DEPTH + 1, &mut has_printed_skip_message, &Some(raw_field), "fields", "stepping out")? { + match self.select_action( + TOP_LEVEL_DEPTH + 1, + &mut has_printed_skip_message, + &Some(raw_field), + "fields", + "stepping out", + )? { InspectorAction::Skip => continue, - InspectorAction::Inspect if field.name()? == "symbols" => self.inspect_lst_symbols_field(next_symbol_id, field, raw_field)?, + InspectorAction::Inspect if field.name()? == "symbols" => { + self.inspect_lst_symbols_field(next_symbol_id, field, raw_field)? + } // TODO: if field.name()? == "imports" => {} - InspectorAction::Inspect => self.inspect_binary_1_0_field(TOP_LEVEL_DEPTH + 1, field, raw_field)?, + InspectorAction::Inspect => { + self.inspect_binary_1_0_field(TOP_LEVEL_DEPTH + 1, field, raw_field)? 
+ } InspectorAction::LimitReached => break, } } @@ -703,20 +840,24 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } - fn inspect_lst_symbols_field(&mut self, mut next_symbol_id: usize, field: LazyField, raw_field: LazyRawFieldExpr) -> Result<()> { + fn inspect_lst_symbols_field( + &mut self, + mut next_symbol_id: usize, + field: LazyField, + raw_field: LazyRawFieldExpr, + ) -> Result<()> { const SYMBOL_LIST_DEPTH: usize = 1; let (raw_name, raw_value) = raw_field.expect_name_value()?; self.inspect_binary_1_0_field_name(SYMBOL_LIST_DEPTH, raw_name, field.name()?)?; + let symbols_list = match field.value().read()? { - ValueRef::Symbol(s) if s == "$ion_symbol_table" => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { - out.write_all(b" // Appends new symbols")?; - Ok(true) - }), ValueRef::List(list) => list, - _ => return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { - out.write_all(b" // Invalid, ignored")?; - Ok(true) - }), + _ => { + return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { + out.write_all(b" // Invalid, ignored")?; + Ok(true) + }) + } }; let raw_symbols_list = raw_value.read()?.expect_list()?; @@ -738,7 +879,13 @@ impl<'a, 'b> IonInspector<'a, 'b> { let mut has_printed_skip_message = false; for (raw_value_res, value_res) in nested_raw_values.zip(nested_values) { let (raw_nested_value, nested_value) = (raw_value_res?, value_res?); - match self.select_action(SYMBOL_LIST_DEPTH + 1, &mut has_printed_skip_message, &Some(raw_nested_value), "values", "stepping out")? { + match self.select_action( + SYMBOL_LIST_DEPTH + 1, + &mut has_printed_skip_message, + &Some(raw_nested_value), + "values", + "stepping out", + )? 
{ InspectorAction::Skip => continue, InspectorAction::Inspect => {} InspectorAction::LimitReached => break, @@ -764,20 +911,33 @@ impl<'a, 'b> IonInspector<'a, 'b> { }) } - fn inspect_binary_1_0_annotations(&mut self, depth: usize, value: LazyValue, raw_value: LazyRawBinaryValue) -> Result<()> { + fn inspect_binary_1_0_annotations( + &mut self, + depth: usize, + value: LazyValue, + raw_value: LazyRawBinaryValue, + ) -> Result<()> { let encoding = raw_value.encoded_annotations().unwrap(); let range = encoding.range(); - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![ - IonBytes::new(BytesKind::AnnotationsHeader, encoding.header_span().bytes()), - IonBytes::new(BytesKind::AnnotationsSequence, encoding.sequence_span().bytes()), - ]); + let mut formatter = BytesFormatter::new( + BYTES_PER_ROW, + vec![ + IonBytes::new(BytesKind::AnnotationsHeader, encoding.header_span().bytes()), + IonBytes::new( + BytesKind::AnnotationsSequence, + encoding.sequence_span().bytes(), + ), + ], + ); self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; self.write_indentation(depth)?; self.with_style(annotations_style(), |out| { for annotation in value.annotations() { - IoValueFormatter::new(&mut *out).value_formatter().format_symbol(annotation?)?; + IoValueFormatter::new(&mut *out) + .value_formatter() + .format_symbol(annotation?)?; write!(out, "::")?; } Ok(()) @@ -800,21 +960,34 @@ impl<'a, 'b> IonInspector<'a, 'b> { Ok(()) } - fn inspect_binary_1_0_scalar<'x>(&mut self, depth: usize, delimiter: &str, value: LazyValue<'x, AnyEncoding>, raw_value: LazyRawBinaryValue, mut comment_fn: impl CommentFn<'x>) -> Result<()> { + fn inspect_binary_1_0_scalar<'x>( + &mut self, + depth: usize, + delimiter: &str, + value: LazyValue<'x, AnyEncoding>, + raw_value: LazyRawBinaryValue, + mut comment_fn: impl CommentFn<'x>, + ) -> Result<()> { let encoding = raw_value.encoded_data(); let range = encoding.range(); let opcode_bytes = IonBytes::new(BytesKind::Opcode, 
encoding.opcode_span().bytes()); - let length_bytes = IonBytes::new(BytesKind::TrailingLength, encoding.trailing_length_span().bytes()); + let length_bytes = IonBytes::new( + BytesKind::TrailingLength, + encoding.trailing_length_span().bytes(), + ); // TODO: There is a bug in the `body_span()` method that causes it fail when the value is annotated. // When it's fixed, this can be: // let body_bytes = IonBytes::new(BytesKind::ValueBody, body_span); let body_len = raw_value.encoded_data().body_range().len(); let total_len = raw_value.encoded_data().range().len(); - let body_bytes = IonBytes::new(BytesKind::ValueBody, &encoding.span().bytes()[total_len - body_len..]); - + let body_bytes = IonBytes::new( + BytesKind::ValueBody, + &encoding.span().bytes()[total_len - body_len..], + ); - let mut formatter = BytesFormatter::new(BYTES_PER_ROW, vec![opcode_bytes, length_bytes, body_bytes]); + let mut formatter = + BytesFormatter::new(BYTES_PER_ROW, vec![opcode_bytes, length_bytes, body_bytes]); self.write_offset_length_and_bytes(range.start, range.len(), &mut formatter)?; self.write_indentation(depth)?; @@ -830,7 +1003,8 @@ impl<'a, 'b> IonInspector<'a, 'b> { if encoded.ends_with(&[b' ']) { let _ = encoded.pop(); } - self.output.write_all(self.text_writer.output().as_slice())?; + self.output + .write_all(self.text_writer.output().as_slice())?; self.text_writer.output_mut().clear(); self.output.write_all(delimiter.as_bytes())?; self.output.reset()?; @@ -880,7 +1054,11 @@ impl<'a, 'b> IonInspector<'a, 'b> { const INDENTATION_WITH_GUIDE: &'static str = "· "; let mut color_spec = ColorSpec::new(); - color_spec.set_dimmed(false).set_intense(true).set_bold(true).set_fg(Some(Color::Rgb(100, 100, 100))); + color_spec + .set_dimmed(false) + .set_intense(true) + .set_bold(true) + .set_fg(Some(Color::Rgb(100, 100, 100))); self.with_style(color_spec, |out| { for _ in 0..depth { out.write_all(INDENTATION_WITH_GUIDE.as_bytes())?; @@ -892,8 +1070,16 @@ impl<'a, 'b> IonInspector<'a, 'b> { 
/// Prints the given `offset` and `length` in the first and second table columns, then uses the /// `formatter` to print a single row of hex-encoded bytes in the third column ("Binary Ion"). /// The `offset` and `length` are typically `usize`, but can be anything that implements `Display`. - fn write_offset_length_and_bytes(&mut self, offset: impl Display, length: impl Display, formatter: &mut BytesFormatter) -> Result<()> { - write!(self.output, "{VERTICAL_LINE} {offset:12} {VERTICAL_LINE} {length:12} {VERTICAL_LINE} ")?; + fn write_offset_length_and_bytes( + &mut self, + offset: impl Display, + length: impl Display, + formatter: &mut BytesFormatter, + ) -> Result<()> { + write!( + self.output, + "{VERTICAL_LINE} {offset:12} {VERTICAL_LINE} {length:12} {VERTICAL_LINE} " + )?; formatter.write_row(self.output)?; write!(self.output, "{VERTICAL_LINE} ")?; Ok(()) @@ -991,37 +1177,27 @@ impl BytesKind { use BytesKind::*; let mut color = ColorSpec::new(); match self { - VersionMarker => - color - .set_fg(Some(Color::Yellow)) - .set_intense(true), - FieldId => - color - .set_fg(Some(Color::Cyan)) - .set_intense(true), - Opcode => - color - .set_bold(true) - .set_fg(Some(Color::Rgb(0, 0, 0))) - .set_bg(Some(Color::Rgb(255, 255, 255))), - - TrailingLength => - color - .set_bold(true) - .set_underline(true) - .set_fg(Some(Color::White)) - .set_intense(true), - ValueBody => - color.set_bold(false) - .set_fg(Some(Color::White)) - .set_intense(false), - AnnotationsHeader => - color.set_bold(false) - .set_fg(Some(Color::Black)) - .set_bg(Some(Color::Magenta)), - AnnotationsSequence => - color.set_bold(false) - .set_fg(Some(Color::Magenta)), + VersionMarker => color.set_fg(Some(Color::Yellow)).set_intense(true), + FieldId => color.set_fg(Some(Color::Cyan)).set_intense(true), + Opcode => color + .set_bold(true) + .set_fg(Some(Color::Rgb(0, 0, 0))) + .set_bg(Some(Color::Rgb(255, 255, 255))), + + TrailingLength => color + .set_bold(true) + .set_underline(true) + 
.set_fg(Some(Color::White)) + .set_intense(true), + ValueBody => color + .set_bold(false) + .set_fg(Some(Color::White)) + .set_intense(false), + AnnotationsHeader => color + .set_bold(false) + .set_fg(Some(Color::Black)) + .set_bg(Some(Color::Magenta)), + AnnotationsSequence => color.set_bold(false).set_fg(Some(Color::Magenta)), }; color } @@ -1083,7 +1259,11 @@ struct BytesFormatter<'a> { impl<'a> BytesFormatter<'a> { pub fn new(formatted_bytes_per_row: usize, slices: Vec>) -> Self { - Self { slices, slices_written: 0, formatted_bytes_per_row } + Self { + slices, + slices_written: 0, + formatted_bytes_per_row, + } } /// Writes a row of `n` hex-encoded, colorized bytes, where `n` is determined by the @@ -1118,7 +1298,11 @@ impl<'a> BytesFormatter<'a> { } /// Helper method to print up to `num_bytes` bytes from the current [`IonBytes`]. - fn write_bytes_from_current_slice(&mut self, num_bytes: usize, output: &mut impl WriteColor) -> Result { + fn write_bytes_from_current_slice( + &mut self, + num_bytes: usize, + output: &mut impl WriteColor, + ) -> Result { let Some(slice) = self.current_slice() else { // No more to write return Ok(0); @@ -1138,7 +1322,11 @@ impl<'a> BytesFormatter<'a> { // Set the appropriate style for this byte slice. 
let style: ColorSpec = slice.style(); output.set_color(&style)?; - write!(output, "{}", hex_contents(slice.next_n_bytes(bytes_to_write)))?; + write!( + output, + "{}", + hex_contents(slice.next_n_bytes(bytes_to_write)) + )?; slice.mark_bytes_written(bytes_to_write); output.reset()?; @@ -1189,11 +1377,3 @@ fn hex_contents(source: &[u8]) -> String { } buffer } - -#[test] -fn do_it() { - let stdout = StandardStream::stdout(ColorChoice::Always); - let mut output: Box = Box::new(stdout.lock()); - let mut reader = SystemReader::new(AnyEncoding, File::open("/tmp/symbols").unwrap()); - IonInspector::new(&mut output, 0, usize::MAX).unwrap().inspect_top_level(&mut reader).unwrap() -} \ No newline at end of file diff --git a/src/bin/ion/commands/beta/mod.rs b/src/bin/ion/commands/beta/mod.rs index b623797..5d9f5c4 100644 --- a/src/bin/ion/commands/beta/mod.rs +++ b/src/bin/ion/commands/beta/mod.rs @@ -4,18 +4,18 @@ pub mod from; #[cfg(feature = "experimental-code-gen")] pub mod generate; pub mod head; +pub mod inspect; pub mod primitive; pub mod schema; pub mod symtab; pub mod to; -pub mod inspect; use crate::commands::beta::count::CountCommand; -use crate::commands::beta::inspect::InspectCommand; use crate::commands::beta::from::FromNamespace; #[cfg(feature = "experimental-code-gen")] use crate::commands::beta::generate::GenerateCommand; use crate::commands::beta::head::HeadCommand; +use crate::commands::beta::inspect::InspectCommand; use crate::commands::beta::primitive::PrimitiveCommand; use crate::commands::beta::schema::SchemaNamespace; use crate::commands::beta::symtab::SymtabNamespace; @@ -44,7 +44,7 @@ impl IonCliCommand for BetaNamespace { Box::new(ToNamespace), Box::new(SymtabNamespace), #[cfg(feature = "experimental-code-gen")] - Box::new(GenerateCommand), + Box::new(GenerateCommand), ] } } diff --git a/src/bin/ion/commands/beta/symtab/filter.rs b/src/bin/ion/commands/beta/symtab/filter.rs index 55b0765..1051b6b 100644 --- 
a/src/bin/ion/commands/beta/symtab/filter.rs +++ b/src/bin/ion/commands/beta/symtab/filter.rs @@ -1,10 +1,10 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{bail, Context, Result}; use clap::{Arg, ArgAction, ArgMatches, Command}; +use ion_rs::*; use std::fs::File; use std::io; use std::io::{stdout, BufWriter, Write}; -use ion_rs::*; pub struct SymtabFilterCommand; @@ -94,7 +94,7 @@ pub fn filter_out_user_data( SystemStreamItem::EndOfStream(_) => { return Ok(()); } - _ => unreachable!("#[non_exhaustive] enum, current variants covered") + _ => unreachable!("#[non_exhaustive] enum, current variants covered"), }; // If this is a text encoding, then we need delimiting space to separate // IVMs from their neighboring system stream items. Consider: diff --git a/src/bin/ion/commands/beta/to/json.rs b/src/bin/ion/commands/beta/to/json.rs index e0e7cff..89f5864 100644 --- a/src/bin/ion/commands/beta/to/json.rs +++ b/src/bin/ion/commands/beta/to/json.rs @@ -1,11 +1,12 @@ use crate::commands::{IonCliCommand, WithIonCliArgument}; use anyhow::{Context, Result}; use clap::{ArgMatches, Command}; +use ion_rs::*; use serde_json::{Map, Number, Value as JsonValue}; use std::fs::File; use std::io::{stdin, stdout, BufWriter, Write}; use std::str::FromStr; -use ion_rs::*; +use zstd::zstd_safe::WriteBuf; pub struct ToJsonCommand; @@ -60,7 +61,10 @@ impl IonCliCommand for ToJsonCommand { } } -pub fn convert(reader: &mut Reader, output: &mut Box) -> Result<()> { +pub fn convert( + reader: &mut Reader, + output: &mut Box, +) -> Result<()> { const FLUSH_EVERY_N: usize = 100; let mut value_count = 0usize; while let Some(value) = reader.next()? 
{ @@ -96,13 +100,14 @@ fn to_json_value(value: LazyValue) -> Result { ) } Timestamp(t) => JsonValue::String(t.to_string()), - Symbol(s) => s.text() + Symbol(s) => s + .text() .map(|text| JsonValue::String(text.to_owned())) .unwrap_or_else(|| JsonValue::Null), String(s) => JsonValue::String(s.text().to_owned()), Blob(b) | Clob(b) => { use base64::{engine::general_purpose as base64_encoder, Engine as _}; - let base64_text = base64_encoder::STANDARD.encode(b.as_ref()); + let base64_text = base64_encoder::STANDARD.encode(b.as_slice()); JsonValue::String(base64_text) } SExp(s) => to_json_array(s.iter())?, @@ -121,7 +126,12 @@ fn to_json_value(value: LazyValue) -> Result { Ok(value) } -fn to_json_array<'a>(ion_values: impl IntoIterator>>) -> Result { - let result: Result> = ion_values.into_iter().flat_map(|v| v.map(to_json_value)).collect(); +fn to_json_array<'a>( + ion_values: impl IntoIterator>>, +) -> Result { + let result: Result> = ion_values + .into_iter() + .flat_map(|v| v.map(to_json_value)) + .collect(); Ok(JsonValue::Array(result?)) } diff --git a/src/bin/ion/commands/dump.rs b/src/bin/ion/commands/dump.rs index 3a4d5f2..20afae7 100644 --- a/src/bin/ion/commands/dump.rs +++ b/src/bin/ion/commands/dump.rs @@ -185,8 +185,8 @@ fn auto_decompressing_reader( mut reader: R, header_len: usize, ) -> IonResult - where - R: BufRead + 'static, +where + R: BufRead + 'static, { // read header let mut header_bytes = vec![0; header_len]; diff --git a/src/bin/ion/file_writer.rs b/src/bin/ion/file_writer.rs index f002b72..d73e57f 100644 --- a/src/bin/ion/file_writer.rs +++ b/src/bin/ion/file_writer.rs @@ -1,7 +1,7 @@ -use termcolor::{ColorSpec, WriteColor}; +use std::fs::File; use std::io; use std::io::{BufWriter, Write}; -use std::fs::File; +use termcolor::{ColorSpec, WriteColor}; /// A buffered `io::Write` implementation that implements [`WriteColor`] by reporting that it does /// not support TTY escape sequences and treating all requests to change or reset the current color 
@@ -22,7 +22,9 @@ pub struct FileWriter { impl FileWriter { pub fn new(file: File) -> Self { - Self { inner: BufWriter::new(file) } + Self { + inner: BufWriter::new(file), + } } } diff --git a/tests/cli.rs b/tests/cli.rs index 44a5c71..e667e1f 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -2,7 +2,6 @@ use anyhow::Result; use assert_cmd::Command; use ion_rs::Element; use rstest::*; -use std::fs; use std::fs::File; use std::io::{Read, Write}; use std::time::Duration; @@ -213,14 +212,21 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> let command_assert = cmd.assert(); let output = command_assert.get_output(); let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout.trim_end(), expected_output); + assert_eq!( + Element::read_all(stdout.trim_end())?, + Element::read_all(expected_output)? + ); Ok(()) } #[cfg(feature = "experimental-code-gen")] -#[rstest] -#[case::simple_struct( - r#" +mod code_gen_tests { + use super::*; + use std::fs; + + #[rstest] + #[case::simple_struct( + r#" type::{ name: simple_struct, fields: { @@ -229,32 +235,32 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> }, } "#, - &["id: i64", "name: String"], - &["pub fn name(&self) -> &String {", "pub fn id(&self) -> &i64 {"] -)] -#[case::value_struct( - r#" + & ["id: i64", "name: String"], + & ["pub fn name(&self) -> &String {", "pub fn id(&self) -> &i64 {"] + )] + #[case::value_struct( + r#" type::{ name: value_struct, type: int // this will be a field in struct } "#, - &["value: i64"], - &["pub fn value(&self) -> &i64 {"] -)] -#[case::sequence_struct( - r#" + & ["value: i64"], + & ["pub fn value(&self) -> &i64 {"] + )] + #[case::sequence_struct( + r#" type::{ name: sequence_struct, element: string, // this will be a sequence field in struct type: list } "#, - &["value: Vec"], - &["pub fn value(&self) -> &Vec {"] -)] -#[case::struct_with_reference_field( - r#" + & ["value: Vec"], + & ["pub fn value(&self) -> &Vec 
{"] + )] + #[case::struct_with_reference_field( + r#" type::{ name: struct_with_reference_field, fields: { @@ -267,11 +273,11 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> type: int } "#, - &["reference: OtherType"], - &["pub fn reference(&self) -> &OtherType {"] -)] -#[case::struct_with_nested_type( - r#" + & ["reference: OtherType"], + & ["pub fn reference(&self) -> &OtherType {"] + )] + #[case::struct_with_nested_type( + r#" type::{ name: struct_with_nested_type, fields: { @@ -279,56 +285,55 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) -> } } "#, - &["nested_type: NestedType1"], - &["pub fn nested_type(&self) -> &NestedType1 {"] -)] -/// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. -fn test_code_generation_in_rust( - #[case] test_schema: &str, - #[case] expected_properties: &[&str], - #[case] expected_accessors: &[&str], -) -> Result<()> { - let mut cmd = Command::cargo_bin("ion")?; - let temp_dir = TempDir::new()?; - let input_schema_path = temp_dir.path().join("test_schema.isl"); - let mut input_schema_file = File::create(&input_schema_path)?; - input_schema_file.write(test_schema.as_bytes())?; - input_schema_file.flush()?; - cmd.args([ - "beta", - "generate", - "--schema", - "test_schema.isl", - "--output", - temp_dir.path().to_str().unwrap(), - "--language", - "rust", - "--directory", - temp_dir.path().to_str().unwrap(), - ]); - let command_assert = cmd.assert(); - let output_file_path = temp_dir.path().join("ion_generated_code.rs"); - command_assert.success(); - let contents = - fs::read_to_string(output_file_path).expect("Should have been able to read the file"); - for expected_property in expected_properties { - assert!(contents.contains(expected_property)); - } - for expected_accessor in expected_accessors { - assert!(contents.contains(expected_accessor)); + & ["nested_type: NestedType1"], + 
& ["pub fn nested_type(&self) -> &NestedType1 {"] + )] + /// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. + fn test_code_generation_in_rust( + #[case] test_schema: &str, + #[case] expected_properties: &[&str], + #[case] expected_accessors: &[&str], + ) -> Result<()> { + let mut cmd = Command::cargo_bin("ion")?; + let temp_dir = TempDir::new()?; + let input_schema_path = temp_dir.path().join("test_schema.isl"); + let mut input_schema_file = File::create(&input_schema_path)?; + input_schema_file.write(test_schema.as_bytes())?; + input_schema_file.flush()?; + cmd.args([ + "beta", + "generate", + "--schema", + "test_schema.isl", + "--output", + temp_dir.path().to_str().unwrap(), + "--language", + "rust", + "--directory", + temp_dir.path().to_str().unwrap(), + ]); + let command_assert = cmd.assert(); + let output_file_path = temp_dir.path().join("ion_generated_code.rs"); + command_assert.success(); + let contents = + fs::read_to_string(output_file_path).expect("Should have been able to read the file"); + for expected_property in expected_properties { + assert!(contents.contains(expected_property)); + } + for expected_accessor in expected_accessors { + assert!(contents.contains(expected_accessor)); + } + // verify that it generates read-write APIs + assert!(contents.contains("pub fn read_from(reader: &mut Reader) -> SerdeResult {")); + assert!(contents + .contains("pub fn write_to(&self, writer: &mut W) -> SerdeResult<()> {")); + Ok(()) } - // verify that it generates read-write APIs - assert!(contents.contains("pub fn read_from(reader: &mut Reader) -> SerdeResult {")); - assert!(contents - .contains("pub fn write_to(&self, writer: &mut W) -> SerdeResult<()> {")); - Ok(()) -} -#[cfg(feature = "experimental-code-gen")] -#[rstest] -#[case( - "SimpleStruct", - r#" + #[rstest] + #[case( + "SimpleStruct", + r#" type::{ name: simple_struct, fields: { @@ -337,35 +342,35 @@ fn 
test_code_generation_in_rust( } } "#, - &["private int id;", "private String name;"], - &["public String getName() {", "public int getId() {"] -)] -#[case( - "ValueStruct", - r#" + & ["private int id;", "private String name;"], + & ["public String getName() {", "public int getId() {"] + )] + #[case( + "ValueStruct", + r#" type::{ name: value_struct, type: int // this will be a field in struct } "#, - &["private int value;"], - &["public int getValue() {"] -)] -#[case( - "SequenceStruct", - r#" + & ["private int value;"], + & ["public int getValue() {"] + )] + #[case( + "SequenceStruct", + r#" type::{ name: sequence_struct, element: string, // this will be a sequence field in struct type: list } "#, - &["private ArrayList value;"], - &["public ArrayList getValue() {"] -)] -#[case( - "StructWithReferenceField", - r#" + & ["private ArrayList value;"], + & ["public ArrayList getValue() {"] + )] + #[case( + "StructWithReferenceField", + r#" type::{ name: struct_with_reference_field, fields: { @@ -378,12 +383,12 @@ fn test_code_generation_in_rust( type: int } "#, - &["private OtherType reference;"], - &["public OtherType getReference() {"] -)] -#[case( - "StructWithNestedType", - r#" + & ["private OtherType reference;"], + & ["public OtherType getReference() {"] + )] + #[case( + "StructWithNestedType", + r#" type::{ name: struct_with_nested_type, fields: { @@ -391,45 +396,47 @@ fn test_code_generation_in_rust( } } "#, - &["private NestedType1 nestedType;"], - &["public NestedType1 getNestedType() {"] -)] -/// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. 
-fn test_code_generation_in_java( - #[case] test_name: &str, - #[case] test_schema: &str, - #[case] expected_properties: &[&str], - #[case] expected_accessors: &[&str], -) -> Result<()> { - let mut cmd = Command::cargo_bin("ion")?; - let temp_dir = TempDir::new()?; - let input_schema_path = temp_dir.path().join("test_schema.isl"); - let mut input_schema_file = File::create(&input_schema_path)?; - input_schema_file.write(test_schema.as_bytes())?; - input_schema_file.flush()?; - cmd.args([ - "beta", - "generate", - "--schema", - "test_schema.isl", - "--output", - temp_dir.path().to_str().unwrap(), - "--language", - "java", - "--namespace", - "org.example", - "--directory", - temp_dir.path().to_str().unwrap(), - ]); - let command_assert = cmd.assert(); - let output_file_path = temp_dir.path().join(format!("{}.java", test_name)); - command_assert.success(); - let contents = fs::read_to_string(output_file_path).expect("Can not read generated code file."); - for expected_property in expected_properties { - assert!(contents.contains(expected_property)); - } - for expected_accessor in expected_accessors { - assert!(contents.contains(expected_accessor)); + & ["private NestedType1 nestedType;"], + & ["public NestedType1 getNestedType() {"] + )] + /// Calls ion-cli beta generate with different schema file. Pass the test if the return value contains the expected properties and accessors. 
+ fn test_code_generation_in_java( + #[case] test_name: &str, + #[case] test_schema: &str, + #[case] expected_properties: &[&str], + #[case] expected_accessors: &[&str], + ) -> Result<()> { + let mut cmd = Command::cargo_bin("ion")?; + let temp_dir = TempDir::new()?; + let input_schema_path = temp_dir.path().join("test_schema.isl"); + let mut input_schema_file = File::create(&input_schema_path)?; + input_schema_file.write(test_schema.as_bytes())?; + input_schema_file.flush()?; + cmd.args([ + "beta", + "generate", + "--schema", + "test_schema.isl", + "--output", + temp_dir.path().to_str().unwrap(), + "--language", + "java", + "--namespace", + "org.example", + "--directory", + temp_dir.path().to_str().unwrap(), + ]); + let command_assert = cmd.assert(); + let output_file_path = temp_dir.path().join(format!("{}.java", test_name)); + command_assert.success(); + let contents = + fs::read_to_string(output_file_path).expect("Can not read generated code file."); + for expected_property in expected_properties { + assert!(contents.contains(expected_property)); + } + for expected_accessor in expected_accessors { + assert!(contents.contains(expected_accessor)); + } + Ok(()) } - Ok(()) } diff --git a/tests/code-gen-tests.rs b/tests/code-gen-tests.rs index 3632d2e..77f9ed5 100644 --- a/tests/code-gen-tests.rs +++ b/tests/code-gen-tests.rs @@ -1,12 +1,12 @@ +#![cfg(feature = "experimental-code-gen")] + use anyhow::Result; use assert_cmd::Command; use rstest::rstest; -use std::fs; use std::fs::File; use std::io::Write; use tempfile::TempDir; -#[cfg(feature = "experimental-code-gen")] #[test] fn roundtrip_tests_for_generated_code_gradle() -> Result<()> { // run the gradle project defined under `code-gen-projects`, @@ -39,7 +39,6 @@ fn roundtrip_tests_for_generated_code_gradle() -> Result<()> { Ok(()) } -#[cfg(feature = "experimental-code-gen")] #[test] fn roundtrip_tests_for_generated_code_cargo() -> Result<()> { // run the cargo project defined under `code-gen-projects`, @@ -93,7 
+92,7 @@ fn roundtrip_tests_for_generated_code_cargo() -> Result<()> { #[cfg(feature = "experimental-code-gen")] #[rstest] #[case::any_element_list( - r#" +r#" type::{ name: any_element_list, type: list, // this doesn't specify the type for elements in the list with `element` constraint From 235d3e75994c59dc29b548a28767e2ea0b7bd331 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 3 Jun 2024 12:57:05 -0400 Subject: [PATCH 10/13] Remove redundant newline after sequence start --- src/bin/ion/commands/beta/inspect.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index a55de7a..33bd085 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -263,9 +263,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { match item { SystemStreamItem::SymbolTable(lazy_struct) => { let is_append = lazy_struct.get("imports")? - == Some(ValueRef::Symbol(SymbolRef::with_text( - "$ion_symbol_taqgble", - ))); + == Some(ValueRef::Symbol(SymbolRef::with_text("$ion_symbol_table"))); if !is_append { next_symbol_id = 10; // First available SID after system symbols in Ion 1.0 } @@ -639,7 +637,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { self.inspect_binary_1_0_container_header(raw_value)?; self.write_indentation(depth)?; self.with_style(text_ion_style(), |out| { - write!(out, "{opening_delimiter}\n")?; + write!(out, "{opening_delimiter}")?; Ok(()) })?; @@ -856,7 +854,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { return self.inspect_value(SYMBOL_LIST_DEPTH, ",", field.value(), |out, _value| { out.write_all(b" // Invalid, ignored")?; Ok(true) - }) + }); } }; From a0d945fe828a24778705bd0e0fca6ebf54db162c Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 3 Jun 2024 13:40:48 -0400 Subject: [PATCH 11/13] removed old comment --- src/bin/ion/commands/beta/inspect.rs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git 
a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index 33bd085..b3a9850 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -201,8 +201,7 @@ trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result impl<'x, F> CommentFn<'x> for F where F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result -{ -} +{} /// Returns a `CommentFn` implementation that does nothing. fn no_comment<'x>() -> impl CommentFn<'x> { @@ -283,15 +282,6 @@ impl<'a, 'b> IonInspector<'a, 'b> { _ => unimplemented!("a new SystemStreamItem variant was added"), } - // Notice that we wait until _after_ the item has been inspected above to set the - // `skip_complete` flag. This is because the offset specified by `--skip-bytes` may - // have been located somewhere inside the item and the inspector needed to look for - // that point within its nested values. If this happens, the inspector will set the - // `skip_complete` flag when it reaches that offset at a deeper level of nesting. - // When it reaches this point, `skip_complete` will already be true. However, if the - // offset fell at the beginning of a top level value, the line below will set the flag - // for the first time. - self.skip_complete = true; is_first_item = false; } self.output.write_all(END_OF_TABLE.as_bytes())?; @@ -321,7 +311,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { /// * `None`, then there is no stream-level entity backing the item. These will always be /// inspected; if the e-expression that produced the value was not beyond the limit, /// none of the ephemeral values it produces are either. 
- fn is_past_limit(&mut self, maybe_item: &Option) -> bool { + fn is_past_limit(&self, maybe_item: &Option) -> bool { let limit = self.bytes_to_skip.saturating_add(self.limit_bytes); maybe_item .as_ref() @@ -629,7 +619,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { value_delimiter: &str, closing_delimiter: &str, trailing_delimiter: &str, - nested_values: impl IntoIterator>>, + nested_values: impl IntoIterator>>, nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, raw_value: LazyRawBinaryValue, mut value_comment_fn: impl CommentFn<'x>, From 0bfeb246e3334ccd330d0196e9f33efdffed4346 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 3 Jun 2024 13:53:34 -0400 Subject: [PATCH 12/13] cargo fmt --- src/bin/ion/commands/beta/inspect.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bin/ion/commands/beta/inspect.rs b/src/bin/ion/commands/beta/inspect.rs index b3a9850..e96b907 100644 --- a/src/bin/ion/commands/beta/inspect.rs +++ b/src/bin/ion/commands/beta/inspect.rs @@ -201,7 +201,8 @@ trait CommentFn<'x>: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result impl<'x, F> CommentFn<'x> for F where F: FnMut(&mut OutputRef, LazyValue<'x, AnyEncoding>) -> Result -{} +{ +} /// Returns a `CommentFn` implementation that does nothing. 
fn no_comment<'x>() -> impl CommentFn<'x> { @@ -619,7 +620,7 @@ impl<'a, 'b> IonInspector<'a, 'b> { value_delimiter: &str, closing_delimiter: &str, trailing_delimiter: &str, - nested_values: impl IntoIterator>>, + nested_values: impl IntoIterator>>, nested_raw_values: impl LazyRawSequence<'x, v1_0::Binary>, raw_value: LazyRawBinaryValue, mut value_comment_fn: impl CommentFn<'x>, From f49c4cdec779f8d70a9032dfe7214c99fb41e41b Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 3 Jun 2024 14:00:52 -0400 Subject: [PATCH 13/13] Removed outdated comment --- src/bin/ion/commands/beta/symtab/filter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bin/ion/commands/beta/symtab/filter.rs b/src/bin/ion/commands/beta/symtab/filter.rs index 1051b6b..1006aee 100644 --- a/src/bin/ion/commands/beta/symtab/filter.rs +++ b/src/bin/ion/commands/beta/symtab/filter.rs @@ -14,7 +14,6 @@ impl IonCliCommand for SymtabFilterCommand { } fn about(&self) -> &'static str { - // XXX Currently only supports binary input "Filters user data out of an Ion stream, leaving only the symbol table(s) behind." }