Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version bump to 1.0.0-rc.6, improvements to IonEncoding #785

Merged
merged 8 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ exclude = [
"**/ion-tests/iontestdata/**",
"*.pdf"
]
version = "1.0.0-rc.5"
version = "1.0.0-rc.6"
edition = "2021"
# We need at least 1.65 for GATs[1] and 1.67 for `ilog`[2]
# [1] https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html
Expand Down
54 changes: 52 additions & 2 deletions src/lazy/any_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ use crate::lazy::text::value::{
LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker_1_0,
LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator,
};
use crate::{IonResult, IonType, RawSymbolRef};
use crate::{Encoding, IonResult, IonType, RawSymbolRef};
use bumpalo::Bump as BumpAllocator;

/// An implementation of the `LazyDecoder` trait that can read any encoding of Ion.
Expand Down Expand Up @@ -89,6 +89,18 @@ pub enum LazyRawAnyVersionMarkerKind<'top> {
Binary_1_1(LazyRawBinaryVersionMarker_1_1<'top>),
}

impl<'top> LazyRawAnyVersionMarker<'top> {
pub fn encoding(&self) -> IonEncoding {
use crate::lazy::any_encoding::LazyRawAnyVersionMarkerKind::*;
match self.encoding {
Text_1_0(_) => TextEncoding_1_0.encoding(),
Binary_1_0(_) => BinaryEncoding_1_0.encoding(),
Text_1_1(_) => TextEncoding_1_1.encoding(),
Binary_1_1(_) => BinaryEncoding_1_1.encoding(),
}
}
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I plumbed the IonEncoding from the reader up through all of the raw stream item types (version marker, value, e-expression, end of stream), making it easy to check the encoding of any item during iteration.


impl<'top> HasSpan<'top> for LazyRawAnyVersionMarker<'top> {
fn span(&self) -> Span<'top> {
use LazyRawAnyVersionMarkerKind::*;
Expand Down Expand Up @@ -165,6 +177,16 @@ pub enum LazyRawAnyEExpressionKind<'top> {
Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1
}

impl<'top> LazyRawAnyEExpression<'top> {
pub fn encoding(&self) -> IonEncoding {
use LazyRawAnyEExpressionKind::*;
match self.encoding {
Text_1_1(_) => TextEncoding_1_1.encoding(),
Binary_1_1(_) => BinaryEncoding_1_1.encoding(),
}
}
}

impl<'top> From<RawTextEExpression_1_1<'top>> for LazyRawAnyEExpression<'top> {
fn from(text_invocation: RawTextEExpression_1_1<'top>) -> Self {
LazyRawAnyEExpression {
Expand Down Expand Up @@ -278,7 +300,7 @@ pub enum RawReaderKind<'data> {
Binary_1_1(LazyRawBinaryReader_1_1<'data>),
}

#[derive(Default, Copy, Clone)]
#[derive(Default, Debug, Copy, Clone)]
#[non_exhaustive]
pub enum IonEncoding {
// In the absence of a binary IVM, readers must assume Ion 1.0 text data until a
Expand All @@ -300,6 +322,24 @@ impl IonEncoding {
use IonEncoding::*;
matches!(*self, Binary_1_0 | Binary_1_1)
}

pub fn name(&self) -> &str {
use IonEncoding::*;
match self {
Text_1_0 => TextEncoding_1_0::name(),
Binary_1_0 => BinaryEncoding_1_0::name(),
Text_1_1 => TextEncoding_1_1::name(),
Binary_1_1 => BinaryEncoding_1_1::name(),
}
}

pub fn version(&self) -> (u8, u8) {
use IonEncoding::*;
match self {
Text_1_0 | Binary_1_0 => (1, 0),
Text_1_1 | Binary_1_1 => (1, 1),
}
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ These methods make it easier for programs like ion inspect to check what encoding they're working with and construct error messages.

}

impl<'data> From<LazyRawTextReader_1_0<'data>> for LazyRawAnyReader<'data> {
Expand Down Expand Up @@ -421,6 +461,16 @@ impl<'top> LazyRawAnyValue<'top> {
/// Returns a copy of the [`LazyRawValueKind`] stored in this value's `encoding` field.
pub fn kind(&self) -> LazyRawValueKind<'top> {
self.encoding
}

pub fn encoding(&self) -> IonEncoding {
use LazyRawValueKind::*;
match &self.encoding {
Text_1_0(_) => TextEncoding_1_0.encoding(),
Binary_1_0(_) => BinaryEncoding_1_0.encoding(),
Text_1_1(_) => TextEncoding_1_1.encoding(),
Binary_1_1(_) => BinaryEncoding_1_1.encoding(),
}
}
}

#[derive(Debug, Copy, Clone)]
Expand Down
8 changes: 4 additions & 4 deletions src/lazy/binary/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::lazy::decoder::{Decoder, HasRange, LazyRawFieldExpr, LazyRawReader, R
use crate::lazy::encoding::BinaryEncoding_1_0;
use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem};
use crate::result::IonFailure;
use crate::IonResult;
use crate::{Encoding, IonResult};

use crate::lazy::any_encoding::IonEncoding;
use bumpalo::Bump as BumpAllocator;
Expand Down Expand Up @@ -66,7 +66,7 @@ impl<'data> LazyRawBinaryReader_1_0<'data> {
Some(lazy_value) => lazy_value,
None => {
return Ok(LazyRawStreamItem::<BinaryEncoding_1_0>::EndOfStream(
EndPosition::new(self.position()),
EndPosition::new(BinaryEncoding_1_0.encoding(), self.position()),
))
}
};
Expand All @@ -83,7 +83,7 @@ impl<'data> LazyRawBinaryReader_1_0<'data> {
let mut buffer = self.data.advance_to_next_item()?;
if buffer.is_empty() {
return Ok(LazyRawStreamItem::<BinaryEncoding_1_0>::EndOfStream(
EndPosition::new(self.position()),
EndPosition::new(BinaryEncoding_1_0.encoding(), self.position()),
));
}
// Peek at the first byte in the new buffer view
Expand All @@ -94,7 +94,7 @@ impl<'data> LazyRawBinaryReader_1_0<'data> {
(_, buffer) = buffer.consume_nop_padding(type_descriptor)?;
if buffer.is_empty() {
return Ok(LazyRawStreamItem::<BinaryEncoding_1_0>::EndOfStream(
EndPosition::new(buffer.offset()),
EndPosition::new(BinaryEncoding_1_0.encoding(), buffer.offset()),
));
}
type_descriptor = buffer.peek_type_descriptor()?;
Expand Down
8 changes: 4 additions & 4 deletions src/lazy/binary/raw/v1_1/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::lazy::encoder::private::Sealed;
use crate::lazy::encoding::BinaryEncoding_1_1;
use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem};
use crate::result::IonFailure;
use crate::IonResult;
use crate::{Encoding, IonResult};

use crate::lazy::any_encoding::IonEncoding;
use bumpalo::Bump as BumpAllocator;
Expand Down Expand Up @@ -62,7 +62,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> {
Some(lazy_value) => lazy_value,
None => {
return Ok(LazyRawStreamItem::<BinaryEncoding_1_1>::EndOfStream(
EndPosition::new(self.position()),
EndPosition::new(BinaryEncoding_1_1.encoding(), self.position()),
))
}
};
Expand Down Expand Up @@ -93,7 +93,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> {
let mut buffer = self.advance_to_next_item()?;
if buffer.is_empty() {
return Ok(LazyRawStreamItem::<BinaryEncoding_1_1>::EndOfStream(
EndPosition::new(buffer.offset()),
EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()),
));
}

Expand All @@ -102,7 +102,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> {
(_, buffer) = buffer.consume_nop_padding(type_descriptor)?;
if buffer.is_empty() {
return Ok(LazyRawStreamItem::<BinaryEncoding_1_1>::EndOfStream(
EndPosition::new(buffer.offset()),
EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()),
));
}
}
Expand Down
22 changes: 19 additions & 3 deletions src/lazy/raw_stream_item.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::lazy::decoder::{Decoder, HasRange, HasSpan};
use crate::lazy::span::Span;
use crate::result::IonFailure;
use crate::{IonError, IonResult};
use crate::{AnyEncoding, IonEncoding, IonError, IonResult};
use std::fmt::Debug;
use std::ops::Range;

Expand All @@ -27,6 +27,17 @@ pub type LazyRawStreamItem<'top, D> = RawStreamItem<
<D as Decoder>::EExp<'top>,
>;

impl<'top> LazyRawStreamItem<'top, AnyEncoding> {
pub fn encoding(&self) -> IonEncoding {
match self {
LazyRawStreamItem::<AnyEncoding>::VersionMarker(m) => m.encoding(),
LazyRawStreamItem::<AnyEncoding>::Value(v) => v.encoding(),
LazyRawStreamItem::<AnyEncoding>::EExpression(e) => e.encoding(),
LazyRawStreamItem::<AnyEncoding>::EndOfStream(eos) => eos.encoding(),
}
}
}

impl<M: Debug + HasRange, V: Debug + HasRange, E: Debug + HasRange> HasRange
for RawStreamItem<M, V, E>
{
Expand Down Expand Up @@ -116,12 +127,17 @@ impl<M: Copy + Debug, V: Copy + Debug, E: Copy + Debug> RawStreamItem<M, V, E> {
/// an `EndOfStream(EndPosition)` variant) to also implement them.
#[derive(Debug, Copy, Clone)]
pub struct EndPosition {
// The encoding supplied by the reader that produced this end-of-stream marker.
encoding: IonEncoding,
// The position/offset value supplied by the reader when the stream ended.
position: usize,
}

impl EndPosition {
pub(crate) fn new(position: usize) -> Self {
Self { position }
pub(crate) fn new(encoding: IonEncoding, position: usize) -> Self {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ While it seems a bit silly for the end of the stream to have an 'encoding' despite being a zero-length entity, this enables the LazyRawStreamItem<AnyEncoding> to always report its encoding.

Self { encoding, position }
}

pub fn encoding(&self) -> IonEncoding {
self.encoding
}
}

Expand Down
7 changes: 3 additions & 4 deletions src/lazy/system_stream_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,16 @@ impl<'top, D: Decoder> SystemStreamItem<'top, D> {
}
}

/// Like [`Self::symbol_table`], but returns a [`IonError::Decoding`] if this item is not
/// a symbol table.
pub fn symbol_table(self) -> Option<LazyStruct<'top, D>> {
/// If this item is a symbol table, returns `Some(lazy_struct)`. Otherwise, returns `None`.
pub fn as_symbol_table(self) -> Option<LazyStruct<'top, D>> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I renamed this method for consistency; other types use as_something/expect_something for their Option<T>/IonResult<T> pairs.

if let Self::SymbolTable(struct_) = self {
Some(struct_)
} else {
None
}
}

/// Like [`Self::symbol_table`], but returns a [`IonError::Decoding`] if this item is not
/// Like [`Self::as_symbol_table`], but returns a [`IonError::Decoding`] if this item is not
/// a symbol table.
pub fn expect_symbol_table(self) -> IonResult<LazyStruct<'top, D>> {
if let Self::SymbolTable(value) = self {
Expand Down
12 changes: 9 additions & 3 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ use crate::lazy::text::value::{
LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker,
};
use crate::result::DecodingError;
use crate::{IonError, IonResult, IonType, TimestampPrecision};
use crate::{Encoding, IonError, IonResult, IonType, TimestampPrecision};

impl<'a> Debug for TextBufferView<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
Expand Down Expand Up @@ -558,7 +558,10 @@ impl<'top> TextBufferView<'top> {
if input_after_ws.is_empty() {
return Ok((
input_after_ws,
RawStreamItem::EndOfStream(EndPosition::new(input_after_ws.offset())),
RawStreamItem::EndOfStream(EndPosition::new(
TextEncoding_1_0.encoding(),
input_after_ws.offset(),
)),
));
}
// Otherwise, the next item must be an IVM or a value.
Expand All @@ -581,7 +584,10 @@ impl<'top> TextBufferView<'top> {
if input_after_ws.is_empty() {
return Ok((
input_after_ws,
RawStreamItem::EndOfStream(EndPosition::new(input_after_ws.offset())),
RawStreamItem::EndOfStream(EndPosition::new(
TextEncoding_1_1.encoding(),
input_after_ws.offset(),
)),
));
}
// Otherwise, the next item must be an IVM or a value.
Expand Down
3 changes: 2 additions & 1 deletion src/lazy/text/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem
use crate::lazy::text::buffer::TextBufferView;
use crate::lazy::text::parse_result::AddContext;
use crate::result::IonFailure;
use crate::IonResult;
use crate::{Encoding, IonResult};

/// A text Ion 1.0 reader that yields [`LazyRawStreamItem`]s representing the top level values found
/// in the provided input stream.
Expand Down Expand Up @@ -59,6 +59,7 @@ impl<'data> LazyRawTextReader_1_0<'data> {
.with_context("reading whitespace/comments at the top level", input)?;
if buffer_after_whitespace.is_empty() {
return Ok(RawStreamItem::EndOfStream(EndPosition::new(
TextEncoding_1_0.encoding(),
buffer_after_whitespace.offset(),
)));
}
Expand Down
3 changes: 2 additions & 1 deletion src/lazy/text/raw/v1_1/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::lazy::text::matched::{MatchedFieldName, MatchedValue};
use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput};
use crate::lazy::text::value::{LazyRawTextValue_1_1, RawTextAnnotationsIterator};
use crate::result::IonFailure;
use crate::{IonResult, IonType, RawSymbolRef};
use crate::{Encoding, IonResult, IonType, RawSymbolRef};

pub struct LazyRawTextReader_1_1<'data> {
input: &'data [u8],
Expand Down Expand Up @@ -168,6 +168,7 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'da
.with_context("reading v1.1 whitespace/comments at the top level", input)?;
if buffer_after_whitespace.is_empty() {
return Ok(RawStreamItem::EndOfStream(EndPosition::new(
TextEncoding_1_1.encoding(),
buffer_after_whitespace.offset(),
)));
}
Expand Down
10 changes: 9 additions & 1 deletion src/lazy/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,17 @@ impl<'top, D: Decoder> LazyValue<'top, D> {
LazyValue { expanded_value }
}

fn symbol_table(&'top self) -> &'top SymbolTable {
#[cfg(feature = "experimental-tooling-apis")]
pub fn symbol_table(&self) -> &SymbolTable {
self.expanded_value.context.symbol_table
}

/// Returns the symbol table held by this value's expansion context.
///
/// This variant is compiled only when the `experimental-tooling-apis` feature is
/// disabled. It is deliberately `pub(crate)` so that the symbol table is part of the
/// public API only when that feature is enabled.
// When the `experimental-tooling-apis` feature is disabled, this method is `pub(crate)`
#[cfg(not(feature = "experimental-tooling-apis"))]
pub(crate) fn symbol_table(&self) -> &SymbolTable {
    self.expanded_value.context.symbol_table
}

/// Returns the [`IonType`] of this value.
/// ```
///# use ion_rs::IonResult;
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ macro_rules! v1_x_reader_writer {
lazy::r#struct::{LazyStruct, LazyField},
lazy::sequence::{LazyList, LazySExp},
lazy::encoder::value_writer::{ValueWriter, StructWriter, SequenceWriter, EExpWriter},
lazy::any_encoding::IonEncoding,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This re-export is only visible when the experimental-reader-writer feature is enabled.

};
};
}
Expand Down
10 changes: 9 additions & 1 deletion src/symbol_ref.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::raw_symbol_ref::{AsRawSymbolRef, RawSymbolRef};
use crate::{Str, Symbol};
use crate::result::IonFailure;
use crate::{IonResult, Str, Symbol};
use std::borrow::Borrow;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
Expand Down Expand Up @@ -39,6 +40,13 @@ impl<'a> SymbolRef<'a> {
Some(text) => Symbol::owned(Str::from(text)),
}
}

/// Returns this symbol's text, or a decoding error if [`Self::text`] returns `None`.
pub fn expect_text(&self) -> IonResult<&str> {
    let Some(text) = self.text() else {
        return IonResult::decoding_error("symbol has unknown text");
    };
    Ok(text)
}
}

impl<'a, A> PartialEq<A> for SymbolRef<'a>
Expand Down
2 changes: 1 addition & 1 deletion src/types/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl Symbol {
self.text.text()
}

pub fn text_or_error(&self) -> IonResult<&str> {
pub fn expect_text(&self) -> IonResult<&str> {
match self.text() {
Some(text) => Ok(text),
None => IonResult::decoding_error("symbol has unknown text"),
Expand Down
Loading