Skip to content

Commit

Permalink
Document new format description parser
Browse files Browse the repository at this point in the history
  • Loading branch information
jhpratt committed Oct 31, 2022
1 parent 3bff301 commit 88a6d96
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 1 deletion.
35 changes: 35 additions & 0 deletions time/src/format_description/parse/ast.rs
Original file line number Diff line number Diff line change
@@ -1,57 +1,91 @@
//! AST for parsing format descriptions.
use alloc::string::String;
use alloc::vec::Vec;
use core::iter;
use core::iter::Peekable;

use super::{lexer, Error, Location, Span};

/// One part of a complete format description.
///
/// Every variant records where it came from in the format string in addition to its content.
// NOTE(review): the underscore-prefixed fields are not read anywhere in the visible code;
// presumably they are retained for diagnostics — confirm before removing them.
#[allow(variant_size_differences)]
pub(super) enum Item<'a> {
/// A literal string, formatted and parsed as-is.
Literal {
/// The string itself, as bytes borrowed for `'a`.
value: &'a [u8],
/// Where the string originates from in the format string.
_span: Span,
},
/// A sequence of brackets. The first acts as the escape character.
EscapedBracket {
/// The first bracket.
_first: Location,
/// The second bracket.
_second: Location,
},
/// Part of a type, along with its modifiers.
Component {
/// Where the opening bracket was in the format string.
_opening_bracket: Location,
/// Whitespace between the opening bracket and name. `None` when there is none.
_leading_whitespace: Option<Whitespace<'a>>,
/// The name of the component.
name: Name<'a>,
/// The modifiers for the component. May be empty.
modifiers: Vec<Modifier<'a>>,
/// Whitespace between the modifiers and closing bracket. `None` when there is none.
_trailing_whitespace: Option<Whitespace<'a>>,
/// Where the closing bracket was in the format string.
_closing_bracket: Location,
},
}

/// Whitespace within a component.
///
/// A component may carry leading and trailing whitespace, and every modifier stores the
/// whitespace that precedes it.
pub(super) struct Whitespace<'a> {
/// The whitespace itself, as bytes borrowed for `'a`.
pub(super) _value: &'a [u8],
/// Where the whitespace was in the format string.
pub(super) span: Span,
}

/// The name of a component.
///
/// The name is stored separately from the component's modifiers; both are handed to the format
/// item stage when the component is parsed.
pub(super) struct Name<'a> {
/// The name itself, as bytes borrowed for `'a`.
pub(super) value: &'a [u8],
/// Where the name was in the format string.
pub(super) span: Span,
}

/// A modifier for a component.
///
/// Structurally, a modifier is a key and a value joined by a colon, preceded by whitespace.
pub(super) struct Modifier<'a> {
/// Whitespace preceding the modifier. Unlike a component's whitespace, this is not optional.
pub(super) _leading_whitespace: Whitespace<'a>,
/// The key of the modifier.
pub(super) key: Key<'a>,
/// Where the colon of the modifier was in the format string.
pub(super) _colon: Location,
/// The value of the modifier.
pub(super) value: Value<'a>,
}

/// The key of a modifier.
///
/// Pairs the raw bytes of the key with the position they were found at.
pub(super) struct Key<'a> {
/// The key itself, as bytes borrowed for `'a`.
pub(super) value: &'a [u8],
/// Where the key was in the format string.
pub(super) span: Span,
}

/// The value of a modifier.
///
/// When a modifier is parsed, the bytes are compared against each known representation while
/// ignoring ASCII case.
pub(super) struct Value<'a> {
/// The value itself, as bytes borrowed for `'a`.
pub(super) value: &'a [u8],
/// Where the value was in the format string.
pub(super) span: Span,
}

/// Parse the provided tokens into an AST.
pub(super) fn parse<'a>(
tokens: impl Iterator<Item = lexer::Token<'a>>,
) -> impl Iterator<Item = Result<Item<'a>, Error>> {
Expand Down Expand Up @@ -97,6 +131,7 @@ pub(super) fn parse<'a>(
})
}

/// Parse a component. This assumes that the opening bracket has already been consumed.
fn parse_component<'a>(
opening_bracket: Location,
tokens: &mut Peekable<impl Iterator<Item = lexer::Token<'a>>>,
Expand Down
31 changes: 31 additions & 0 deletions time/src/format_description/parse/format_item.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
//! Typed, validated representation of a parsed format description.
use alloc::string::String;

use super::{ast, Error};

/// Convert a stream of AST items into a stream of format items.
///
/// Conversion happens lazily, one item at a time. An `Err` coming from the AST stage is passed
/// through untouched; an `Ok` item is handed to [`Item::from_ast`], which may itself fail.
pub(super) fn parse<'a>(
    ast_items: impl Iterator<Item = Result<ast::Item<'a>, Error>>,
) -> impl Iterator<Item = Result<Item<'a>, Error>> {
    ast_items.map(|parsed| match parsed {
        Ok(node) => Item::from_ast(node),
        Err(error) => Err(error),
    })
}

/// A description of how to format and parse one part of a type.
///
/// This is the validated counterpart of `ast::Item`; it is turned into the public `FormatItem`
/// through a `From` implementation.
#[allow(variant_size_differences)]
pub(super) enum Item<'a> {
/// A literal string, as bytes borrowed for `'a`.
Literal(&'a [u8]),
/// Part of a type, along with its modifiers.
Component(Component),
}

impl Item<'_> {
/// Parse an AST item into a format item.
pub(super) fn from_ast(ast_item: ast::Item<'_>) -> Result<Item<'_>, Error> {
Ok(match ast_item {
ast::Item::Component {
Expand Down Expand Up @@ -43,6 +50,7 @@ impl<'a> From<Item<'a>> for crate::format_description::FormatItem<'a> {
}
}

/// Declare the `Component` enum and a type for each of its variants.
macro_rules! component_definition {
($vis:vis enum $name:ident {
$($variant:ident = $parse_variant:literal {
Expand All @@ -59,6 +67,7 @@ macro_rules! component_definition {
})*

$(impl $variant {
/// Parse the component from the AST, given its modifiers.
fn with_modifiers(modifiers: &[ast::Modifier<'_>]) -> Result<Self, Error> {
let mut this = Self {
$($field: None),*
Expand Down Expand Up @@ -96,6 +105,7 @@ macro_rules! component_definition {
}
}

/// Parse a component from the AST, given its name and modifiers.
fn component_from_ast(
name: &ast::Name<'_>,
modifiers: &[ast::Modifier<'_>],
Expand Down Expand Up @@ -173,6 +183,7 @@ component_definition! {
}
}

/// Get the target type for a given enum.
macro_rules! target_ty {
($name:ident $type:ty) => {
$type
Expand All @@ -182,6 +193,7 @@ macro_rules! target_ty {
};
}

/// Get the target value for a given enum.
macro_rules! target_value {
($name:ident $variant:ident $value:expr) => {
$value
Expand All @@ -192,6 +204,7 @@ macro_rules! target_value {
}

// TODO use `#[derive(Default)]` on enums once MSRV is 1.62 (NET 2022-12-30)
/// Simulate `#[derive(Default)]` on enums.
macro_rules! derived_default_on_enum {
($type:ty; $default:expr) => {};
($attr:meta $type:ty; $default:expr) => {
Expand All @@ -203,6 +216,23 @@ macro_rules! derived_default_on_enum {
};
}

/// Declare the various modifiers.
///
/// For the general case, ordinary syntax can be used. Note that you _must_ declare a default
/// variant. The only significant change is that the string representation of the variant must be
/// provided after the variant name. For example, `Numerical = b"numerical"` declares a variant
/// named `Numerical` with the string representation `b"numerical"`. This is the value that will be
/// used when parsing the modifier. The value is not case sensitive.
///
/// If the type in the public API does not have the same name as the type in the internal
/// representation, then the former must be specified in parentheses after the internal name. For
/// example, `HourBase(bool)` has an internal name "HourBase", but is represented as a boolean in
/// the public API.
///
/// By default, the internal variant name is assumed to be the same as the public variant name. If
/// this is not the case, the qualified path to the variant must be specified in parentheses after
/// the internal variant name. For example, `Twelve(true)` has an internal variant name "Twelve",
/// but is represented as `true` in the public API.
macro_rules! modifier {
($(
enum $name:ident $(($target_ty:ty))? {
Expand All @@ -221,6 +251,7 @@ macro_rules! modifier {
})*

impl $name {
/// Parse the modifier from its string representation.
fn from_modifier_value(value: &ast::Value<'_>) -> Result<Option<Self>, Error> {
$(if value.value.eq_ignore_ascii_case($parse_variant) {
return Ok(Some(Self::$variant));
Expand Down
21 changes: 21 additions & 0 deletions time/src/format_description/parse/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,53 @@
//! Lexer for parsing format descriptions.
use core::iter;

use super::{Location, Span};

/// A token emitted by the lexer. There is no semantic meaning at this stage.
///
/// Tokens are produced by [`lex`] and borrow their bytes from the lexer's input for `'a`.
pub(super) enum Token<'a> {
/// A literal string, formatted and parsed as-is.
Literal {
/// The string itself, as bytes borrowed for `'a`.
value: &'a [u8],
/// Where the string was in the format string.
span: Span,
},
/// An opening or closing bracket. May or may not be the start or end of a component.
Bracket {
/// Whether the bracket is opening or closing.
kind: BracketKind,
/// Where the bracket was in the format string.
location: Location,
},
/// One part of a component. This could be its name, a modifier, or whitespace.
ComponentPart {
/// Whether the part is whitespace or not.
kind: ComponentKind,
/// The part itself, as bytes borrowed for `'a`.
value: &'a [u8],
/// Where the part was in the format string.
span: Span,
},
}

/// What type of bracket is present.
///
/// Carried by [`Token::Bracket`] to distinguish the two bracket characters.
pub(super) enum BracketKind {
/// An opening bracket: `[`
Opening,
/// A closing bracket: `]`
Closing,
}

/// Indicates whether the component is whitespace or not.
///
/// Carried by [`Token::ComponentPart`] to classify the bytes of that part.
pub(super) enum ComponentKind {
/// The part is whitespace.
#[allow(clippy::missing_docs_in_private_items)]
Whitespace,
/// The part is something other than whitespace.
#[allow(clippy::missing_docs_in_private_items)]
NotWhitespace,
}

/// Attach [`Location`] information to each byte in the iterator.
fn attach_location(iter: impl Iterator<Item = u8>) -> impl Iterator<Item = (u8, Location)> {
let mut line = 1;
let mut column = 1;
Expand All @@ -51,6 +71,7 @@ fn attach_location(iter: impl Iterator<Item = u8>) -> impl Iterator<Item = (u8,
})
}

/// Parse the string into a series of [`Token`]s.
pub(super) fn lex(mut input: &[u8]) -> impl Iterator<Item = Token<'_>> {
let mut depth: u8 = 0;
let mut iter = attach_location(input.iter().copied()).peekable();
Expand Down
31 changes: 30 additions & 1 deletion time/src/format_description/parse/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![allow(clippy::missing_docs_in_private_items)]
//! Parser for format descriptions.
use alloc::vec::Vec;
use core::ops::{RangeFrom, RangeTo};
Expand All @@ -7,6 +7,10 @@ mod ast;
mod format_item;
mod lexer;

/// Parse a sequence of items from the format description.
///
/// The syntax for the format description can be found in [the
/// book](https://time-rs.github.io/book/api/format-description.html).
pub fn parse(
s: &str,
) -> Result<Vec<crate::format_description::FormatItem<'_>>, crate::error::InvalidFormatDescription>
Expand All @@ -19,14 +23,21 @@ pub fn parse(
Ok(items.into_iter().map(Into::into).collect())
}

/// A location within a string.
///
/// The same position is described in two ways: as a line/column pair and as a byte index. Note
/// the differing bases: line and column start at one, while the byte index starts at zero.
#[derive(Clone, Copy)]
struct Location {
/// The one-indexed line of the string.
line: usize,
/// The one-indexed column of the string.
column: usize,
/// The zero-indexed byte of the string.
byte: usize,
}

impl Location {
/// Offset the location by the provided amount.
///
/// Note that this assumes the resulting location is on the same line as the original location.
#[must_use = "this does not modify the original value"]
const fn offset(&self, offset: usize) -> Self {
Self {
Expand All @@ -36,6 +47,7 @@ impl Location {
}
}

/// Create an error with the provided message at this location.
const fn error(self, message: &'static str) -> ErrorInner {
ErrorInner {
_message: message,
Expand All @@ -47,22 +59,28 @@ impl Location {
}
}

/// A start and end point within a string.
// NOTE(review): whether `end` is inclusive or exclusive is not visible from this definition —
// confirm against the lexer before relying on either.
#[derive(Clone, Copy)]
struct Span {
/// Where the span begins.
#[allow(clippy::missing_docs_in_private_items)]
start: Location,
/// Where the span ends.
#[allow(clippy::missing_docs_in_private_items)]
end: Location,
}

impl Span {
/// Create a new `Span` from the provided start and end locations.
///
/// The locations are stored as given; no check is made that `start` comes before `end`.
const fn start_end(start: Location, end: Location) -> Self {
Self { start, end }
}

/// Reduce this span to the provided range.
#[must_use = "this does not modify the original value"]
fn subspan(&self, range: impl Subspan) -> Self {
range.subspan(self)
}

/// Obtain a `Span` pointing at the start of the pre-existing span.
#[must_use = "this does not modify the original value"]
const fn shrink_to_start(&self) -> Self {
Self {
Expand All @@ -71,6 +89,7 @@ impl Span {
}
}

/// Obtain a `Span` pointing at the end of the pre-existing span.
#[must_use = "this does not modify the original value"]
const fn shrink_to_end(&self) -> Self {
Self {
Expand All @@ -79,19 +98,23 @@ impl Span {
}
}

/// Create an error with the provided message at this span.
///
/// This only builds the internal [`ErrorInner`]; it does not produce the complete [`Error`].
const fn error(self, message: &'static str) -> ErrorInner {
ErrorInner {
_message: message,
_span: self,
}
}

/// Get the byte index that the span starts at.
const fn start_byte(&self) -> usize {
self.start.byte
}
}

/// A trait for types that can be used to reduce a `Span`.
///
/// Implemented for range types such as `RangeTo<usize>`, which lets `Span::subspan` accept a
/// range as its argument.
trait Subspan {
/// Reduce the provided `Span` to a new `Span`.
///
/// The implementor is consumed, while the span is only borrowed.
fn subspan(self, span: &Span) -> Span;
}

Expand Down Expand Up @@ -125,13 +148,19 @@ impl Subspan for RangeTo<usize> {
}
}

/// The internal error type.
///
/// Values are built by the `error` methods on [`Location`] and [`Span`].
// NOTE(review): both fields are underscore-prefixed and not read in the visible code;
// presumably reserved for richer diagnostics — confirm.
struct ErrorInner {
/// The message displayed to the user.
_message: &'static str,
/// Where the error originated.
_span: Span,
}

/// A complete error description.
///
/// Pairs the parser's own error information with the error type exposed by the rest of the crate.
struct Error {
/// The internal error.
_inner: ErrorInner,
/// The error needed for interoperability with the rest of `time`.
public: crate::error::InvalidFormatDescription,
}

Expand Down

0 comments on commit 88a6d96

Please sign in to comment.