Skip to content

Commit

Permalink
Add support for DateTime parsing and formatting
Browse files Browse the repository at this point in the history
This adds the following methods to the DateTime type:

- DateTime.parse
- DateTime.format
- DateTime.to_iso8601
- DateTime.to_rfc2822

These methods support locale-aware parsing and formatting of DateTime
values, with the standard library providing support for English, Dutch
and Japanese (with the ability to add more in the future). Locale data
is located in locale-specific modules such as std.locale.en and
std.locale.ja, such that this data is only included if necessary.

The parsing and formatting is done according to a format string, which
uses a syntax based on that of strftime(3)/strptime(3).

This fixes #677
and fixes #678.

Changelog: added
  • Loading branch information
yorickpeterse committed Dec 22, 2024
1 parent 6f78b95 commit 40b3293
Show file tree
Hide file tree
Showing 13 changed files with 1,868 additions and 55 deletions.
101 changes: 101 additions & 0 deletions std/src/std/bytes.inko
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Internal helper methods for working with bytes and byte streams.
import std.cmp (min)
import std.ptr
import std.string (Bytes)

# The ASCII byte value of the digit "9", used as the upper bound when checking
# if a byte is a base 10 digit.
let NINE = 57
# The ASCII byte value of the digit "0", used as the lower bound when checking
# if a byte is a base 10 digit, and subtracted from a digit byte to obtain its
# numeric value.
let ZERO = 48

# Returns `true` if `byte` lies in the inclusive range `ZERO..NINE`, i.e. it is
# the ASCII encoding of a base 10 digit.
fn inline digit?(byte: Int) -> Bool {
  ZERO <= byte and byte <= NINE
}

# Parses exactly two base 10 digits, starting at byte offset `start`, into an
# `Int`.
#
# An `Option.None` is returned if fewer than two bytes remain in `input`, or if
# either of the two bytes isn't an ASCII digit.
fn inline two_digits[T: Bytes](input: ref T, start: Int) -> Option[Int] {
  if start.wrapping_add(2) > input.size { return Option.None }

  let tens = input.byte(start)
  let ones = input.byte(start.wrapping_add(1))

  if digit?(tens).false? or digit?(ones).false? { return Option.None }

  # Subtracting ZERO turns an ASCII digit into its numeric value.
  let high = tens.wrapping_sub(ZERO).wrapping_mul(10)

  Option.Some(high.wrapping_add(ones.wrapping_sub(ZERO)))
}

# Parses exactly four base 10 digits, starting at byte offset `start`, into an
# `Int`.
#
# An `Option.None` is returned if fewer than four bytes remain in `input`, or if
# any of the four bytes isn't an ASCII digit.
fn inline four_digits[T: Bytes](input: ref T, start: Int) -> Option[Int] {
  if start.wrapping_add(4) > input.size { return Option.None }

  let thousands = input.byte(start)
  let hundreds = input.byte(start.wrapping_add(1))
  let tens = input.byte(start.wrapping_add(2))
  let ones = input.byte(start.wrapping_add(3))
  let valid = digit?(thousands)
    and digit?(hundreds)
    and digit?(tens)
    and digit?(ones)

  if valid.false? { return Option.None }

  # Accumulate from the most significant digit down: shift the running total
  # one decimal place, then add the next digit's numeric value.
  let mut num = thousands.wrapping_sub(ZERO)

  num = num.wrapping_mul(10).wrapping_add(hundreds.wrapping_sub(ZERO))
  num = num.wrapping_mul(10).wrapping_add(tens.wrapping_sub(ZERO))
  num = num.wrapping_mul(10).wrapping_add(ones.wrapping_sub(ZERO))

  Option.Some(num)
}

# Parses up to `limit` base 10 digits into an `Int`, starting at byte offset
# `start`.
#
# Parsing stops at the first byte that isn't an ASCII digit, after `limit`
# digits have been consumed, or at the end of `input`, whichever comes first.
#
# The return value is an optional tuple containing the parsed number and the
# number of bytes (digits) consumed. If there isn't at least one digit at
# `start`, an `Option.None` is returned.
#
# The number is accumulated using wrapping arithmetic, so a `limit` large enough
# to overflow an `Int` results in a wrapped value instead of a panic.
fn digits[T: Bytes](
  input: ref T,
  start: Int,
  limit: Int,
) -> Option[(Int, Int)] {
  let mut idx = start
  let mut num = 0

  # The end of the window is relative to `start`: we may consume at most `limit`
  # bytes from `start`, and never read past the end of the input. Deriving the
  # bound from `limit` alone would reject any input for which `start > limit`,
  # and would allow one digit too many when `start` is zero.
  let max = min(start + limit, input.size)

  while idx < max {
    let byte = input.byte(idx)

    if digit?(byte).false? { break }

    num = num.wrapping_mul(10).wrapping_add(byte.wrapping_sub(ZERO))
    idx = idx.wrapping_add(1)
  }

  let len = idx - start

  if len > 0 { Option.Some((num, len)) } else { Option.None }
}

# Finds the first entry in `names` for which `ptr.starts_with?` reports that the
# bytes of `input`, beginning at offset `start`, start with that name.
#
# The names are tried in order. The return value is an optional tuple containing
# the index of the matching name in `names` and the size of that name in bytes.
# If none of the names match, an `Option.None` is returned.
fn name_index_at[T: Bytes](
  input: ref T,
  start: Int,
  names: ref Array[String],
) -> Option[(Int, Int)] {
  # The remaining input is handed to the comparison as a raw pointer plus a
  # length, so no intermediate slice is created.
  let remaining = input.size - start
  let haystack = ptr.add(input.to_pointer, start)
  let count = names.size
  let mut index = 0

  while index < count {
    let candidate = names.get(index)
    let size = candidate.size

    if ptr.starts_with?(haystack, remaining, candidate.to_pointer, size) {
      return Option.Some((index, size))
    }

    index += 1
  }

  Option.None
}
5 changes: 1 addition & 4 deletions std/src/std/json.inko
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
# The implementation provided by this module isn't optimised for maximum
# performance or optimal memory usage. Instead this module aims to provide an
# implementation that's good enough for most cases.
import std.bytes (digit?)
import std.cmp (Equal)
import std.fmt (Format as FormatTrait, Formatter)
import std.int (Format)
Expand Down Expand Up @@ -132,10 +133,6 @@ let ESCAPE_TABLE = [
# objects.
let DEFAULT_PRETTY_INDENT = 2

fn digit?(byte: Int) -> Bool {
byte >= ZERO and byte <= NINE
}

# Returns `true` if `byte` equals `LOWER_E` or `UPPER_E`.
#
# NOTE(review): both constants are defined outside the visible part of this
# module; presumably they are the ASCII bytes for "e" and "E" that introduce
# the exponent of a JSON number - confirm.
fn exponent?(byte: Int) -> Bool {
byte == LOWER_E or byte == UPPER_E
}
Expand Down
99 changes: 99 additions & 0 deletions std/src/std/locale.inko
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import std.string (Bytes)

# A type describing a locale (e.g. English or Dutch).
#
# A locale provides the names of months and days of the week, along with the
# means to parse those names from a `String` or `ByteArray`.
trait pub Locale {
# Parses a (case sensitive) abbreviated month name.
#
# The return value is an optional tuple containing the index (in the range
# 0-11) and the size of the name in bytes.
#
# Not all locales use abbreviated names, in which case this method should
# simply parse the input as full names.
#
# The `input` argument is a `String` or `ByteArray` to parse. The `start`
# argument is the offset to start parsing at.
#
# If the input points to a valid month name, the return value is an
# `Option.Some` containing the month index and the size of the name, otherwise
# an `Option.None` is returned.
fn parse_short_month[T: Bytes](input: ref T, start: Int) -> Option[(Int, Int)]

# Parses a (case sensitive) full month name.
#
# The return value is an optional tuple containing the index (in the range
# 0-11) and the size of the name in bytes.
#
# The `input` argument is a `String` or `ByteArray` to parse. The `start`
# argument is the offset to start parsing at.
#
# If the input points to a valid month name, the return value is an
# `Option.Some` containing the month index and the size of the name, otherwise
# an `Option.None` is returned.
fn parse_full_month[T: Bytes](input: ref T, start: Int) -> Option[(Int, Int)]

# Parses a (case sensitive) abbreviated name of the day of the week.
#
# The return value is an optional tuple containing the number of the day of
# the week (in the range 1-7) and the size of the name in bytes.
#
# NOTE(review): the English locale (`std.locale.en`) returns the zero-based
# position of the name in its list of day names (0-6, starting at Monday),
# which matches the 0-6 range `short_day_of_week` expects rather than the 1-7
# range stated here - confirm which range is intended.
#
# Not all locales use abbreviated names, in which case this method should
# simply parse the input as full names.
#
# The `input` argument is a `String` or `ByteArray` to parse. The `start`
# argument is the offset to start parsing at.
#
# If the input points to a valid day name, the return value is an
# `Option.Some` containing the day number and the size of the name, otherwise
# an `Option.None` is returned.
fn parse_short_day_of_week[T: Bytes](
input: ref T,
start: Int,
) -> Option[(Int, Int)]

# Parses a (case sensitive) full name of the day of the week.
#
# The return value is an optional tuple containing the number of the day of
# the week (in the range 1-7) and the size of the name in bytes.
#
# NOTE(review): the English locale (`std.locale.en`) returns the zero-based
# position of the name in its list of day names (0-6, starting at Monday),
# which matches the 0-6 range `full_day_of_week` expects rather than the 1-7
# range stated here - confirm which range is intended.
#
# The `input` argument is a `String` or `ByteArray` to parse. The `start`
# argument is the offset to start parsing at.
#
# If the input points to a valid day name, the return value is an
# `Option.Some` containing the day number and the size of the name, otherwise
# an `Option.None` is returned.
fn parse_full_day_of_week[T: Bytes](
input: ref T,
start: Int,
) -> Option[(Int, Int)]

# Returns the abbreviated month name for the given month index in the range
# 0-11.
#
# # Panics
#
# This method should panic if the month is out of bounds.
fn short_month(index: Int) -> String

# Returns the full month name for the given month index in the range 0-11.
#
# # Panics
#
# This method should panic if the month is out of bounds.
fn full_month(index: Int) -> String

# Returns the abbreviated name of the day of the week for the given day index
# in the range 0-6.
#
# # Panics
#
# This method should panic if the day is out of bounds.
fn short_day_of_week(index: Int) -> String

# Returns the full name of the day of the week for the given day index in the
# range 0-6.
#
# # Panics
#
# This method should panic if the day is out of bounds.
fn full_day_of_week(index: Int) -> String
}
163 changes: 163 additions & 0 deletions std/src/std/locale/en.inko
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Locale information for English.
import std.bytes (name_index_at)
import std.locale (Locale as LocaleTrait)
import std.ptr
import std.string (Bytes)

# The abbreviated English month names, indexed from 0 (January) to 11
# (December).
#
# Each name is exactly three bytes long, which `parse_short_month` relies on
# when reporting the size of a parsed name.
let SHORT_MONTHS = [
'Jan',
'Feb',
'Mar',
'Apr',
'May',
'Jun',
'Jul',
'Aug',
'Sep',
'Oct',
'Nov',
'Dec',
]

# The full English month names, indexed from 0 (January) to 11 (December).
#
# The order must match the indexes produced by `month_prefix_index`, as
# `parse_full_month` uses such an index to look up the name to compare against.
let FULL_MONTHS = [
'January',
'February',
'March',
'April',
'May',
'June',
'July',
'August',
'September',
'October',
'November',
'December',
]

# The abbreviated English names of the days of the week, indexed from 0 (Monday)
# to 6 (Sunday).
let SHORT_WEEKDAYS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The full English names of the days of the week, indexed from 0 (Monday) to 6
# (Sunday).
let FULL_WEEKDAYS = [
'Monday',
'Tuesday',
'Wednesday',
'Thursday',
'Friday',
'Saturday',
'Sunday',
]

# Maps the three bytes found at offset `start` to the index (0-11) of the
# English month whose name starts with those bytes.
#
# An `Option.None` is returned if the bytes don't match the prefix of any month.
#
# The bytes are read through raw pointers without a bounds check in this method,
# so the caller must ensure at least three bytes are available at `start`.
fn month_prefix_index[T: Bytes](input: ref T, start: Int) -> Option[Int] {
  # The first three bytes of every English month name are unique, so packing
  # them into a single integer and matching on it narrows the candidates down to
  # at most one month without comparing any strings.
  let base = input.to_pointer

  # Bytes 0 and 1 are read together as a single 16-bit value, byte 2 is read on
  # its own and shifted into bits 16-23.
  let low = (ptr.add(base, start) as Pointer[UInt16]).0 as Int
  let high = ptr.add(base, start + 2).0 as Int << 16

  # Each pattern is `(byte 2 << 16) | (byte 1 << 8) | byte 0`, i.e. the three
  # prefix bytes in little endian order.
  match high | low {
    case 0x6E614A -> Option.Some(0) # "Jan"
    case 0x626546 -> Option.Some(1) # "Feb"
    case 0x72614D -> Option.Some(2) # "Mar"
    case 0x727041 -> Option.Some(3) # "Apr"
    case 0x79614D -> Option.Some(4) # "May"
    case 0x6E754A -> Option.Some(5) # "Jun"
    case 0x6C754A -> Option.Some(6) # "Jul"
    case 0x677541 -> Option.Some(7) # "Aug"
    case 0x706553 -> Option.Some(8) # "Sep"
    case 0x74634F -> Option.Some(9) # "Oct"
    case 0x766F4E -> Option.Some(10) # "Nov"
    case 0x636544 -> Option.Some(11) # "Dec"
    case _ -> Option.None
  }
}

# Locale data for English.
#
# This type handles both US and UK English as in its current implementation
# there are no differences between the two.
#
# The type defines no fields of its own; the names it works with come from
# constants defined in this module.
class pub copy Locale {
# Returns a new `Locale`.
#
# This method takes no arguments, as there is nothing to configure.
fn pub inline static new -> Locale {
Locale()
}
}

impl LocaleTrait for Locale {
  # Parses an abbreviated English month name (e.g. "Jan") at offset `start`.
  #
  # Only the first three bytes are inspected, so the size in the returned tuple
  # is always 3.
  fn parse_short_month[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    # The prefix lookup reads three bytes without checking bounds, so make sure
    # they are available first.
    if input.size - start < 3 { return Option.None }

    let index = try month_prefix_index(input, start)

    Option.Some((index, 3))
  }

  # Parses a full English month name (e.g. "January") at offset `start`.
  #
  # The first three bytes select the candidate month, after which the rest of
  # the candidate's name is compared against the input byte by byte.
  fn parse_full_month[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    let available = input.size - start

    # "May" is the shortest month name at 3 bytes, so fewer remaining bytes can
    # never form a month name (and the prefix lookup needs 3 bytes anyway).
    if available < 3 { return Option.None }

    let month = try month_prefix_index(input, start)
    let name = FULL_MONTHS.get(month)
    let name_size = name.size

    if available < name_size { return Option.None }

    # The prefix lookup already matched the first three bytes. `matched` is the
    # number of bytes of the name matched so far, and thus also the offset
    # (relative to `start`) of the next input byte to compare.
    let end = input.size
    let mut matched = 3

    while start + matched < end {
      # Once `matched` reaches the end of the name, `opt` produces no byte and
      # the -1 fallback never equals an input byte, ending the loop.
      if input.byte(start + matched) != name.opt(matched).or(-1) { break }

      matched += 1
    }

    if matched != name_size { return Option.None }

    Option.Some((month, name_size))
  }

  # Parses an abbreviated English day name (e.g. "Mon") at offset `start`.
  #
  # The index in the returned tuple is the position of the name in
  # `SHORT_WEEKDAYS`, which starts at 0 for Monday.
  fn parse_short_day_of_week[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    name_index_at(input, start, SHORT_WEEKDAYS)
  }

  # Parses a full English day name (e.g. "Monday") at offset `start`.
  #
  # The index in the returned tuple is the position of the name in
  # `FULL_WEEKDAYS`, which starts at 0 for Monday.
  fn parse_full_day_of_week[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    name_index_at(input, start, FULL_WEEKDAYS)
  }

  # Returns the abbreviated month name stored at `index` in `SHORT_MONTHS`
  # (0 is "Jan").
  fn short_month(index: Int) -> String {
    SHORT_MONTHS.get(index)
  }

  # Returns the full month name stored at `index` in `FULL_MONTHS`
  # (0 is "January").
  fn full_month(index: Int) -> String {
    FULL_MONTHS.get(index)
  }

  # Returns the abbreviated day name stored at `index` in `SHORT_WEEKDAYS`
  # (0 is "Mon").
  fn short_day_of_week(index: Int) -> String {
    SHORT_WEEKDAYS.get(index)
  }

  # Returns the full day name stored at `index` in `FULL_WEEKDAYS`
  # (0 is "Monday").
  fn full_day_of_week(index: Int) -> String {
    FULL_WEEKDAYS.get(index)
  }
}
Loading

0 comments on commit 40b3293

Please sign in to comment.