-
-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for DateTime parsing and formatting
This adds the following methods to the DateTime type:

- DateTime.parse
- DateTime.format
- DateTime.to_iso8601
- DateTime.to_rfc2822

These methods support locale-aware parsing and formatting of DateTime values, with the standard library providing support for English, Dutch and Japanese (with the ability to add more in the future). Locale data is located in locale-specific modules such as std.locale.en and std.locale.ja, such that this data is only included if necessary. The parsing and formatting is done according to a formatting string, which uses a syntax based on that of strftime(2)/strptime(2).

This fixes #677 and fixes #678.

Changelog: added
- Loading branch information
1 parent
6f78b95
commit 40b3293
Showing
13 changed files
with
1,868 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Internal helper methods for working with bytes and byte streams. | ||
import std.cmp (min) | ||
import std.ptr | ||
import std.string (Bytes) | ||
|
||
# ASCII byte value of the character '9'; the upper bound used by `digit?`. | ||
let NINE = 57 | ||
# ASCII byte value of the character '0'; the lower bound used by `digit?`, and | ||
# the value subtracted from a digit byte to obtain its numeric value. | ||
let ZERO = 48 | ||
|
||
# Returns `true` if `byte` lies in the inclusive range `ZERO` to `NINE`, i.e. if
# it is an ASCII decimal digit.
fn inline digit?(byte: Int) -> Bool {
  byte <= NINE and byte >= ZERO
}
|
||
# Parses exactly two base 10 digits, starting at `start`, into an `Int`.
#
# An `Option.None` is returned if fewer than two bytes are available at `start`,
# or if either of the two bytes is not an ASCII digit.
fn inline two_digits[T: Bytes](input: ref T, start: Int) -> Option[Int] {
  if start.wrapping_add(2) > input.size { return Option.None }

  let tens = input.byte(start)
  let ones = input.byte(start.wrapping_add(1))

  if digit?(tens).false? or digit?(ones).false? { return Option.None }

  # Subtracting `ZERO` turns an ASCII digit into its numeric value.
  let high = tens.wrapping_sub(ZERO).wrapping_mul(10)
  let low = ones.wrapping_sub(ZERO)

  Option.Some(high.wrapping_add(low))
}
|
||
# Parses exactly four base 10 digits, starting at `start`, into an `Int`.
#
# An `Option.None` is returned if fewer than four bytes are available at
# `start`, or if any of the four bytes is not an ASCII digit.
fn inline four_digits[T: Bytes](input: ref T, start: Int) -> Option[Int] {
  if start.wrapping_add(4) > input.size { return Option.None }

  let mut value = 0
  let mut offset = 0

  while offset < 4 {
    let byte = input.byte(start.wrapping_add(offset))

    if digit?(byte).false? { return Option.None }

    # Shift the digits gathered so far one decimal place to the left, then add
    # the numeric value of the current digit.
    value = value.wrapping_mul(10).wrapping_add(byte.wrapping_sub(ZERO))
    offset = offset.wrapping_add(1)
  }

  Option.Some(value)
}
|
||
# Parses up to `limit` base 10 digits into an `Int`, starting at `start`.
#
# Parsing stops at the first byte that is not an ASCII digit, after `limit`
# digits have been consumed, or at the end of the input, whichever comes first.
#
# The return value is an optional tuple containing the parsed number and the
# number of bytes consumed. If no digit is found at `start`, an `Option.None` is
# returned.
fn digits[T: Bytes](
  input: ref T,
  start: Int,
  limit: Int,
) -> Option[(Int, Int)] {
  let mut idx = start
  let mut num = 0

  # The upper bound must be relative to `start`: a bound derived from `limit`
  # alone would reject every input for which `start` exceeds `limit`, and would
  # accept one digit too many when `start` is zero.
  let max = min(start + limit, input.size)

  while idx < max {
    let byte = input.byte(idx)

    if digit?(byte).false? { break }

    num = num.wrapping_mul(10).wrapping_add(byte.wrapping_sub(ZERO))
    idx = idx.wrapping_add(1)
  }

  let len = idx - start

  if len > 0 { Option.Some((num, len)) } else { Option.None }
}
|
||
# Finds the first entry in `names` that the bytes of `input` at offset `start`
# begin with.
#
# The return value is an optional tuple containing the index of the matching
# name in `names` and the size of that name in bytes. Names are tried in order,
# and an `Option.None` is returned if none of them match.
fn name_index_at[T: Bytes](
  input: ref T,
  start: Int,
  names: ref Array[String],
) -> Option[(Int, Int)] {
  let remaining = input.size - start
  let haystack = ptr.add(input.to_pointer, start)
  let total = names.size
  let mut idx = 0

  while idx < total {
    let candidate = names.get(idx)
    let size = candidate.size

    if ptr.starts_with?(haystack, remaining, candidate.to_pointer, size) {
      return Option.Some((idx, size))
    }

    idx += 1
  }

  Option.None
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import std.string (Bytes) | ||
|
||
# A type describing a locale (e.g. English or Dutch). | ||
# | ||
# A locale provides the names of months and days of the week, along with the | ||
# means to parse such names from a `String` or `ByteArray`. | ||
trait pub Locale { | ||
# Parses a (case sensitive) abbreviated month name. | ||
# | ||
# The return value is an optional tuple containing the index (in the range | ||
# 0-11) and the size of the name in bytes. | ||
# | ||
# Not all locales use abbreviated names, in which case this method should | ||
# simply parse the input as full names. | ||
# | ||
# The `input` argument is a `String` or `ByteArray` to parse. The `start` | ||
# argument is the offset to start parsing at. | ||
# | ||
# If the input points to a valid month name, the return value is an | ||
# `Option.Some` containing the month index and name size, otherwise an | ||
# `Option.None` is returned. | ||
fn parse_short_month[T: Bytes](input: ref T, start: Int) -> Option[(Int, Int)] | ||
|
||
# Parses a (case sensitive) full month name. | ||
# | ||
# The return value is an optional tuple containing the index (in the range | ||
# 0-11) and the size of the name in bytes. | ||
# | ||
# The `input` argument is a `String` or `ByteArray` to parse. The `start` | ||
# argument is the offset to start parsing at. | ||
# | ||
# If the input points to a valid month name, the return value is an | ||
# `Option.Some` containing the month index and name size, otherwise an | ||
# `Option.None` is returned. | ||
fn parse_full_month[T: Bytes](input: ref T, start: Int) -> Option[(Int, Int)] | ||
|
||
# Parses a (case sensitive) abbreviated name of the day of the week. | ||
# | ||
# The return value is an optional tuple containing the number of the day of | ||
# the week (in the range 1-7) and the size of the name in bytes. | ||
# | ||
# NOTE(review): an implementation that returns the position of the name in a | ||
# zero-based list yields values in the range 0-6 rather than 1-7; confirm | ||
# which range callers expect. | ||
# | ||
# Not all locales use abbreviated names, in which case this method should | ||
# simply parse the input as full names. | ||
# | ||
# The `input` argument is a `String` or `ByteArray` to parse. The `start` | ||
# argument is the offset to start parsing at. | ||
# | ||
# If the input points to a valid day name, the return value is an | ||
# `Option.Some` containing the day number and name size, otherwise an | ||
# `Option.None` is returned. | ||
fn parse_short_day_of_week[T: Bytes]( | ||
input: ref T, | ||
start: Int, | ||
) -> Option[(Int, Int)] | ||
|
||
# Parses a (case sensitive) full name of the day of the week. | ||
# | ||
# The return value is an optional tuple containing the number of the day of | ||
# the week (in the range 1-7) and the size of the name in bytes. | ||
# | ||
# NOTE(review): an implementation that returns the position of the name in a | ||
# zero-based list yields values in the range 0-6 rather than 1-7; confirm | ||
# which range callers expect. | ||
# | ||
# The `input` argument is a `String` or `ByteArray` to parse. The `start` | ||
# argument is the offset to start parsing at. | ||
# | ||
# If the input points to a valid day name, the return value is an | ||
# `Option.Some` containing the day number and name size, otherwise an | ||
# `Option.None` is returned. | ||
fn parse_full_day_of_week[T: Bytes]( | ||
input: ref T, | ||
start: Int, | ||
) -> Option[(Int, Int)] | ||
|
||
# Returns the abbreviated month name for the given month index in the range | ||
# 0-11. | ||
# | ||
# # Panics | ||
# | ||
# This method should panic if the month is out of bounds. | ||
fn short_month(index: Int) -> String | ||
|
||
# Returns the full month name for the given month index in the range 0-11. | ||
# | ||
# # Panics | ||
# | ||
# This method should panic if the month is out of bounds. | ||
fn full_month(index: Int) -> String | ||
|
||
# Returns the abbreviated name of the day of the week for the given day index | ||
# in the range 0-6. | ||
# | ||
# # Panics | ||
# | ||
# This method should panic if the day index is out of bounds. | ||
fn short_day_of_week(index: Int) -> String | ||
|
||
# Returns the full name of the day of the week for the given day index in the | ||
# range 0-6. | ||
# | ||
# # Panics | ||
# | ||
# This method should panic if the day index is out of bounds. | ||
fn full_day_of_week(index: Int) -> String | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
# Locale information for English. | ||
import std.bytes (name_index_at) | ||
import std.locale (Locale as LocaleTrait) | ||
import std.ptr | ||
import std.string (Bytes) | ||
|
||
# The abbreviated English month names, ordered from January to December so the | ||
# position of a name is its zero-based month index. | ||
let SHORT_MONTHS = [ | ||
'Jan', | ||
'Feb', | ||
'Mar', | ||
'Apr', | ||
'May', | ||
'Jun', | ||
'Jul', | ||
'Aug', | ||
'Sep', | ||
'Oct', | ||
'Nov', | ||
'Dec', | ||
] | ||
|
||
# The full English month names, in the same order as `SHORT_MONTHS`. | ||
let FULL_MONTHS = [ | ||
'January', | ||
'February', | ||
'March', | ||
'April', | ||
'May', | ||
'June', | ||
'July', | ||
'August', | ||
'September', | ||
'October', | ||
'November', | ||
'December', | ||
] | ||
|
||
# The abbreviated English names of the days of the week, starting at Monday. | ||
let SHORT_WEEKDAYS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] | ||
|
||
# The full English names of the days of the week, in the same order as | ||
# `SHORT_WEEKDAYS`. | ||
let FULL_WEEKDAYS = [ | ||
'Monday', | ||
'Tuesday', | ||
'Wednesday', | ||
'Thursday', | ||
'Friday', | ||
'Saturday', | ||
'Sunday', | ||
] | ||
|
||
# Returns the zero-based index of the English month whose name starts with the
# three bytes found at offset `start` in `input`, or an `Option.None` if those
# bytes don't match any month.
#
# This function performs no size check of its own, so callers must verify that
# at least three bytes are available at `start` before calling it.
fn month_prefix_index[T: Bytes](input: ref T, start: Int) -> Option[Int] {
  # The first three bytes of every English month name are unique. Packing them
  # into a single integer therefore narrows the candidates down to at most one
  # month using a single `match`.
  let low = (ptr.add(input.to_pointer, start) as Pointer[UInt16]).0 as Int
  let high = ptr.add(input.to_pointer, start + 2).0 as Int << 16
  let key = high | low

  # Each pattern is `(byte 2 << 16) | (byte 1 << 8) | byte 0`, i.e. the first
  # three bytes of a month name in little endian order. The patterns are listed
  # in calendar order, from "Jan" down to "Dec".
  match key {
    case 0x6E614A -> Option.Some(0)
    case 0x626546 -> Option.Some(1)
    case 0x72614D -> Option.Some(2)
    case 0x727041 -> Option.Some(3)
    case 0x79614D -> Option.Some(4)
    case 0x6E754A -> Option.Some(5)
    case 0x6C754A -> Option.Some(6)
    case 0x677541 -> Option.Some(7)
    case 0x706553 -> Option.Some(8)
    case 0x74634F -> Option.Some(9)
    case 0x766F4E -> Option.Some(10)
    case 0x636544 -> Option.Some(11)
    case _ -> Option.None
  }
}
|
||
# Locale data for English. | ||
# | ||
# This type handles both US and UK English as in its current implementation | ||
# there are no differences between the two. | ||
# | ||
# The type defines no fields; the month and day names are provided through its | ||
# implementation of the `std.locale.Locale` trait. | ||
class pub copy Locale { | ||
# Returns a new `Locale`. | ||
# | ||
# No arguments are needed as the type carries no data of its own. | ||
fn pub inline static new -> Locale { | ||
Locale() | ||
} | ||
} | ||
|
||
impl LocaleTrait for Locale {
  # Parses an abbreviated English month name (e.g. "Jan") at offset `start`.
  #
  # On a match the result is the zero-based month index paired with the size of
  # the abbreviation, which is always 3 bytes.
  fn parse_short_month[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    # Every abbreviation is exactly 3 bytes, so anything shorter can't match.
    if input.size - start < 3 { return Option.None }

    let month = try month_prefix_index(input, start)

    Option.Some((month, 3))
  }

  # Parses a full English month name (e.g. "January") at offset `start`.
  #
  # On a match the result is the zero-based month index paired with the size of
  # the full name in bytes.
  fn parse_full_month[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    let remaining = input.size - start

    # "May" is the shortest month name at 3 bytes, so shorter input is never a
    # month name.
    if remaining < 3 { return Option.None }

    # The first three bytes single out the only month that can still match.
    let month = try month_prefix_index(input, start)
    let name = FULL_MONTHS.get(month)
    let name_size = name.size

    if remaining < name_size { return Option.None }

    # The prefix is already known to match, so only the rest of the name needs
    # to be compared against the input.
    let mut offset = 3

    while offset < name_size {
      if input.byte(start + offset) != name.byte(offset) { return Option.None }

      offset += 1
    }

    Option.Some((month, name_size))
  }

  # Parses an abbreviated English day name (e.g. "Mon") at offset `start`,
  # returning its position in `SHORT_WEEKDAYS` and its size in bytes.
  fn parse_short_day_of_week[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    name_index_at(input, start, SHORT_WEEKDAYS)
  }

  # Parses a full English day name (e.g. "Monday") at offset `start`, returning
  # its position in `FULL_WEEKDAYS` and its size in bytes.
  fn parse_full_day_of_week[T: Bytes](
    input: ref T,
    start: Int,
  ) -> Option[(Int, Int)] {
    name_index_at(input, start, FULL_WEEKDAYS)
  }

  # Returns the entry of `SHORT_MONTHS` at the given index.
  fn short_month(index: Int) -> String {
    SHORT_MONTHS.get(index)
  }

  # Returns the entry of `FULL_MONTHS` at the given index.
  fn full_month(index: Int) -> String {
    FULL_MONTHS.get(index)
  }

  # Returns the entry of `SHORT_WEEKDAYS` at the given index.
  fn short_day_of_week(index: Int) -> String {
    SHORT_WEEKDAYS.get(index)
  }

  # Returns the entry of `FULL_WEEKDAYS` at the given index.
  fn full_day_of_week(index: Int) -> String {
    FULL_WEEKDAYS.get(index)
  }
}
Oops, something went wrong.