Add support for DateTime parsing and formatting

This adds the following methods to the DateTime type:

- DateTime.parse
- DateTime.format
- DateTime.to_iso8601
- DateTime.to_rfc2822

These methods support locale-aware parsing and formatting of DateTime values, with the standard library providing support for English, Dutch and Japanese (with the ability to add more in the future). Locale data is located in locale-specific modules such as std.locale.en and std.locale.ja, such that this data is only included if necessary.

The parsing and formatting is done according to a formatting string, which uses a syntax based on that of strftime(3)/strptime(3).

This fixes #677 and fixes #678.

Changelog: added
inko-lang · Dec 22, 2024 · 40b3293 · 40b3293
1 parent 6f78b95
commit 40b3293
Show file tree

Hide file tree

Showing 13 changed files with 1,868 additions and 55 deletions.
diff --git a/std/src/std/bytes.inko b/std/src/std/bytes.inko
@@ -0,0 +1,101 @@
+# Internal helper methods for working with bytes and byte streams.
+import std.cmp (min)
+import std.ptr
+import std.string (Bytes)
+
+# The ASCII byte value of the character `9`, the upper bound of the digit
+# range.
+let NINE = 57
+
+# The ASCII byte value of the character `0`, the lower bound of the digit
+# range and the value subtracted from a digit byte to get its numeric value.
+let ZERO = 48
+
+# Returns `true` if `byte` is an ASCII digit, i.e. if it lies in the range
+# `ZERO` to `NINE` (inclusive).
+fn inline digit?(byte: Int) -> Bool {
+  ZERO <= byte and NINE >= byte
+}
+
+# Parses exactly two base 10 digits, starting at byte offset `start`, into an
+# `Int`.
+#
+# An `Option.None` is returned if fewer than two bytes are available at
+# `start`, or if either byte isn't an ASCII digit.
+fn inline two_digits[T: Bytes](input: ref T, start: Int) -> Option[Int] {
+  if start.wrapping_add(2) > input.size { return Option.None }
+
+  let tens = input.byte(start)
+  let ones = input.byte(start.wrapping_add(1))
+
+  if digit?(tens).false? or digit?(ones).false? { return Option.None }
+
+  let high = tens.wrapping_sub(ZERO).wrapping_mul(10)
+
+  Option.Some(high.wrapping_add(ones.wrapping_sub(ZERO)))
+}
+
+# Parses exactly four base 10 digits, starting at byte offset `start`, into an
+# `Int`.
+#
+# An `Option.None` is returned if fewer than four bytes are available at
+# `start`, or if any of the four bytes isn't an ASCII digit.
+fn inline four_digits[T: Bytes](input: ref T, start: Int) -> Option[Int] {
+  if start.wrapping_add(4) > input.size { return Option.None }
+
+  let b0 = input.byte(start)
+  let b1 = input.byte(start.wrapping_add(1))
+  let b2 = input.byte(start.wrapping_add(2))
+  let b3 = input.byte(start.wrapping_add(3))
+  let valid = digit?(b0) and digit?(b1) and digit?(b2) and digit?(b3)
+
+  if valid.false? { return Option.None }
+
+  # Accumulate from the most significant digit down to the least significant
+  # one, multiplying the running total by 10 before adding the next digit.
+  let mut num = b0.wrapping_sub(ZERO)
+
+  num = num.wrapping_mul(10).wrapping_add(b1.wrapping_sub(ZERO))
+  num = num.wrapping_mul(10).wrapping_add(b2.wrapping_sub(ZERO))
+  num = num.wrapping_mul(10).wrapping_add(b3.wrapping_sub(ZERO))
+
+  Option.Some(num)
+}
+
+# Parses up to `limit` base 10 digits into an `Int`, starting at byte offset
+# `start`.
+#
+# Parsing stops at the first byte that isn't an ASCII digit, after `limit`
+# digits have been consumed, or at the end of the input, whichever comes first.
+#
+# The return value is an optional tuple containing the parsed number and the
+# number of bytes consumed. If there are no digits at `start`, an `Option.None`
+# is returned.
+#
+# The number is accumulated using wrapping arithmetic, so values that don't fit
+# in an `Int` wrap around instead of producing a panic.
+fn digits[T: Bytes](
+  input: ref T,
+  start: Int,
+  limit: Int,
+) -> Option[(Int, Int)] {
+  let mut idx = start
+  let mut num = 0
+
+  # The upper bound is relative to `start` rather than to the start of the
+  # input: we may consume at most `limit` bytes beginning at `start`, and never
+  # read past the end of the input.
+  let max = min(start + limit, input.size)
+
+  while idx < max {
+    let byte = input.byte(idx)
+
+    if digit?(byte).false? { break }
+
+    num = num.wrapping_mul(10).wrapping_add(byte.wrapping_sub(ZERO))
+    idx = idx.wrapping_add(1)
+  }
+
+  let len = idx - start
+
+  if len > 0 { Option.Some((num, len)) } else { Option.None }
+}
+
+# Finds the first name in `names` that the input starts with at byte offset
+# `start`.
+#
+# The names are tried in array order and the comparison is case sensitive. The
+# return value is an optional tuple containing the index of the matching name
+# and the size of that name in bytes. If none of the names match, an
+# `Option.None` is returned.
+fn name_index_at[T: Bytes](
+  input: ref T,
+  start: Int,
+  names: ref Array[String],
+) -> Option[(Int, Int)] {
+  let remaining = input.size - start
+  let haystack = ptr.add(input.to_pointer, start)
+  let count = names.size
+  let mut idx = 0
+
+  while idx < count {
+    let candidate = names.get(idx)
+    let size = candidate.size
+
+    if ptr.starts_with?(haystack, remaining, candidate.to_pointer, size) {
+      return Option.Some((idx, size))
+    }
+
+    idx += 1
+  }
+
+  Option.None
+}
diff --git a/std/src/std/json.inko b/std/src/std/json.inko
@@ -70,6 +70,7 @@
 # The implementation provided by this module isn't optimised for maximum
 # performance or optimal memory usage. Instead this module aims to provide an
 # implementation that's good enough for most cases.
+import std.bytes (digit?)
 import std.cmp (Equal)
 import std.fmt (Format as FormatTrait, Formatter)
 import std.int (Format)
@@ -132,10 +133,6 @@ let ESCAPE_TABLE = [
 # objects.
 let DEFAULT_PRETTY_INDENT = 2
 
-fn digit?(byte: Int) -> Bool {
-  byte >= ZERO and byte <= NINE
-}
-
 fn exponent?(byte: Int) -> Bool {
   byte == LOWER_E or byte == UPPER_E
 }

diff --git a/std/src/std/locale.inko b/std/src/std/locale.inko
@@ -0,0 +1,99 @@
+import std.string (Bytes)
+
+# A type describing a locale (e.g. English or Dutch).
+trait pub Locale {
+  # Parses a (case sensitive) abbreviated month name.
+  #
+  # The return value is an optional tuple containing the index (in the range
+  # 0-11) and the size of the name in bytes.
+  #
+  # Not all locales use abbreviated names, in which case this method should
+  # simply parse the input as full names.
+  #
+  # The `input` argument is a `String` or `ByteArray` to parse. The `start`
+  # argument is the offset to start parsing at.
+  #
+  # If the input points to a valid month name, the return value is an
+  # `Option.Some` containing the month index and name size, otherwise an
+  # `Option.None` is returned.
+  fn parse_short_month[T: Bytes](input: ref T, start: Int) -> Option[(Int, Int)]
+
+  # Parses a (case sensitive) full month name.
+  #
+  # The return value is an optional tuple containing the index (in the range
+  # 0-11) and the size of the name in bytes.
+  #
+  # The `input` argument is a `String` or `ByteArray` to parse. The `start`
+  # argument is the offset to start parsing at.
+  #
+  # If the input points to a valid month name, the return value is an
+  # `Option.Some` containing the month index and name size, otherwise an
+  # `Option.None` is returned.
+  fn parse_full_month[T: Bytes](input: ref T, start: Int) -> Option[(Int, Int)]
+
+  # Parses a (case sensitive) abbreviated name of the day of the week.
+  #
+  # The return value is an optional tuple containing the index of the day of
+  # the week (in the range 0-6, the same index accepted by
+  # `short_day_of_week`) and the size of the name in bytes.
+  #
+  # Not all locales use abbreviated names, in which case this method should
+  # simply parse the input as full names.
+  #
+  # The `input` argument is a `String` or `ByteArray` to parse. The `start`
+  # argument is the offset to start parsing at.
+  #
+  # If the input points to a valid day name, the return value is an
+  # `Option.Some` containing the day index and name size, otherwise an
+  # `Option.None` is returned.
+  fn parse_short_day_of_week[T: Bytes](
+    input: ref T,
+    start: Int,
+  ) -> Option[(Int, Int)]
+
+  # Parses a (case sensitive) full name of the day of the week.
+  #
+  # The return value is an optional tuple containing the index of the day of
+  # the week (in the range 0-6, the same index accepted by `full_day_of_week`)
+  # and the size of the name in bytes.
+  #
+  # The `input` argument is a `String` or `ByteArray` to parse. The `start`
+  # argument is the offset to start parsing at.
+  #
+  # If the input points to a valid day name, the return value is an
+  # `Option.Some` containing the day index and name size, otherwise an
+  # `Option.None` is returned.
+  fn parse_full_day_of_week[T: Bytes](
+    input: ref T,
+    start: Int,
+  ) -> Option[(Int, Int)]
+
+  # Returns the abbreviated month name for the given month index in the range
+  # 0-11.
+  #
+  # # Panics
+  #
+  # This method should panic if the month is out of bounds.
+  fn short_month(index: Int) -> String
+
+  # Returns the full month name for the given month index in the range 0-11.
+  #
+  # # Panics
+  #
+  # This method should panic if the month is out of bounds.
+  fn full_month(index: Int) -> String
+
+  # Returns the abbreviated name of the day of the week for the given day index
+  # in the range 0-6.
+  #
+  # # Panics
+  #
+  # This method should panic if the day is out of bounds.
+  fn short_day_of_week(index: Int) -> String
+
+  # Returns the full name of the day of the week for the given day index in the
+  # range 0-6.
+  #
+  # # Panics
+  #
+  # This method should panic if the day is out of bounds.
+  fn full_day_of_week(index: Int) -> String
+}
diff --git a/std/src/std/locale/en.inko b/std/src/std/locale/en.inko
@@ -0,0 +1,163 @@
+# Locale information for English.
+import std.bytes (name_index_at)
+import std.locale (Locale as LocaleTrait)
+import std.ptr
+import std.string (Bytes)
+
+# The abbreviated English month names, ordered such that index 0 is January
+# and index 11 is December.
+let SHORT_MONTHS = [
+  'Jan',
+  'Feb',
+  'Mar',
+  'Apr',
+  'May',
+  'Jun',
+  'Jul',
+  'Aug',
+  'Sep',
+  'Oct',
+  'Nov',
+  'Dec',
+]
+
+# The full English month names, using the same order and indexes as
+# `SHORT_MONTHS`.
+let FULL_MONTHS = [
+  'January',
+  'February',
+  'March',
+  'April',
+  'May',
+  'June',
+  'July',
+  'August',
+  'September',
+  'October',
+  'November',
+  'December',
+]
+
+# The abbreviated English names of the days of the week, ordered such that
+# index 0 is Monday and index 6 is Sunday.
+let SHORT_WEEKDAYS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+
+# The full English names of the days of the week, using the same order and
+# indexes as `SHORT_WEEKDAYS`.
+let FULL_WEEKDAYS = [
+  'Monday',
+  'Tuesday',
+  'Wednesday',
+  'Thursday',
+  'Friday',
+  'Saturday',
+  'Sunday',
+]
+
+# Returns the month index (in the range 0-11) identified by the three bytes
+# found at offset `start`, or an `Option.None` if those bytes aren't the first
+# three bytes of an English month name.
+#
+# The bytes are read through raw pointers without any bounds checking. The
+# callers in this module therefore verify that at least three bytes are
+# available at `start` before calling this method.
+fn month_prefix_index[T: Bytes](input: ref T, start: Int) -> Option[Int] {
+  # For English we can take advantage of the fact that for all months the first
+  # 3 bytes are unique. This allows us to efficiently reduce the amount of
+  # months to compare in full to just a single month.
+  #
+  # The first two bytes are loaded as a single 16-bit value, the third byte is
+  # loaded separately and shifted into bits 16-23.
+  #
+  # NOTE(review): combining the 16-bit load with the constants below presumably
+  # assumes a little endian host; confirm before targeting a big endian
+  # platform.
+  let a = (ptr.add(input.to_pointer, start) as Pointer[UInt16]).0 as Int
+  let b = ptr.add(input.to_pointer, start + 2).0 as Int << 16
+
+  # These magic values are the result of
+  # `(byte 2 << 16) | (byte 1 << 8) | byte 0`, i.e. the first three bytes in
+  # little endian order.
+  #
+  # In order, the cases correspond to "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+  # "Jul", "Aug", "Sep", "Oct", "Nov" and "Dec". For example, "Jan" is the
+  # bytes 0x4A, 0x61 and 0x6E, which gives 0x6E614A.
+  match b | a {
+    case 0x6E614A -> Option.Some(0)
+    case 0x626546 -> Option.Some(1)
+    case 0x72614D -> Option.Some(2)
+    case 0x727041 -> Option.Some(3)
+    case 0x79614D -> Option.Some(4)
+    case 0x6E754A -> Option.Some(5)
+    case 0x6C754A -> Option.Some(6)
+    case 0x677541 -> Option.Some(7)
+    case 0x706553 -> Option.Some(8)
+    case 0x74634F -> Option.Some(9)
+    case 0x766F4E -> Option.Some(10)
+    case 0x636544 -> Option.Some(11)
+    case _ -> Option.None
+  }
+}
+
+# Locale data for English.
+#
+# This type handles both US and UK English as in its current implementation
+# there are no differences between the two.
+#
+# The type itself has no fields: the month and day names it parses and
+# produces come from the constants defined in this module.
+#
+# # Examples
+#
+# ```inko
+# import std.locale.en (Locale)
+#
+# Locale.new.short_month(0) # => 'Jan'
+# ```
+class pub copy Locale {
+  # Returns a new `Locale`.
+  fn pub inline static new -> Locale {
+    Locale()
+  }
+}
+
+impl LocaleTrait for Locale {
+  # Parses an abbreviated (three byte) English month name found at `start`,
+  # returning the month index (0-11) and the number of bytes consumed (always
+  # 3).
+  fn parse_short_month[T: Bytes](
+    input: ref T,
+    start: Int,
+  ) -> Option[(Int, Int)] {
+    let available = input.size - start
+
+    # Every abbreviated name is exactly three bytes long, and the prefix lookup
+    # reads three bytes without checking bounds.
+    if available < 3 { return Option.None }
+
+    let index = try month_prefix_index(input, start)
+
+    Option.Some((index, 3))
+  }
+
+  # Parses a full English month name found at `start`, returning the month
+  # index (0-11) and the size of the name in bytes.
+  fn parse_full_month[T: Bytes](
+    input: ref T,
+    start: Int,
+  ) -> Option[(Int, Int)] {
+    let available = input.size - start
+
+    # "May" is the shortest month name at three bytes, so shorter input can
+    # never be a month. This also guards the unchecked three byte prefix read.
+    if available < 3 { return Option.None }
+
+    # The first three bytes single out the one month that can still match, so
+    # only the remainder of that name needs to be compared.
+    let index = try month_prefix_index(input, start)
+    let name = FULL_MONTHS.get(index)
+    let size = name.size
+
+    if available < size { return Option.None }
+
+    let mut offset = 3
+
+    while offset < size {
+      if input.byte(start + offset) != name.byte(offset) {
+        return Option.None
+      }
+
+      offset += 1
+    }
+
+    Option.Some((index, size))
+  }
+
+  # Parses an abbreviated English day name found at `start`, returning its
+  # index in `SHORT_WEEKDAYS` and the size of the name in bytes.
+  fn parse_short_day_of_week[T: Bytes](
+    input: ref T,
+    start: Int,
+  ) -> Option[(Int, Int)] {
+    name_index_at(input, start, SHORT_WEEKDAYS)
+  }
+
+  # Parses a full English day name found at `start`, returning its index in
+  # `FULL_WEEKDAYS` and the size of the name in bytes.
+  fn parse_full_day_of_week[T: Bytes](
+    input: ref T,
+    start: Int,
+  ) -> Option[(Int, Int)] {
+    name_index_at(input, start, FULL_WEEKDAYS)
+  }
+
+  # Returns the abbreviated month name stored at `index` in `SHORT_MONTHS`.
+  fn short_month(index: Int) -> String {
+    SHORT_MONTHS.get(index)
+  }
+
+  # Returns the full month name stored at `index` in `FULL_MONTHS`.
+  fn full_month(index: Int) -> String {
+    FULL_MONTHS.get(index)
+  }
+
+  # Returns the abbreviated day name stored at `index` in `SHORT_WEEKDAYS`.
+  fn short_day_of_week(index: Int) -> String {
+    SHORT_WEEKDAYS.get(index)
+  }
+
+  # Returns the full day name stored at `index` in `FULL_WEEKDAYS`.
+  fn full_day_of_week(index: Int) -> String {
+    FULL_WEEKDAYS.get(index)
+  }
+}