From 7ba26b541fce5c0a1b28035b3d62ba58e8f7f976 Mon Sep 17 00:00:00 2001 From: jtmoon79 <815261+jtmoon79@users.noreply.github.com> Date: Wed, 31 Aug 2022 21:17:30 +0000 Subject: [PATCH] be exact about whitespace Be exact about whitespace in parsing. This changes pattern matching in `format::parse::parse` as it does not allow arbitrary whitespace before, after, or between the datetime specifiers. `format/parse.rs:datetime_from_str` is exact about whitespace in the passed data `s` and passed strftime format `fmt`. Also be more exacting about colons and whitespace around timezones. Instead of unlimited colons and whitespace, only match a more limited possible set of leading colons and whitespace. Issue chronotope#660 --- src/datetime/mod.rs | 5 + src/datetime/tests.rs | 391 +++++++++++++++++++++++++++++++++--- src/format/mod.rs | 14 +- src/format/parse.rs | 384 +++++++++++++++++++++++++++++------ src/format/scan.rs | 139 ++++++++++++- src/format/strftime.rs | 76 +++++-- src/naive/date.rs | 15 +- src/naive/datetime/mod.rs | 6 - src/naive/datetime/tests.rs | 53 +++-- src/naive/time/mod.rs | 3 - src/naive/time/tests.rs | 44 +++- 11 files changed, 985 insertions(+), 145 deletions(-) diff --git a/src/datetime/mod.rs b/src/datetime/mod.rs index 3d746d27d2..0c21efb8b6 100644 --- a/src/datetime/mod.rs +++ b/src/datetime/mod.rs @@ -510,6 +510,9 @@ impl DateTime { /// RFC 2822 is the internet message standard that specifies the /// representation of times in HTTP and email headers. /// + /// The RFC 2822 standard allows arbitrary intermixed whitespace. 
+ /// See [RFC 2822 Appendix A.5] + /// /// ``` /// # use chrono::{DateTime, FixedOffset, TimeZone}; /// assert_eq!( @@ -517,6 +520,8 @@ impl DateTime { /// FixedOffset::east(0).ymd(2015, 2, 18).and_hms(23, 16, 9) /// ); /// ``` + /// + /// [RFC 2822 Appendix A.5]: https://www.rfc-editor.org/rfc/rfc2822#appendix-A.5 pub fn parse_from_rfc2822(s: &str) -> ParseResult> { const ITEMS: &[Item<'static>] = &[Item::Fixed(Fixed::RFC2822)]; let mut parsed = Parsed::new(); diff --git a/src/datetime/tests.rs b/src/datetime/tests.rs index 62fa1c7aec..e84a8c05f5 100644 --- a/src/datetime/tests.rs +++ b/src/datetime/tests.rs @@ -86,30 +86,20 @@ fn test_datetime_with_timezone() { } #[test] -fn test_datetime_rfc2822_and_rfc3339() { +fn test_datetime_rfc2822() { let edt = FixedOffset::east(5 * 60 * 60); assert_eq!( Utc.ymd(2015, 2, 18).and_hms(23, 16, 9).to_rfc2822(), "Wed, 18 Feb 2015 23:16:09 +0000" ); - assert_eq!(Utc.ymd(2015, 2, 18).and_hms(23, 16, 9).to_rfc3339(), "2015-02-18T23:16:09+00:00"); assert_eq!( edt.ymd(2015, 2, 18).and_hms_milli(23, 16, 9, 150).to_rfc2822(), "Wed, 18 Feb 2015 23:16:09 +0500" ); - assert_eq!( - edt.ymd(2015, 2, 18).and_hms_milli(23, 16, 9, 150).to_rfc3339(), - "2015-02-18T23:16:09.150+05:00" - ); assert_eq!( edt.ymd(2015, 2, 18).and_hms_micro(23, 59, 59, 1_234_567).to_rfc2822(), "Wed, 18 Feb 2015 23:59:60 +0500" ); - assert_eq!( - edt.ymd(2015, 2, 18).and_hms_micro(23, 59, 59, 1_234_567).to_rfc3339(), - "2015-02-18T23:59:60.234567+05:00" - ); - assert_eq!( DateTime::parse_from_rfc2822("Wed, 18 Feb 2015 23:16:09 +0000"), Ok(FixedOffset::east(0).ymd(2015, 2, 18).and_hms(23, 16, 9)) @@ -119,18 +109,69 @@ fn test_datetime_rfc2822_and_rfc3339() { Ok(FixedOffset::east(0).ymd(2015, 2, 18).and_hms(23, 16, 9)) ); assert_eq!( - DateTime::parse_from_rfc3339("2015-02-18T23:16:09Z"), - Ok(FixedOffset::east(0).ymd(2015, 2, 18).and_hms(23, 16, 9)) + DateTime::parse_from_rfc2822("Wed, 18 Feb 2015 23:59:60 +0500"), + Ok(edt.ymd(2015, 2, 18).and_hms_milli(23, 59, 59, 
1_000)) ); assert_eq!( - DateTime::parse_from_rfc2822("Wed, 18 Feb 2015 23:59:60 +0500"), + DateTime::parse_from_rfc2822( + "\t\t\tWed,\n\t\t18 \r\n\t\tFeb \u{3000} 2015\r\n\t\t\t23:59:60 \t+0500" + ), Ok(edt.ymd(2015, 2, 18).and_hms_milli(23, 59, 59, 1_000)) ); + + assert!(DateTime::parse_from_rfc2822("Wed, 18 Feb 2015 23:16:09 +00:00").is_err()); + assert!(DateTime::parse_from_rfc2822("Wednesday, 18 Feb 2015 23:16:09 +0000").is_err()); + assert!(DateTime::parse_from_rfc2822("Wednesday 18 Feb 2015 23:16:09 +0000").is_err()); + assert!(DateTime::parse_from_rfc2822("Wed. 18 Feb 2015 23:16:09 +0000").is_err()); assert!(DateTime::parse_from_rfc2822("31 DEC 262143 23:59 -2359").is_err()); + // note that trailing space causes failure + assert!(DateTime::parse_from_rfc2822("Wed, 18 Feb 2015 23:16:09 +0000 ").is_err()); + // XXX: example from RFC 2822 Appendix A.5. wrongly fails due to trailing " (Newfoundland Time)" + //assert!(DateTime::parse_from_rfc2822("Thu,\n\t13\n Feb\n 1969\n 23:32\n -0330 (Newfoundland Time)").is_err()); +} + +#[test] +fn test_datetime_rfc3339() { + let edt = FixedOffset::east(5 * 60 * 60); + assert_eq!(Utc.ymd(2015, 2, 18).and_hms(23, 16, 9).to_rfc3339(), "2015-02-18T23:16:09+00:00"); + assert_eq!( + edt.ymd(2015, 2, 18).and_hms_milli(23, 16, 9, 150).to_rfc3339(), + "2015-02-18T23:16:09.150+05:00" + ); + assert_eq!( + edt.ymd(2015, 2, 18).and_hms_micro(23, 59, 59, 1_234_567).to_rfc3339(), + "2015-02-18T23:59:60.234567+05:00" + ); + assert_eq!( + DateTime::parse_from_rfc3339("2015-02-18T23:59:59.123+05:00"), + Ok(edt.ymd(2015, 2, 18).and_hms_micro(23, 59, 59, 123_000)) + ); + assert_eq!( + DateTime::parse_from_rfc3339("2015-02-18T23:59:59.123456+05:00"), + Ok(edt.ymd(2015, 2, 18).and_hms_micro(23, 59, 59, 123_456)) + ); assert_eq!( - DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567+05:00"), - Ok(edt.ymd(2015, 2, 18).and_hms_micro(23, 59, 59, 1_234_567)) + DateTime::parse_from_rfc3339("2015-02-18T23:59:59.123456789+05:00"), + 
Ok(edt.ymd(2015, 2, 18).and_hms_nano(23, 59, 59, 123_456_789)) + ); + assert_eq!( + DateTime::parse_from_rfc3339("2015-02-18T23:16:09Z"), + Ok(FixedOffset::east(0).ymd(2015, 2, 18).and_hms(23, 16, 9)) ); + + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567 +05:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:059:60.234567+05:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567+05:00PST").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567+PST").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567PST").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567+0500").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567+05:00:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18 23:59:60.234567+05:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567:+05:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567+05:00 ").is_err()); + assert!(DateTime::parse_from_rfc3339(" 2015-02-18T23:59:60.234567+05:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015- 02-18T23:59:60.234567+05:00").is_err()); + assert!(DateTime::parse_from_rfc3339("2015-02-18T23:59:60.234567A+05:00").is_err()); } #[test] @@ -204,19 +245,321 @@ fn test_fixedoffset_parse_from_str() { // no test for `DateTime`, we cannot verify that much. 
} +#[test] +fn test_parse_datetime_utc() { + // valid cases + let valid = [ + "2001-02-03T04:05:06Z", + "2012-12-12T12:12:12Z", + "2015-02-18T23:16:09.153Z", + "2015-2-18T23:16:09.153Z", + "-77-02-18T23:16:09Z", + "+82701-05-6T15:9:60.898989898989Z", + ]; + for &s in &valid { + eprintln!("test_datetime_from_str valid {:?}", s); + let d = match s.parse::<DateTime<Utc>>() { + Ok(d) => d, + Err(e) => panic!("parsing `{}` has failed: {}", s, e), + }; + let s_ = format!("{:?}", d); + // `s` and `s_` may differ, but `s.parse()` and `s_.parse()` must be same + let d_ = match s_.parse::<DateTime<Utc>>() { + Ok(d) => d, + Err(e) => { + panic!("`{}` is parsed into `{:?}`, but reparsing that has failed: {}", s, d, e) + } + }; + assert!( + d == d_, + "`{}` is parsed into `{:?}`, but reparsed result \ + `{:?}` does not match", + s, + d, + d_ + ); + } + + // some invalid cases + // since `ParseErrorKind` is private, all we can do is to check if there was an error + let invalid = [ + "", + "Z", + "15Z", + "15:8:9Z", + "15-8-9Z", + "2015-15-15T15:15:15Z", + "2012-12-12T12:12:12x", + "2012-123-12T12:12:12Z", + "2012 -12-12T12:12:12Z", + "2012 -12-12T12:12:12Z", + "2012- 12-12T12:12:12Z", + "2012- 12-12T12:12:12Z", + "2012-12-12 T12:12:12Z", + "2012-12-12T 12:12:12Z", + "2012-12-12T12 :12:12Z", + "2012-12-12T12 :12:12Z", + "2012-12-12T12: 12:12Z", + "2012-12-12T12: 12:12Z", + "2012-12-12T12 : 12:12Z", + "2012-12-12T12:12:12Z ", + " 2012-12-12T12:12:12Z", + "2012-12-12t12:12:12Z", + "+ 82701-123-12T12:12:12Z", + "+802701-123-12T12:12:12Z", // out-of-bound + " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 Z", + ]; + for &s in &invalid { + eprintln!("test_datetime_from_str invalid {:?}", s); + assert!(s.parse::<DateTime<Utc>>().is_err()); + } +} + +#[test] +fn test_utc_datetime_from_str() { + // with varying spaces - should succeed + assert_eq!( + Utc.datetime_from_str(" Aug 09 2013 23:54:35", " %b %d %Y %H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("Aug 09 2013 23:54:35 ", 
"%b %d %Y %H:%M:%S "), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str(" Aug 09 2013 23:54:35 ", " %b %d %Y %H:%M:%S "), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str(" Aug 09 2013 23:54:35", " %b %d %Y %H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str(" Aug 09 2013 23:54:35", " %b %d %Y %H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("\n\tAug 09 2013 23:54:35 ", "\n\t%b %d %Y %H:%M:%S "), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("\tAug 09 2013 23:54:35\t", "\t%b %d %Y %H:%M:%S\t"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("Aug 09 2013\t23:54:35", "%b %d %Y\t%H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + assert_eq!( + Utc.datetime_from_str("Aug 09 2013\t\t23:54:35", "%b %d %Y\t\t%H:%M:%S"), + Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)), + ); + // with varying spaces - should fail + assert!(Utc.datetime_from_str(" Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35 ", "%b %d %Y %H:%M:%S").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S ").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", " %b %d %Y %H:%M:%S").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35\t", "%b %d %Y %H:%M:%S").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S\n").is_err()); + assert!(Utc.datetime_from_str("\nAug 09 2013 23:54:35", "%b %d %Y %H:%M:%S\n").is_err()); 
+ assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35 ", "%b %d %Y %H:%M:%S\n").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y\t%H:%M:%S").is_err()); + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35 !!!", "%b %d %Y %H:%M:%S ").is_err()); +} + #[test] fn test_datetime_parse_from_str() { - let ymdhms = |y, m, d, h, n, s, off| FixedOffset::east(off).ymd(y, m, d).and_hms(h, n, s); + let dt = FixedOffset::east(-9 * 60 * 60).ymd(2013, 8, 9).and_hms(23, 54, 35); + + // timezone variations + + // %Z + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %Z").is_err()); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 PST", "%b %d %Y %H:%M:%S %Z").is_err()); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 XXXXX", "%b %d %Y %H:%M:%S %Z").is_err()); + // %z + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %z"), + Ok(dt), + ); assert_eq!( - DateTime::parse_from_str("2014-5-7T12:34:56+09:30", "%Y-%m-%dT%H:%M:%S%z"), - Ok(ymdhms(2014, 5, 7, 12, 34, 56, 570 * 60)) - ); // ignore offset - assert!(DateTime::parse_from_str("20140507000000", "%Y%m%d%H%M%S").is_err()); // no offset - assert!(DateTime::parse_from_str("Fri, 09 Aug 2013 23:54:35 GMT", "%a, %d %b %Y %H:%M:%S GMT") + DateTime::parse_from_str("Aug 09 2013 23:54:35 --0900", "%b %d %Y %H:%M:%S -%z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 +-0900", "%b %d %Y %H:%M:%S +%z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00 ", "%b %d %Y %H:%M:%S %z "), + Ok(dt), + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00\n", "%b %d %Y %H:%M:%S %z").is_err() + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00\n", "%b %d %Y %H:%M:%S %z ").is_err() + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:", "%b %d %Y %H:%M:%S %z").is_err() + ); + assert!( + DateTime::parse_from_str("Aug 09 
2013 23:54:35 -09:00: ", "%b %d %Y %H:%M:%S %z ").is_err() + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:", "%b %d %Y %H:%M:%S %z ").is_err() + ); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -09", "%b %d %Y %H:%M:%S %z").is_err()); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900::", "%b %d %Y %H:%M:%S %z::"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:00", "%b %d %Y %H:%M:%S %z:00"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:00 ", "%b %d %Y %H:%M:%S %z:00 "), + Ok(dt), + ); + // %:z + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00", "%b %d %Y %H:%M:%S %:z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %:z"), + Ok(dt), + ); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -09", "%b %d %Y %H:%M:%S %:z").is_err()); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:", "%b %d %Y %H:%M:%S %:z").is_err() + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00::", "%b %d %Y %H:%M:%S %:z").is_err() + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00::", "%b %d %Y %H:%M:%S %:z::"), + Ok(dt), + ); + // %::z + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %::z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00", "%b %d %Y %H:%M:%S %::z"), + Ok(dt), + ); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:00", "%b %d %Y %H:%M:%S %::z") .is_err()); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -09", "%b %d %Y %H:%M:%S %::z").is_err()); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09001234", "%b %d %Y %H:%M:%S %::z1234"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:001234", "%b %d %Y %H:%M:%S %::z1234"), + Ok(dt), + ); + assert_eq!( + 
DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900 ", "%b %d %Y %H:%M:%S %::z "), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900\t\n", "%b %d %Y %H:%M:%S %::z\t\n"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900:", "%b %d %Y %H:%M:%S %::z:"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 :-0900:0", "%b %d %Y %H:%M:%S :%::z:0"), + Ok(dt), + ); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 :-0900: ", "%b %d %Y %H:%M:%S :%::z::") + .is_err()); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:00", "%b %d %Y %H:%M:%S %::z") + .is_err()); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 -0900: 23:54:35", "%b %d %Y %::z: %H:%M:%S"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 :-0900:0 23:54:35", "%b %d %Y :%::z:0 %H:%M:%S"), + Ok(dt), + ); + assert!(DateTime::parse_from_str("Aug 09 2013 :-0900: 23:54:35", "%b %d %Y :%::z %H:%M:%S") + .is_err()); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00:00 ", "%b %d %Y %H:%M:%S %::z ") + .is_err()); + + // %:::z + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00", "%b %d %Y %H:%M:%S %:::z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %:::z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900 ", "%b %d %Y %H:%M:%S %:::z "), + Ok(dt), + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09", "%b %d %Y %H:%M:%S %:::z").is_err() + ); + // %::::z + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %::::z").is_err() + ); + + // %#z + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00", "%b %d %Y %H:%M:%S %#z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900", "%b %d %Y %H:%M:%S %#z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 
2013 23:54:35 -09:00 ", "%b %d %Y %H:%M:%S %#z "), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -0900 ", "%b %d %Y %H:%M:%S %#z "), + Ok(dt), + ); + assert!(DateTime::parse_from_str("Aug 09 2013 23:54:35 -090", "%b %d %Y %H:%M:%S %#z").is_err()); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09", "%b %d %Y %H:%M:%S %#z"), + Ok(dt), + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:", "%b %d %Y %H:%M:%S %#z"), + Ok(dt), + ); + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09: ", "%b %d %Y %H:%M:%S %#z ").is_err() + ); + assert_eq!( + DateTime::parse_from_str("Aug 09 2013 23:54:35+-09", "%b %d %Y %H:%M:%S+%#z"), + Ok(dt), + ); assert_eq!( - Utc.datetime_from_str("Fri, 09 Aug 2013 23:54:35 GMT", "%a, %d %b %Y %H:%M:%S GMT"), - Ok(Utc.ymd(2013, 8, 9).and_hms(23, 54, 35)) + DateTime::parse_from_str("Aug 09 2013 23:54:35--09", "%b %d %Y %H:%M:%S-%#z"), + Ok(dt), ); } diff --git a/src/format/mod.rs b/src/format/mod.rs index ff0363bb94..f5e0ccd3c5 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -218,27 +218,27 @@ pub enum Fixed { /// /// It does not support parsing, its use in the parser is an immediate failure. TimezoneName, - /// Offset from the local time to UTC (`+09:00` or `-04:00` or `+00:00`). + /// Offset from the local time to UTC (`+09:00` or `-0400` or `+00:00`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace. + /// In the parser, the colon may be omitted. /// The offset is limited from `-24:00` to `+24:00`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. TimezoneOffsetColon, - /// Offset from the local time to UTC with seconds (`+09:00:00` or `-04:00:00` or `+00:00:00`). + /// Offset from the local time to UTC with seconds (`+09:00:00` or `-0400:00` or `+000000`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace. 
+ /// In the parser, the colon may be omitted. /// The offset is limited from `-24:00:00` to `+24:00:00`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. TimezoneOffsetDoubleColon, /// Offset from the local time to UTC without minutes (`+09` or `-04` or `+00`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace. + /// In the parser, the colon may be omitted. /// The offset is limited from `-24` to `+24`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. TimezoneOffsetTripleColon, - /// Offset from the local time to UTC (`+09:00` or `-04:00` or `Z`). + /// Offset from the local time to UTC (`+09:00` or `-0400` or `Z`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace, + /// In the parser, the colon may be omitted, /// and `Z` can be either in upper case or in lower case. /// The offset is limited from `-24:00` to `+24:00`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. diff --git a/src/format/parse.rs b/src/format/parse.rs index ea97a35b61..b9a970e27d 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -241,7 +241,7 @@ fn parse_rfc3339<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st /// /// - Padding-agnostic (for numeric items). /// The [`Pad`](./enum.Pad.html) field is completely ignored, -/// so one can prepend any number of whitespace then any number of zeroes before numbers. +/// so one can prepend any number of zeroes before numbers. /// /// - (Still) obeying the intrinsic parsing width. This allows, for example, parsing `HHMMSS`. 
pub fn parse<'a, I, B>(parsed: &mut Parsed, s: &str, items: I) -> ParseResult<()> @@ -296,13 +296,37 @@ where s = &s[prefix.len()..]; } - Item::Space(_) => { - s = s.trim_left(); + Item::Space(item_space) => { + for item_c in item_space.chars() { + let s_c: char = match s.chars().next() { + Some(c) => c, + None => { + return Err((s, TOO_SHORT)); + } + }; + if item_c != s_c { + return Err((s, INVALID)); + } + // advance `s` forward 1 char + s = scan::s_next(s); + } } #[cfg(any(feature = "alloc", feature = "std", test))] - Item::OwnedSpace(_) => { - s = s.trim_left(); + Item::OwnedSpace(ref item_space) => { + for item_c in item_space.chars() { + let s_c: char = match s.chars().next() { + Some(c) => c, + None => { + return Err((s, TOO_SHORT)); + } + }; + if item_c != s_c { + return Err((s, INVALID)); + } + // advance `s` forward 1 char + s = scan::s_next(s); + } } Item::Numeric(ref spec, ref _pad) => { @@ -335,7 +359,6 @@ where Internal(ref int) => match int._dummy {}, }; - s = s.trim_left(); let v = if signed { if s.starts_with('-') { let v = try_consume!(scan::number(&s[1..], 1, usize::MAX)); @@ -428,27 +451,24 @@ where | &TimezoneOffsetDoubleColon | &TimezoneOffsetTripleColon | &TimezoneOffset => { - let offset = try_consume!(scan::timezone_offset( - s.trim_left(), - scan::colon_or_space - )); + s = scan::space1(s); + let offset = try_consume!(scan::timezone_offset(s, scan::colon_or_space)); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } &TimezoneOffsetColonZ | &TimezoneOffsetZ => { - let offset = try_consume!(scan::timezone_offset_zulu( - s.trim_left(), - scan::colon_or_space - )); + s = scan::space1(s); + let offset = + try_consume!(scan::timezone_offset_zulu(s, scan::colon_or_space)); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } + &Internal(InternalFixed { val: InternalInternal::TimezoneOffsetPermissive, }) => { - let offset = try_consume!(scan::timezone_offset_permissive( - s.trim_left(), - scan::colon_or_space - )); + s = 
scan::space1(s); + let offset = + try_consume!(scan::timezone_offset_permissive(s, scan::colon_or_space)); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } @@ -472,15 +492,13 @@ where } /// Accepts a relaxed form of RFC3339. -/// A space or a 'T' are acepted as the separator between the date and time -/// parts. Additional spaces are allowed between each component. +/// A space or a 'T' are accepted as the separator between the date and time +/// parts. /// -/// All of these examples are equivalent: /// ``` /// # use chrono::{DateTime, offset::FixedOffset}; -/// "2012-12-12T12:12:12Z".parse::<DateTime<FixedOffset>>(); -/// "2012-12-12 12:12:12Z".parse::<DateTime<FixedOffset>>(); -/// "2012- 12-12T12: 12:12Z".parse::<DateTime<FixedOffset>>(); +/// "2000-01-02T03:04:05Z".parse::<DateTime<FixedOffset>>(); +/// "2000-01-02 03:04:05Z".parse::<DateTime<FixedOffset>>(); /// ``` impl str::FromStr for DateTime<FixedOffset> { type Err = ParseError; @@ -488,25 +506,19 @@ impl str::FromStr for DateTime<FixedOffset> { fn from_str(s: &str) -> ParseResult<DateTime<FixedOffset>> { const DATE_ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Year, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Month, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Day, Pad::Zero), ]; const TIME_ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Hour, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Minute, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Second, Pad::Zero), Item::Fixed(Fixed::Nanosecond), - Item::Space(""), Item::Fixed(Fixed::TimezoneOffsetZ), - Item::Space(""), ]; let mut parsed = Parsed::new(); @@ -528,7 +540,6 @@ impl str::FromStr for DateTime<FixedOffset> { #[cfg(test)] #[test] fn test_parse() { - use super::IMPOSSIBLE; use super::*; // workaround for Rust issue #22255 @@ -553,35 +564,66 @@ fn test_parse() { check!("", []; ); check!(" ", []; TOO_LONG); check!("a", []; TOO_LONG); + check!("abc", []; TOO_LONG); // whitespaces check!("", [sp!("")]; ); - check!(" ", [sp!("")]; ); - check!("\t", [sp!("")]; ); - check!(" \n\r \n", 
[sp!("")]; ); + check!(" ", [sp!(" ")]; ); + check!(" ", [sp!(" ")]; ); + check!(" ", [sp!(" "), sp!(" ")]; ); + check!("\t", [sp!("\t")]; ); + check!("\t\r", [sp!("\t\r")]; ); + check!("\t\r ", [sp!("\t\r ")]; ); + check!(" \n\r \n", [sp!(" \n\r \n")]; ); + check!("abc", []; TOO_LONG); check!("a", [sp!("")]; TOO_LONG); + check!("abc", [sp!("")]; TOO_LONG); + check!(" ", [sp!(" ")]; TOO_LONG); + check!(" \t\n", [sp!(" \t")]; TOO_LONG); + check!("", [sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" "), sp!(" ")]; TOO_SHORT); // literal + check!("", [lit!("")]; ); check!("", [lit!("a")]; TOO_SHORT); + check!("🤠", [lit!("🤠")]; ); + check!("🤠a", [lit!("🤠"), lit!("a")]; ); + check!("🤠a🤠", [lit!("🤠"), lit!("a🤠")]; ); check!(" ", [lit!("a")]; INVALID); check!("a", [lit!("a")]; ); check!("aa", [lit!("a")]; TOO_LONG); check!("A", [lit!("a")]; INVALID); - check!("xy", [lit!("xy")]; ); - check!("xy", [lit!("x"), lit!("y")]; ); + check!("1", [lit!("1")]; ); + check!("1234", [lit!("1234")]; ); + check!("+1234", [lit!("+1234")]; ); + check!("PST", [lit!("PST")]; ); + check!("xy", [lit!("xy")]; ); // literals can be together + check!("xy", [lit!("x"), lit!("y")]; ); // or literals can be apart check!("x y", [lit!("x"), lit!("y")]; INVALID); - check!("xy", [lit!("x"), sp!(""), lit!("y")]; ); - check!("x y", [lit!("x"), sp!(""), lit!("y")]; ); + check!("x y", [lit!("x"), sp!(" "), lit!("y")]; ); + + // whitespaces + literals + check!("a\n", [lit!("a"), sp!("\n")]; ); + check!("\tab\n", [sp!("\t"), lit!("ab"), sp!("\n")]; ); + check!("ab\tcd\ne", [lit!("ab"), sp!("\t"), lit!("cd"), sp!("\n"), lit!("e")]; ); + check!("+1ab\tcd\r\n+,.", [lit!("+1ab"), sp!("\t"), lit!("cd"), sp!("\r\n"), lit!("+,.")]; ); + // whitespace and literals can be intermixed + check!("a\tb", [lit!("a\tb")]; ); + check!("a\tb", [sp!("a\tb")]; ); // numeric check!("1987", [num!(Year)]; year: 1987); check!("1987 ", 
[num!(Year)]; TOO_LONG); check!("0x12", [num!(Year)]; TOO_LONG); // `0` is parsed check!("x123", [num!(Year)]; INVALID); + check!("o123", [num!(Year)]; INVALID); check!("2015", [num!(Year)]; year: 2015); check!("0000", [num!(Year)]; year: 0); check!("9999", [num!(Year)]; year: 9999); - check!(" \t987", [num!(Year)]; year: 987); + check!(" \t987", [sp!(" \t"), num!(Year)]; year: 987); + check!(" \t987🤠", [sp!(" \t"), num!(Year), lit!("🤠")]; year: 987); check!("5", [num!(Year)]; year: 5); check!("5\0", [num!(Year)]; TOO_LONG); check!("\x005", [num!(Year)]; INVALID); @@ -590,12 +632,14 @@ fn test_parse() { check!("12345", [nums!(Year), lit!("5")]; year: 1234); check!("12345", [num0!(Year), lit!("5")]; year: 1234); check!("12341234", [num!(Year), num!(Year)]; year: 1234); - check!("1234 1234", [num!(Year), num!(Year)]; year: 1234); - check!("1234 1235", [num!(Year), num!(Year)]; IMPOSSIBLE); + check!("1234 1234", [num!(Year), sp!(" "), num!(Year)]; year: 1234); + check!("1234 1235", [num!(Year), num!(Year)]; INVALID); check!("1234 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); check!("1234x1234", [num!(Year), lit!("x"), num!(Year)]; year: 1234); check!("1234xx1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); - check!("1234 x 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); + check!("1234xx1234", [num!(Year), lit!("xx"), num!(Year)]; year: 1234); + check!("1234 x 1234", [num!(Year), sp!(" "), lit!("x"), sp!(" "), num!(Year)]; year: 1234); + check!("1234 x 1235", [num!(Year), sp!(" "), lit!("x"), sp!(" "), lit!("1235")]; year: 1234); // signed numeric check!("-42", [num!(Year)]; year: -42); @@ -604,10 +648,10 @@ fn test_parse() { check!("+0042", [num!(Year)]; year: 42); check!("-42195", [num!(Year)]; year: -42195); check!("+42195", [num!(Year)]; year: 42195); - check!(" -42195", [num!(Year)]; year: -42195); - check!(" +42195", [num!(Year)]; year: 42195); - check!(" - 42", [num!(Year)]; INVALID); - check!(" + 42", [num!(Year)]; INVALID); + check!(" 
-42195", [sp!(" "), num!(Year)]; year: -42195); + check!(" +42195", [sp!(" "), num!(Year)]; year: 42195); + check!(" - 42", [sp!(" "), num!(Year)]; INVALID); + check!(" + 42", [sp!(" "), num!(Year)]; INVALID); check!("-", [num!(Year)]; TOO_SHORT); check!("+", [num!(Year)]; TOO_SHORT); @@ -615,26 +659,28 @@ fn test_parse() { check!("345", [num!(Ordinal)]; ordinal: 345); check!("+345", [num!(Ordinal)]; INVALID); check!("-345", [num!(Ordinal)]; INVALID); - check!(" 345", [num!(Ordinal)]; ordinal: 345); - check!(" +345", [num!(Ordinal)]; INVALID); - check!(" -345", [num!(Ordinal)]; INVALID); + check!("\t345", [sp!("\t"), num!(Ordinal)]; ordinal: 345); + check!(" +345", [sp!(" "), num!(Ordinal)]; INVALID); + check!(" -345", [sp!(" "), num!(Ordinal)]; INVALID); // various numeric fields check!("1234 5678", - [num!(Year), num!(IsoYear)]; + [num!(Year), sp!(" "), num!(IsoYear)]; year: 1234, isoyear: 5678); check!("12 34 56 78", - [num!(YearDiv100), num!(YearMod100), num!(IsoYearDiv100), num!(IsoYearMod100)]; + [num!(YearDiv100), sp!(" "), num!(YearMod100), sp!(" "), num!(IsoYearDiv100), + sp!(" "), num!(IsoYearMod100)]; year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78); - check!("1 2 3 4 5 6", - [num!(Month), num!(Day), num!(WeekFromSun), num!(WeekFromMon), num!(IsoWeek), - num!(NumDaysFromSun)]; + check!("1 2 3\t4 5\n6", + [num!(Month), sp!(" "), num!(Day), sp!(" "), num!(WeekFromSun), sp!("\t"), + num!(WeekFromMon), sp!(" "), num!(IsoWeek), sp!("\n"), num!(NumDaysFromSun)]; month: 1, day: 2, week_from_sun: 3, week_from_mon: 4, isoweek: 5, weekday: Weekday::Sat); check!("7 89 01", - [num!(WeekdayFromMon), num!(Ordinal), num!(Hour12)]; + [num!(WeekdayFromMon), sp!(" "), num!(Ordinal), sp!(" "), num!(Hour12)]; weekday: Weekday::Sun, ordinal: 89, hour_mod_12: 1); check!("23 45 6 78901234 567890123", - [num!(Hour), num!(Minute), num!(Second), num!(Nanosecond), num!(Timestamp)]; + [num!(Hour), sp!(" "), num!(Minute), sp!(" "), num!(Second), sp!(" 
"), + num!(Nanosecond), sp!(" "), num!(Timestamp)]; hour_div_12: 1, hour_mod_12: 11, minute: 45, second: 6, nanosecond: 78_901_234, timestamp: 567_890_123); @@ -680,7 +726,10 @@ fn test_parse() { check!("AM", [fix!(UpperAmPm)]; hour_div_12: 0); check!("PM", [fix!(UpperAmPm)]; hour_div_12: 1); check!("Am", [fix!(LowerAmPm)]; hour_div_12: 0); + check!(" Am", [sp!(" "), fix!(LowerAmPm)]; hour_div_12: 0); check!(" Am", [fix!(LowerAmPm)]; INVALID); + check!("a.m.", [fix!(LowerAmPm)]; INVALID); + check!("A.M.", [fix!(LowerAmPm)]; INVALID); check!("ame", [fix!(LowerAmPm)]; TOO_LONG); // `am` is parsed check!("a", [fix!(LowerAmPm)]; TOO_SHORT); check!("p", [fix!(LowerAmPm)]; TOO_SHORT); @@ -697,10 +746,14 @@ fn test_parse() { check!(".42", [fix!(Nanosecond)]; nanosecond: 420_000_000); check!(".421", [fix!(Nanosecond)]; nanosecond: 421_000_000); check!(".42195", [fix!(Nanosecond)]; nanosecond: 421_950_000); + check!(".421951", [fix!(Nanosecond)]; nanosecond: 421_951_000); + check!(".4219512", [fix!(Nanosecond)]; nanosecond: 421_951_200); + check!(".42195123", [fix!(Nanosecond)]; nanosecond: 421_951_230); check!(".421950803", [fix!(Nanosecond)]; nanosecond: 421_950_803); check!(".421950803547", [fix!(Nanosecond)]; nanosecond: 421_950_803); check!(".000000003547", [fix!(Nanosecond)]; nanosecond: 3); check!(".000000000547", [fix!(Nanosecond)]; nanosecond: 0); + check!(".0000000009999999999999999999999999", [fix!(Nanosecond)]; nanosecond: 0); check!(".", [fix!(Nanosecond)]; TOO_SHORT); check!(".4x", [fix!(Nanosecond)]; TOO_LONG); check!(". 
4", [fix!(Nanosecond)]; INVALID); @@ -740,6 +793,26 @@ fn test_parse() { check!(".42100000", [internal_fix!(Nanosecond9NoDot)]; INVALID); // fixed: timezone offsets + + // TimezoneOffset + check!("+0", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+00", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+000", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+0000", [fix!(TimezoneOffset)]; offset: 0); + check!("+00000", [fix!(TimezoneOffset)]; TOO_LONG); + check!("+000000", [fix!(TimezoneOffset)]; TOO_LONG); + check!("0000", [fix!(TimezoneOffset)]; INVALID); + check!("000000", [fix!(TimezoneOffset)]; INVALID); + check!("00:00", [fix!(TimezoneOffset)]; INVALID); + check!("Z00:00", [fix!(TimezoneOffset)]; INVALID); + check!("X00:00", [fix!(TimezoneOffset)]; INVALID); + check!("+00::00", [fix!(TimezoneOffset)]; INVALID); + check!("+00: :00", [fix!(TimezoneOffset)]; INVALID); + check!("+00:::00", [fix!(TimezoneOffset)]; INVALID); + check!("+00::::00", [fix!(TimezoneOffset)]; INVALID); + check!("+00:00:00", [fix!(TimezoneOffset)]; TOO_LONG); + check!("+00:0000", [fix!(TimezoneOffset)]; TOO_LONG); + check!("+0000:00", [fix!(TimezoneOffset)]; TOO_LONG); check!("+00:00", [fix!(TimezoneOffset)]; offset: 0); check!("-00:00", [fix!(TimezoneOffset)]; offset: 0); check!("+00:01", [fix!(TimezoneOffset)]; offset: 60); @@ -758,8 +831,19 @@ fn test_parse() { check!("#12:34", [fix!(TimezoneOffset)]; INVALID); check!("12:34", [fix!(TimezoneOffset)]; INVALID); check!("+12:34 ", [fix!(TimezoneOffset)]; TOO_LONG); + check!("+12:34", [fix!(TimezoneOffset)]; offset: 754 * 60); + check!("-12:34", [fix!(TimezoneOffset)]; offset: -754 * 60); check!(" +12:34", [fix!(TimezoneOffset)]; offset: 754 * 60); - check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -754 * 60); + check!(" -12:34", [fix!(TimezoneOffset)]; offset: -754 * 60); + check!(" +12:34", [fix!(TimezoneOffset)]; INVALID); + check!(" -12:34", [fix!(TimezoneOffset)]; INVALID); + check!("-12: 34", [fix!(TimezoneOffset)]; offset: -754 * 60); + 
check!("-12 :34", [fix!(TimezoneOffset)]; offset: -754 * 60); + check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -754 * 60); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12: 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 :34", [fix!(TimezoneOffset)]; INVALID); check!("", [fix!(TimezoneOffset)]; TOO_SHORT); check!("+", [fix!(TimezoneOffset)]; TOO_SHORT); check!("+1", [fix!(TimezoneOffset)]; TOO_SHORT); @@ -768,20 +852,143 @@ fn test_parse() { check!("+1234", [fix!(TimezoneOffset)]; offset: 754 * 60); check!("+12345", [fix!(TimezoneOffset)]; TOO_LONG); check!("+12345", [fix!(TimezoneOffset), num!(Day)]; offset: 754 * 60, day: 5); + check!(":Z", [fix!(TimezoneOffset)]; INVALID); check!("Z", [fix!(TimezoneOffset)]; INVALID); check!("z", [fix!(TimezoneOffset)]; INVALID); + check!(" :Z", [fix!(TimezoneOffset)]; INVALID); + check!(" Z", [fix!(TimezoneOffset)]; INVALID); + check!(" z", [fix!(TimezoneOffset)]; INVALID); + + // TimezoneOffsetColon + check!("+0", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+00", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+000", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+0000", [fix!(TimezoneOffsetColon)]; offset: 0); + check!("+0000:", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+00000", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+000000", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("0000", [fix!(TimezoneOffsetColon)]; INVALID); + check!("000000", [fix!(TimezoneOffsetColon)]; INVALID); + check!("00:00", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+00::00", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+00: :00", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+00:::00", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+00::::00", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+00::00", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+00:00:00", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+00:0000", 
[fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+0000:00", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+00:00", [fix!(TimezoneOffsetColon)]; offset: 0); + check!("-00:00", [fix!(TimezoneOffsetColon)]; offset: 0); + check!("-0000", [fix!(TimezoneOffsetColon)]; offset: 0); + check!("+00:01", [fix!(TimezoneOffsetColon)]; offset: 60); + check!("+0001", [fix!(TimezoneOffsetColon)]; offset: 60); + check!("-00:01", [fix!(TimezoneOffsetColon)]; offset: -60); + check!("-0001", [fix!(TimezoneOffsetColon)]; offset: -60); + check!("+00:30", [fix!(TimezoneOffsetColon)]; offset: 30 * 60); + check!("+0030", [fix!(TimezoneOffsetColon)]; offset: 30 * 60); + check!("-00:30", [fix!(TimezoneOffsetColon)]; offset: -30 * 60); + check!("-0030", [fix!(TimezoneOffsetColon)]; offset: -30 * 60); + check!("+04:56", [fix!(TimezoneOffsetColon)]; offset: 296 * 60); + check!("+0456", [fix!(TimezoneOffsetColon)]; offset: 296 * 60); + check!("-04:56", [fix!(TimezoneOffsetColon)]; offset: -296 * 60); + check!("-0456", [fix!(TimezoneOffsetColon)]; offset: -296 * 60); + check!("+24:00", [fix!(TimezoneOffsetColon)]; offset: 24 * 60 * 60); + check!("+2400", [fix!(TimezoneOffsetColon)]; offset: 24 * 60 * 60); + check!("#12:34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("#1234", [fix!(TimezoneOffsetColon)]; INVALID); + check!("1234", [fix!(TimezoneOffsetColon)]; INVALID); + check!("12:34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12:34 ", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+12:34", [fix!(TimezoneOffsetColon)]; offset: 754 * 60); + check!("+1234", [fix!(TimezoneOffsetColon)]; offset: 754 * 60); + check!("-12:34", [fix!(TimezoneOffsetColon)]; offset: -754 * 60); + check!("-12: 34", [fix!(TimezoneOffsetColon)]; offset: -754 * 60); + check!("-12 :34", [fix!(TimezoneOffsetColon)]; offset: -754 * 60); + check!("-12 : 34", [fix!(TimezoneOffsetColon)]; offset: -754 * 60); + check!("-12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("-12 : 34", 
[fix!(TimezoneOffsetColon)]; INVALID); + check!("-12: 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("-12 :34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("-1234", [fix!(TimezoneOffsetColon)]; offset: -754 * 60); + check!("\t+12:34", [fix!(TimezoneOffsetColon)]; offset: 754 * 60); + check!("\t\t+12:34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!(":", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+1", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+12", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+123", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+1234", [fix!(TimezoneOffsetColon)]; offset: 754 * 60); + check!("+12345", [fix!(TimezoneOffsetColon)]; TOO_LONG); + check!("+12345", [fix!(TimezoneOffsetColon), num!(Day)]; offset: 754 * 60, day: 5); + // testing `TimezoneOffsetColon` also tests same path as `TimezoneOffsetDoubleColon` + // and `TimezoneOffsetTripleColon` for function `parse_internal`. + // No need for separate tests. 
+ + // TimezoneOffsetZ + check!("+0", [fix!(TimezoneOffsetZ)]; TOO_SHORT); + check!("+00", [fix!(TimezoneOffsetZ)]; TOO_SHORT); + check!("+000", [fix!(TimezoneOffsetZ)]; TOO_SHORT); + check!("+0000", [fix!(TimezoneOffsetZ)]; offset: 0); + check!("+0000:", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("+00000", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("+000000", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("0000", [fix!(TimezoneOffsetZ)]; INVALID); + check!("000000", [fix!(TimezoneOffsetZ)]; INVALID); + check!("00:00", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+00::00", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+00:00:00", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("+00:0000", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("+0000:00", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("Z00:00", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!("X00:00", [fix!(TimezoneOffsetZ)]; INVALID); check!("Z", [fix!(TimezoneOffsetZ)]; offset: 0); check!("z", [fix!(TimezoneOffsetZ)]; offset: 0); + check!(" Z", [fix!(TimezoneOffsetZ)]; offset: 0); + check!(" z", [fix!(TimezoneOffsetZ)]; offset: 0); + check!("Z ", [fix!(TimezoneOffsetZ)]; TOO_LONG); + check!(":Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!(":z", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!("-Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!(" +Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!(" -Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+:Z", [fix!(TimezoneOffsetZ)]; INVALID); check!("Y", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+00:01", [fix!(TimezoneOffsetZ)]; offset: 60); + check!("-00:01", [fix!(TimezoneOffsetZ)]; offset: -60); + check!("+00: 01", [fix!(TimezoneOffsetZ)]; offset: 60); + check!("+00 :01", [fix!(TimezoneOffsetZ)]; offset: 60); + check!("+00 : 01", [fix!(TimezoneOffsetZ)]; offset: 60); + check!("+00 : 01", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+00 : 01", [fix!(TimezoneOffsetZ)]; INVALID); + check!("00:01", [fix!(TimezoneOffsetZ)]; 
INVALID); check!("Zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); check!("zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); check!("+1234ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60); check!("+12:34ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60); + // Testing `TimezoneOffsetZ` also tests same path as `TimezoneOffsetColonZ` + // in function `parse_internal`. + // So no need for separate tests. + + // TimezoneOffsetPermissive check!("Z", [internal_fix!(TimezoneOffsetPermissive)]; offset: 0); check!("z", [internal_fix!(TimezoneOffsetPermissive)]; offset: 0); check!("+12:00", [internal_fix!(TimezoneOffsetPermissive)]; offset: 12 * 60 * 60); check!("+12", [internal_fix!(TimezoneOffsetPermissive)]; offset: 12 * 60 * 60); + check!("-00:01", [internal_fix!(TimezoneOffsetPermissive)]; offset: -60); + check!("00:00", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+00::00", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+00:00:00", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG); + + // TimezoneName + check!("CEST", [fix!(TimezoneName)]; ); + check!("cest", [fix!(TimezoneName)]; ); + check!("XXXXXXXX", [fix!(TimezoneName)]; ); // not a real timezone name check!("CEST 5", [fix!(TimezoneName), lit!(" "), num!(Day)]; day: 5); + check!("CEST ", [fix!(TimezoneName)]; TOO_LONG); + check!(" CEST", [fix!(TimezoneName)]; TOO_LONG); // some practical examples check!("2015-02-04T14:37:05+09:00", @@ -789,23 +996,57 @@ fn test_parse() { num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)]; year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2, minute: 37, second: 5, offset: 32400); + // XXX: known failures + //check!("2015-02-04T14:37:05+09 ", + // [num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"), + // num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset), + // lit!(" ")]; + // year: 2015, month: 2, day: 4, hour_div_12: 
1, hour_mod_12: 2, + // minute: 37, second: 5, offset: 32400); + //check!("2015-02-04T14:37:05+09๐Ÿค ", + // [num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"), + // num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset), + // lit!("๐Ÿค ")]; + // year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2, + // minute: 37, second: 5, offset: 32400); check!("20150204143705567", [num!(Year), num!(Month), num!(Day), num!(Hour), num!(Minute), num!(Second), internal_fix!(Nanosecond3NoDot)]; year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2, minute: 37, second: 5, nanosecond: 567000000); - check!("Mon, 10 Jun 2013 09:32:37 GMT", + check!("20150204143705.567", + [num!(Year), num!(Month), num!(Day), + num!(Hour), num!(Minute), num!(Second), fix!(Nanosecond)]; + year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2, + minute: 37, second: 5, nanosecond: 567000000); + check!("20150204143705.567891", + [num!(Year), num!(Month), num!(Day), + num!(Hour), num!(Minute), num!(Second), fix!(Nanosecond)]; + year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2, + minute: 37, second: 5, nanosecond: 567891000); + check!("20150204143705.567891023", + [num!(Year), num!(Month), num!(Day), + num!(Hour), num!(Minute), num!(Second), fix!(Nanosecond)]; + year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2, + minute: 37, second: 5, nanosecond: 567891023); + check!("Mon, 10 Jun 2013 09:32:37 GMT", [fix!(ShortWeekdayName), lit!(","), sp!(" "), num!(Day), sp!(" "), fix!(ShortMonthName), sp!(" "), num!(Year), sp!(" "), num!(Hour), lit!(":"), - num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT")]; + num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT")]; + year: 2013, month: 6, day: 10, weekday: Weekday::Mon, + hour_div_12: 0, hour_mod_12: 9, minute: 32, second: 37); + check!("๐Ÿค Mon, 10 Jun๐Ÿค 2013 09:32:37 GMT๐Ÿค ", + [lit!("๐Ÿค "), fix!(ShortWeekdayName), lit!(","), sp!(" "), num!(Day), sp!(" "), + 
fix!(ShortMonthName), lit!("๐Ÿค "), num!(Year), sp!(" "), num!(Hour), lit!(":"), + num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT"), lit!("๐Ÿค ")]; year: 2013, month: 6, day: 10, weekday: Weekday::Mon, hour_div_12: 0, hour_mod_12: 9, minute: 32, second: 37); check!("Sun Aug 02 13:39:15 CEST 2020", - [fix!(ShortWeekdayName), sp!(" "), fix!(ShortMonthName), sp!(" "), + [fix!(ShortWeekdayName), sp!(" "), fix!(ShortMonthName), sp!(" "), num!(Day), sp!(" "), num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), sp!(" "), fix!(TimezoneName), sp!(" "), num!(Year)]; - year: 2020, month: 8, day: 2, weekday: Weekday::Sun, - hour_div_12: 1, hour_mod_12: 1, minute: 39, second: 15); + year: 2020, month: 8, day: 2, weekday: Weekday::Sun, + hour_div_12: 1, hour_mod_12: 1, minute: 39, second: 15); check!("20060102150405", [num!(Year), num!(Month), num!(Day), num!(Hour), num!(Minute), num!(Second)]; year: 2006, month: 1, day: 2, hour_div_12: 1, hour_mod_12: 3, minute: 4, second: 5); @@ -818,6 +1059,22 @@ fn test_parse() { check!("12345678901234.56789", [num!(Timestamp), fix!(Nanosecond)]; nanosecond: 567_890_000, timestamp: 12_345_678_901_234); + + // docstring examples from `impl str::FromStr` + check!("2000-01-02T03:04:05Z", + [num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"), + num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), + internal_fix!(TimezoneOffsetPermissive)]; + year: 2000, month: 1, day: 2, + hour_div_12: 0, hour_mod_12: 3, minute: 4, second: 5, + offset: 0); + check!("2000-01-02 03:04:05Z", + [num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), sp!(" "), + num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), + internal_fix!(TimezoneOffsetPermissive)]; + year: 2000, month: 1, day: 2, + hour_div_12: 0, hour_mod_12: 3, minute: 4, second: 5, + offset: 0); } #[cfg(test)] @@ -834,6 +1091,8 @@ fn test_parse_rfc2822() { ("Fri, 2 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // folding 
whitespace ("Fri, 02 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // leading zero ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC)", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // trailing comment + ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC)", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // intermixed arbitrary whitespace + ("Tue, 20 Jan 2015\t17:35:20\t-0800\t\t(UTC)", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // intermixed arbitrary whitespace ( r"Tue, 20 Jan 2015 17:35:20 -0800 ( (UTC ) (\( (a)\(( \t ) ) \\( \) ))", Ok("Tue, 20 Jan 2015 17:35:20 -0800"), @@ -858,6 +1117,7 @@ fn test_parse_rfc2822() { ("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // bad offset ("6 Jun 1944 04:00:00Z", Err(INVALID)), // bad offset (zulu not allowed) ("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)), // bad named time zone + ("Tue, 20 Jan 2015๐Ÿ˜ˆ17:35:20 -0800", Err(INVALID)), // bad character! ]; fn rfc2822_to_datetime(date: &str) -> ParseResult> { diff --git a/src/format/scan.rs b/src/format/scan.rs index 7334a3b2ed..512c410e30 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -198,9 +198,69 @@ pub(super) fn space(s: &str) -> ParseResult<&str> { } } -/// Consumes any number (including zero) of colon or spaces. +/// returns slice `s` remaining after first char +/// if `s.len() <= 1` then return an empty slice +pub(super) fn s_next(s: &str) -> &str { + if s.len() <= 1 { + return &s[s.len()..]; + } + match s.char_indices().nth(1) { + Some((offset, _)) => &s[offset..], + None => { + panic!("unexpected None for s {:?}.char_indices().nth(1)", s) + } + } +} + +/// Consume one whitespace from the start of `s` if the first `char` is +/// whitespace. +pub(super) fn space1(s: &str) -> &str { + match s.chars().next() { + Some(c) if c.is_whitespace() => s_next(s), + Some(_) | None => s, + } +} + +/// Allow a colon with possible whitespace padding. 
+/// Consumes zero or one of leading patterns +/// `":"`, `" "`, `" :"`, `": "`, or `" : "` pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> { - Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace())) + let c0_ = match s.chars().next() { + Some(c) => c, + None => { + return Ok(s); + } + }; + if c0_ != ':' && !c0_.is_whitespace() { + return Ok(s); + } + let c1_ = s.chars().nth(1); + match (c0_, c1_) { + (c0, None) if c0 == ':' || c0.is_whitespace() => { + return Ok(s_next(s)); + } + (c0, Some(c1)) if c0 == ':' && c1.is_whitespace() => { + return Ok(s_next(s_next(s))); + } + (c0, Some(c1)) if c0 == ':' && !c1.is_whitespace() => { + return Ok(s_next(s)); + } + (c0, Some(c1)) if c0.is_whitespace() && (!c1.is_whitespace() && c1 != ':') => { + return Ok(s_next(s)); + } + _ => {} + } + let c2_ = s.chars().nth(2); + match (c0_, c1_, c2_) { + (c0, Some(c1), None) if c0.is_whitespace() && c1 == ':' => Ok(s_next(s_next(s))), + (c0, Some(c1), Some(c2)) if c0.is_whitespace() && c1 == ':' && !c2.is_whitespace() => { + Ok(s_next(s_next(s))) + } + (c0, Some(c1), Some(c2)) if c0.is_whitespace() && c1 == ':' && c2.is_whitespace() => { + Ok(s_next(s_next(s_next(s)))) + } + _ => Ok(s), + } } /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible. @@ -238,6 +298,16 @@ where }; s = &s[1..]; + // special check for `Z` to return more accurate error `INVALID`. + // Otherwise the upcoming match for digits might return error `TOO_SHORT` + // which is confusing for the user. + match s.as_bytes().first() { + Some(&b'Z') | Some(&b'z') => { + return Err(INVALID); + } + _ => {} + } + // hours (00--99) let hours = match digits(s)? 
{ (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), @@ -413,3 +483,68 @@ fn test_rfc2822_comments() { ); } } + +#[test] +fn test_space() { + assert_eq!(space(""), Err(TOO_SHORT)); + assert_eq!(space(" "), Ok("")); + assert_eq!(space(" \t"), Ok("")); + assert_eq!(space(" \ta"), Ok("a")); + assert_eq!(space(" \ta "), Ok("a ")); + assert_eq!(space("a"), Err(INVALID)); + assert_eq!(space("a "), Err(INVALID)); +} + +#[test] +fn test_s_next() { + assert_eq!(s_next(""), ""); + assert_eq!(s_next(" "), ""); + assert_eq!(s_next("a"), ""); + assert_eq!(s_next("ab"), "b"); + assert_eq!(s_next("abc"), "bc"); + assert_eq!(s_next("๐Ÿ˜พb"), "b"); + assert_eq!(s_next("a๐Ÿ˜พ"), "๐Ÿ˜พ"); + assert_eq!(s_next("๐Ÿ˜พbc"), "bc"); + assert_eq!(s_next("a๐Ÿ˜พc"), "๐Ÿ˜พc"); +} + +#[test] +fn test_space1() { + assert_eq!(space1(""), ""); + assert_eq!(space1(" "), ""); + assert_eq!(space1("\t"), ""); + assert_eq!(space1("\t\t"), "\t"); + assert_eq!(space1(" "), " "); + assert_eq!(space1("a"), "a"); + assert_eq!(space1("a "), "a "); + assert_eq!(space1("ab"), "ab"); + assert_eq!(space1("๐Ÿ˜ผ"), "๐Ÿ˜ผ"); + assert_eq!(space1("๐Ÿ˜ผb"), "๐Ÿ˜ผb"); +} + +#[test] +fn test_colon_or_space() { + assert_eq!(colon_or_space(""), Ok("")); + assert_eq!(colon_or_space(" "), Ok("")); + assert_eq!(colon_or_space(":"), Ok("")); + assert_eq!(colon_or_space(" :"), Ok("")); + assert_eq!(colon_or_space(": "), Ok("")); + assert_eq!(colon_or_space(" : "), Ok("")); + assert_eq!(colon_or_space(" :: "), Ok(": ")); + assert_eq!(colon_or_space("๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ")); + assert_eq!(colon_or_space("๐Ÿ˜ธ๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ๐Ÿ˜ธ")); + assert_eq!(colon_or_space("๐Ÿ˜ธ:"), Ok("๐Ÿ˜ธ:")); + assert_eq!(colon_or_space("๐Ÿ˜ธ "), Ok("๐Ÿ˜ธ ")); + assert_eq!(colon_or_space(" ๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ")); + assert_eq!(colon_or_space(":๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ")); + assert_eq!(colon_or_space(":๐Ÿ˜ธ "), Ok("๐Ÿ˜ธ ")); + assert_eq!(colon_or_space(" :๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ")); + assert_eq!(colon_or_space(" :๐Ÿ˜ธ "), Ok("๐Ÿ˜ธ ")); + 
assert_eq!(colon_or_space(" :๐Ÿ˜ธ:"), Ok("๐Ÿ˜ธ:")); + assert_eq!(colon_or_space(": ๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ")); + assert_eq!(colon_or_space(": ๐Ÿ˜ธ"), Ok(" ๐Ÿ˜ธ")); + assert_eq!(colon_or_space(": :๐Ÿ˜ธ"), Ok(":๐Ÿ˜ธ")); + assert_eq!(colon_or_space(" : ๐Ÿ˜ธ"), Ok("๐Ÿ˜ธ")); + assert_eq!(colon_or_space(" ::๐Ÿ˜ธ"), Ok(":๐Ÿ˜ธ")); + assert_eq!(colon_or_space(" :: ๐Ÿ˜ธ"), Ok(": ๐Ÿ˜ธ")); +} diff --git a/src/format/strftime.rs b/src/format/strftime.rs index bece1b58a3..eacb68a485 100644 --- a/src/format/strftime.rs +++ b/src/format/strftime.rs @@ -466,28 +466,54 @@ impl<'a> Iterator for StrftimeItems<'a> { } } - // the next item is space + // whitespace Some(c) if c.is_whitespace() => { - // `%` is not a whitespace, so `c != '%'` is redundant - let nextspec = self - .remainder - .find(|c: char| !c.is_whitespace()) - .unwrap_or(self.remainder.len()); - assert!(nextspec > 0); - let item = sp!(&self.remainder[..nextspec]); - self.remainder = &self.remainder[nextspec..]; + // LAST WORKING HERE 20220830 must compare whitespace chars + // wait, are any tests checking for mismatching whitespace? what about wide chars? 
+ // same for case of literals below + let ws = self.remainder; + let mut end: usize = 0; + for (offset, c_) in self.remainder.char_indices() { + if !c_.is_whitespace() { + break; + } + // advance `end` by 1 char + end = offset; + } + // get the offset of the last char too + end += match &self.remainder[end..].char_indices().nth(1) { + Some((offset, _c)) => *offset, + None => self.remainder[end..].len(), + }; + self.remainder = &self.remainder[end..]; + let item = sp!(&ws[..end]); Some(item) } - // the next item is literal - _ => { - let nextspec = self - .remainder - .find(|c: char| c.is_whitespace() || c == '%') - .unwrap_or(self.remainder.len()); - assert!(nextspec > 0); - let item = lit!(&self.remainder[..nextspec]); - self.remainder = &self.remainder[nextspec..]; + // literals + Some(_c) => { + let ws = self.remainder; + let mut end: usize = 0; + fn is_literal(c: &char) -> bool { + if !c.is_whitespace() && c != &'%' { + return true; + } + false + } + for (offset, c_) in self.remainder.char_indices() { + if !is_literal(&c_) { + break; + } + // advance `end` by 1 char + end = offset; + } + // get the offset of the last char too + end += match &self.remainder[end..].char_indices().nth(1) { + Some((offset, _c)) => *offset, + None => self.remainder[end..].len(), + }; + self.remainder = &self.remainder[end..]; + let item = lit!(&ws[..end]); Some(item) } } @@ -499,8 +525,11 @@ impl<'a> Iterator for StrftimeItems<'a> { fn test_strftime_items() { fn parse_and_collect(s: &str) -> Vec> { // map any error into `[Item::Error]`. useful for easy testing. 
+ eprintln!("test_strftime_items: parse_and_collect({:?})", s); let items = StrftimeItems::new(s); + eprintln!(" items: {:?}", &items); let items = items.map(|spec| if spec == Item::Error { None } else { Some(spec) }); + eprintln!(" items: {:?}", &items); items.collect::>>().unwrap_or_else(|| vec![Item::Error]) } @@ -518,6 +547,7 @@ fn test_strftime_items() { parse_and_collect("%Y-%m-%d"), [num0!(Year), lit!("-"), num0!(Month), lit!("-"), num0!(Day)] ); + assert_eq!(parse_and_collect("%Y--%m"), [num0!(Year), lit!("--"), num0!(Month)]); assert_eq!(parse_and_collect("[%F]"), parse_and_collect("[%Y-%m-%d]")); assert_eq!(parse_and_collect("%m %d"), [num0!(Month), sp!(" "), num0!(Day)]); assert_eq!(parse_and_collect("%"), [Item::Error]); @@ -543,6 +573,9 @@ fn test_strftime_items() { assert_eq!(parse_and_collect("%0e"), [num0!(Day)]); assert_eq!(parse_and_collect("%_e"), [nums!(Day)]); assert_eq!(parse_and_collect("%z"), [fix!(TimezoneOffset)]); + assert_eq!(parse_and_collect("%:z"), [fix!(TimezoneOffsetColon)]); + assert_eq!(parse_and_collect("%Z"), [fix!(TimezoneName)]); + assert_eq!(parse_and_collect("%ZZZZ"), [fix!(TimezoneName), lit!("ZZZ")]); assert_eq!(parse_and_collect("%#z"), [internal_fix!(TimezoneOffsetPermissive)]); assert_eq!(parse_and_collect("%#m"), [Item::Error]); } @@ -643,6 +676,13 @@ fn test_strftime_format() { assert_eq!(dt.format("%t").to_string(), "\t"); assert_eq!(dt.format("%n").to_string(), "\n"); assert_eq!(dt.format("%%").to_string(), "%"); + + // complex format specifiers + assert_eq!(dt.format(" %Y%d%m%%%%%t%H%M%S\t").to_string(), " 20010807%%\t003460\t"); + assert_eq!( + dt.format(" %Y%d%m%%%%%t%H:%P:%M%S%:::z\t").to_string(), + " 20010807%%\t00:am:3460+09\t" + ); } #[cfg(feature = "unstable-locales")] diff --git a/src/naive/date.rs b/src/naive/date.rs index cb2898dcfb..c473d7ff3e 100644 --- a/src/naive/date.rs +++ b/src/naive/date.rs @@ -1953,13 +1953,10 @@ impl str::FromStr for NaiveDate { fn from_str(s: &str) -> ParseResult { const 
ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Year, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Month, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Day, Pad::Zero), - Item::Space(""), ]; let mut parsed = Parsed::new(); @@ -2611,24 +2608,28 @@ mod tests { // valid cases let valid = [ "-0000000123456-1-2", - " -123456 - 1 - 2 ", + "-123456-1-2", "-12345-1-2", "-1234-12-31", "-7-6-5", "350-2-28", "360-02-29", "0360-02-29", - "2015-2 -18", + "2015-2-18", + "2015-02-18", "+70-2-18", "+70000-2-18", "+00007-2-18", ]; for &s in &valid { + eprintln!("test_date_from_str test case {:?}", s); let d = match s.parse::() { Ok(d) => d, Err(e) => panic!("parsing `{}` has failed: {}", s, e), }; + eprintln!("d {:?} (NaiveDate)", d); let s_ = format!("{:?}", d); + eprintln!("s_ {:?}", s_); // `s` and `s_` may differ, but `s.parse()` and `s_.parse()` must be same let d_ = match s_.parse::() { Ok(d) => d, @@ -2636,6 +2637,7 @@ mod tests { panic!("`{}` is parsed into `{:?}`, but reparsing that has failed: {}", s, d, e) } }; + eprintln!("d_ {:?} (NaiveDate)", d_); assert!( d == d_, "`{}` is parsed into `{:?}`, but reparsed result \ @@ -2653,6 +2655,7 @@ mod tests { assert!("2014".parse::().is_err()); assert!("2014-01".parse::().is_err()); assert!("2014-01-00".parse::().is_err()); + assert!("2014-11-32".parse::().is_err()); assert!("2014-13-57".parse::().is_err()); assert!("9999999-9-9".parse::().is_err()); // out-of-bounds } @@ -2665,7 +2668,7 @@ mod tests { Ok(ymd(2014, 5, 7)) ); // ignore time and offset assert_eq!( - NaiveDate::parse_from_str("2015-W06-1=2015-033", "%G-W%V-%u = %Y-%j"), + NaiveDate::parse_from_str("2015-W06-1=2015-033", "%G-W%V-%u=%Y-%j"), Ok(ymd(2015, 2, 2)) ); assert_eq!( diff --git a/src/naive/datetime/mod.rs b/src/naive/datetime/mod.rs index 49cc091504..97f2614645 100644 --- a/src/naive/datetime/mod.rs +++ b/src/naive/datetime/mod.rs @@ -1619,23 +1619,17 @@ impl str::FromStr for NaiveDateTime { fn 
from_str(s: &str) -> ParseResult { const ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Year, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Month, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Day, Pad::Zero), - Item::Space(""), Item::Literal("T"), // XXX shouldn't this be case-insensitive? Item::Numeric(Numeric::Hour, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Minute, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Second, Pad::Zero), Item::Fixed(Fixed::Nanosecond), - Item::Space(""), ]; let mut parsed = Parsed::new(); diff --git a/src/naive/datetime/tests.rs b/src/naive/datetime/tests.rs index e86eafdb47..88585dd73d 100644 --- a/src/naive/datetime/tests.rs +++ b/src/naive/datetime/tests.rs @@ -110,14 +110,18 @@ fn test_datetime_timestamp() { } #[test] -fn test_datetime_from_str() { +fn test_parse_naivedatetime() { // valid cases let valid = [ - "2015-2-18T23:16:9.15", + "2001-02-03T04:05:06", + "2012-12-12T12:12:12", + "2015-02-18T23:16:09.153", + "2015-2-18T23:16:09.153", "-77-02-18T23:16:09", - " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 ", + "+82701-05-6T15:9:60.898989898989", ]; for &s in &valid { + eprintln!("test_datetime_from_str valid {:?}", s); let d = match s.parse::() { Ok(d) => d, Err(e) => panic!("parsing `{}` has failed: {}", s, e), @@ -142,16 +146,37 @@ fn test_datetime_from_str() { // some invalid cases // since `ParseErrorKind` is private, all we can do is to check if there was an error - assert!("".parse::().is_err()); - assert!("x".parse::().is_err()); - assert!("15".parse::().is_err()); - assert!("15:8:9".parse::().is_err()); - assert!("15-8-9".parse::().is_err()); - assert!("2015-15-15T15:15:15".parse::().is_err()); - assert!("2012-12-12T12:12:12x".parse::().is_err()); - assert!("2012-123-12T12:12:12".parse::().is_err()); - assert!("+ 82701-123-12T12:12:12".parse::().is_err()); - 
assert!("+802701-123-12T12:12:12".parse::().is_err()); // out-of-bound + let invalid = [ + "", + "x", + "15", + "15:8:9", + "15-8-9", + "2015-15-15T15:15:15", + "2012-12-12T12:12:12x", + "2012-123-12T12:12:12", + "2012 -12-12T12:12:12", + "2012 -12-12T12:12:12", + "2012- 12-12T12:12:12", + "2012- 12-12T12:12:12", + "2012-12-12 T12:12:12", + "2012-12-12T 12:12:12", + "2012-12-12T12 :12:12", + "2012-12-12T12 :12:12", + "2012-12-12T12: 12:12", + "2012-12-12T12: 12:12", + "2012-12-12T12 : 12:12", + "2012-12-12T12:12:12 ", + " 2012-12-12T12:12:12", + "2012-12-12t12:12:12", + "+ 82701-123-12T12:12:12", + "+802701-123-12T12:12:12", // out-of-bound + " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 ", + ]; + for &s in &invalid { + eprintln!("test_datetime_from_str invalid {:?}", s); + assert!(s.parse::().is_err()); + } } #[test] @@ -163,7 +188,7 @@ fn test_naivedatetime_parse_from_str() { Ok(ymdhms(2014, 5, 7, 12, 34, 56)) ); // ignore offset assert_eq!( - NaiveDateTime::parse_from_str("2015-W06-1 000000", "%G-W%V-%u%H%M%S"), + NaiveDateTime::parse_from_str("2015-W06-1 000000", "%G-W%V-%u %H%M%S"), Ok(ymdhms(2015, 2, 2, 0, 0, 0)) ); assert_eq!( diff --git a/src/naive/time/mod.rs b/src/naive/time/mod.rs index 84305bf8f5..7ab0dffb73 100644 --- a/src/naive/time/mod.rs +++ b/src/naive/time/mod.rs @@ -1300,14 +1300,11 @@ impl str::FromStr for NaiveTime { fn from_str(s: &str) -> ParseResult { const ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Hour, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Minute, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Second, Pad::Zero), Item::Fixed(Fixed::Nanosecond), - Item::Space(""), ]; let mut parsed = Parsed::new(); diff --git a/src/naive/time/tests.rs b/src/naive/time/tests.rs index 4df26d9402..98b5167ac3 100644 --- a/src/naive/time/tests.rs +++ b/src/naive/time/tests.rs @@ -196,12 +196,38 @@ fn test_time_parse_from_str() { "0:0:0", "0:0:0.0000000", "0:0:0.0000003", - " 4 : 3 : 2.1 ", 
- " 09:08:07 ", - " 9:8:07 ", + "01:02:03", + "4:3:2.1", + "9:8:7", + "09:8:7", + "9:08:7", + "9:8:07", + "09:08:7", + "09:8:07", + "09:08:7", + //"09:08:00000000007", + "9:08:07", + "09:08:07", + "9:8:07.123", + "9:08:7.123", + "09:8:7.123", + "09:08:7.123", + "9:08:07.123", + "09:8:07.123", + "09:08:07.123", + "09:08:07.123", + "09:08:07.1234", + "09:08:07.12345", + "09:08:07.123456", + "09:08:07.1234567", + "09:08:07.12345678", + "09:08:07.123456789", + "09:08:07.1234567891", + "09:08:07.12345678912", "23:59:60.373929310237", ]; for &s in &valid { + eprintln!("test_time_parse_from_str: {:?}", s); let d = match s.parse::() { Ok(d) => d, Err(e) => panic!("parsing `{}` has failed: {}", s, e), @@ -233,6 +259,9 @@ fn test_time_parse_from_str() { assert!("15:8:x".parse::().is_err()); assert!("15:8:9x".parse::().is_err()); assert!("23:59:61".parse::().is_err()); + assert!("001:02:03".parse::().is_err()); + assert!("01:002:03".parse::().is_err()); + assert!("01:02:003".parse::().is_err()); assert!("12:34:56.x".parse::().is_err()); assert!("12:34:56. 0".parse::().is_err()); } @@ -245,6 +274,15 @@ fn test_naivetime_parse_from_str() { Ok(hms(12, 34, 56)) ); // ignore date and offset assert_eq!(NaiveTime::parse_from_str("PM 12:59", "%P %H:%M"), Ok(hms(12, 59, 0))); + assert_eq!(NaiveTime::parse_from_str("12:59 \n\t PM", "%H:%M \n\t %P"), Ok(hms(12, 59, 0))); + assert_eq!(NaiveTime::parse_from_str("\t\t12:59\tPM\t", "\t\t%H:%M\t%P\t"), Ok(hms(12, 59, 0))); + assert_eq!( + NaiveTime::parse_from_str("\t\t1259\t\tPM\t", "\t\t%H%M\t\t%P\t"), + Ok(hms(12, 59, 0)) + ); + assert!(NaiveTime::parse_from_str("12:59 PM", "%H:%M\t%P").is_err()); + assert!(NaiveTime::parse_from_str("\t\t12:59 PM\t", "\t\t%H:%M\t%P\t").is_err()); + assert!(NaiveTime::parse_from_str("12:59 PM", "%H:%M %P").is_err()); assert!(NaiveTime::parse_from_str("12:3456", "%H:%M:%S").is_err()); }