From 8c3caeb65cb74c4ca47d03d60877e36cb9c20e09 Mon Sep 17 00:00:00 2001 From: jtmoon79 <815261+jtmoon79@users.noreply.github.com> Date: Tue, 18 Oct 2022 14:34:34 -0700 Subject: [PATCH] Constrain timezone separator colon strings Constrain timezone middle-colon separator string from infinite intermixed whitespace and colons to possible patterns `":"`, `" "`, `" :"`, `": "`, or `" : "`. A reasonable trade-off of previous extreme flexibility for a little flexibility and concise input. Issue #660 --- src/datetime/tests.rs | 46 ++++++++++++------------- src/format/parse.rs | 74 ++++++++++++++++++++-------------------- src/format/scan.rs | 79 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 137 insertions(+), 62 deletions(-) diff --git a/src/datetime/tests.rs b/src/datetime/tests.rs index 24a5fde981..3ba6f5b4fc 100644 --- a/src/datetime/tests.rs +++ b/src/datetime/tests.rs @@ -750,12 +750,11 @@ fn test_parse_datetime_utc() { "2001-02-03T04:05:06Z", "2001-02-03T04:05:06+0000", "2001-02-03T04:05:06-00:00", + "2001-02-03T04:05:06-00 00", "2001-02-03T04:05:06-01:00", "2001-02-03T04:05:06-01: 00", "2001-02-03T04:05:06-01 :00", "2001-02-03T04:05:06-01 : 00", - "2001-02-03T04:05:06-01 : 00", - "2001-02-03T04:05:06-01 : :00", "2012-12-12T12:12:12Z", "2015-02-18T23:16:09.153Z", "2015-2-18T23:16:09.153Z", @@ -827,6 +826,8 @@ fn test_parse_datetime_utc() { "2012-12-12T12 : 12:12Z", // space space before and after hour-minute divider "2012-12-12T12:12:12Z ", // trailing space " 2012-12-12T12:12:12Z", // leading space + "2001-02-03T04:05:06-01 : 00", // invalid timezone spacing + "2001-02-03T04:05:06-01 : :00", // invalid timezone spacing " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 Z", // valid datetime, wrong format ]; for &s in &invalid { @@ -1077,13 +1078,11 @@ fn test_datetime_parse_from_str() { "%b %d %Y %H:%M:%S %z" ) .is_err()); - assert_eq!( - DateTime::::parse_from_str( - "Aug 09 2013 23:54:35 -09::00", - "%b %d %Y %H:%M:%S %z" - ), - Ok(dt), - ); + 
assert!(DateTime::::parse_from_str( + "Aug 09 2013 23:54:35 -09::00", + "%b %d %Y %H:%M:%S %z" + ) + .is_err()); assert_eq!( DateTime::::parse_from_str( "Aug 09 2013 23:54:35 -0900::", @@ -1150,13 +1149,11 @@ fn test_datetime_parse_from_str() { "%b %d %Y %H:%M:%S %:z" ) .is_err()); - assert_eq!( - DateTime::::parse_from_str( - "Aug 09 2013 23:54:35 -09::00", - "%b %d %Y %H:%M:%S %:z" - ), - Ok(dt), - ); + assert!(DateTime::::parse_from_str( + "Aug 09 2013 23:54:35 -09::00", + "%b %d %Y %H:%M:%S %:z" + ) + .is_err()); // timezone data hs too many colons assert!(DateTime::::parse_from_str( "Aug 09 2013 23:54:35 -09:00:", @@ -1207,13 +1204,16 @@ fn test_datetime_parse_from_str() { "%b %d %Y %H:%M:%S %::z" ) .is_err()); - assert_eq!( - DateTime::::parse_from_str( - "Aug 09 2013 23:54:35 -09::00", - "%b %d %Y %H:%M:%S %::z" - ), - Ok(dt), - ); + assert!(DateTime::::parse_from_str( + "Aug 09 2013 23:54:35 -09::00", + "%b %d %Y %H:%M:%S %::z" + ) + .is_err()); + assert!(DateTime::::parse_from_str( + "Aug 09 2013 23:54:35 -09::00", + "%b %d %Y %H:%M:%S %:z" + ) + .is_err()); // wrong timezone data assert!(DateTime::::parse_from_str( "Aug 09 2013 23:54:35 -09", diff --git a/src/format/parse.rs b/src/format/parse.rs index ba0f00129c..517dc38646 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -927,14 +927,14 @@ fn test_parse() { check!("+12:34:56", [fix!(TimezoneOffset)]; TOO_LONG); check!("+12:34:56:", [fix!(TimezoneOffset)]; TOO_LONG); check!("+12 34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12 34", [fix!(TimezoneOffset)]; offset: 45_240); + check!("+12 34", [fix!(TimezoneOffset)]; INVALID); check!("12:34", [fix!(TimezoneOffset)]; INVALID); check!("12:34:56", [fix!(TimezoneOffset)]; INVALID); - check!("+12::34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12: :34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12:::34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12::::34", [fix!(TimezoneOffset)]; offset: 45_240); - 
check!("+12::34", [fix!(TimezoneOffset)]; offset: 45_240); + check!("+12::34", [fix!(TimezoneOffset)]; INVALID); + check!("+12: :34", [fix!(TimezoneOffset)]; INVALID); + check!("+12:::34", [fix!(TimezoneOffset)]; INVALID); + check!("+12::::34", [fix!(TimezoneOffset)]; INVALID); + check!("+12::34", [fix!(TimezoneOffset)]; INVALID); check!("+12:34:56", [fix!(TimezoneOffset)]; TOO_LONG); check!("+12:3456", [fix!(TimezoneOffset)]; TOO_LONG); check!("+1234:56", [fix!(TimezoneOffset)]; TOO_LONG); @@ -962,11 +962,11 @@ fn test_parse() { check!("-12: 34", [fix!(TimezoneOffset)]; offset: -45_240); check!("-12 :34", [fix!(TimezoneOffset)]; offset: -45_240); check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12: 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 :34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12: 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 :34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); check!("12:34 ", [fix!(TimezoneOffset)]; INVALID); check!(" 12:34", [fix!(TimezoneOffset)]; INVALID); check!("", [fix!(TimezoneOffset)]; TOO_SHORT); @@ -1038,14 +1038,14 @@ fn test_parse() { check!("+12: 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); check!("+12 :34", [fix!(TimezoneOffsetColon)]; offset: 45_240); check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12: :34", 
[fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12:::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12::::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); + check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12::34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12: :34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12:::34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12::::34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12::34", [fix!(TimezoneOffsetColon)]; INVALID); check!("#1234", [fix!(TimezoneOffsetColon)]; INVALID); check!("#12:34", [fix!(TimezoneOffsetColon)]; INVALID); check!("+12:34 ", [fix!(TimezoneOffsetColon)]; TOO_LONG); @@ -1113,17 +1113,17 @@ fn test_parse() { check!("+12:34:56:", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+12:34:56:7", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+12:34:56:78", [fix!(TimezoneOffsetZ)]; TOO_LONG); - check!("+12::34", [fix!(TimezoneOffsetZ)]; offset: 45_240); + check!("+12::34", [fix!(TimezoneOffsetZ)]; INVALID); check!("+12:3456", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+1234:56", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+12 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); + check!("+12 34", [fix!(TimezoneOffsetZ)]; INVALID); check!("+12: 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); check!("+12 :34", [fix!(TimezoneOffsetZ)]; offset: 45_240); check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); + check!("+12 : 34", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+12 : 34", [fix!(TimezoneOffsetZ)]; INVALID); + 
check!("+12 : 34", [fix!(TimezoneOffsetZ)]; INVALID); check!("12:34 ", [fix!(TimezoneOffsetZ)]; INVALID); check!(" 12:34", [fix!(TimezoneOffsetZ)]; INVALID); check!("+12:34 ", [fix!(TimezoneOffsetZ)]; TOO_LONG); @@ -1200,22 +1200,22 @@ fn test_parse() { check!("+12:34:56:7", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG); check!("+12:34:56:78", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG); check!("+12 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); + check!("+12 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("+12 :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); check!("+12: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); check!("+12 : 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 : 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12:::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12::::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); + check!("+12 :34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12 
: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12:::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12::::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!(" 12:34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("+12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG); diff --git a/src/format/scan.rs b/src/format/scan.rs index e77f550855..eab790478f 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -219,9 +219,48 @@ pub(super) fn trim1(s: &str) -> &str { } } -/// Consumes any number (including zero) of colon or spaces. +/// Allow a colon with possible one-character whitespace padding. +/// Consumes zero or one of these leading patterns: +/// `":"`, `" "`, `" :"`, `": "`, or `" : "`. 
pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> { - Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace())) + let c0s = match s.chars().next() { + Some(c) => c, + None => { + return Ok(s); + } + }; + if c0s != ':' && !c0s.is_whitespace() { + return Ok(s); + } + let c1s = s.chars().nth(1); + match (c0s, c1s) { + (c0, None) if c0 == ':' || c0.is_whitespace() => { + return Ok(s_next(s)); + } + (c0, Some(c1)) if c0 == ':' && c1.is_whitespace() => { + return Ok(s_next(s_next(s))); + } + (c0, Some(c1)) if c0 == ':' && !c1.is_whitespace() => { + return Ok(s_next(s)); + } + (c0, Some(c1)) if c0.is_whitespace() && (!c1.is_whitespace() && c1 != ':') => { + return Ok(s_next(s)); + } + _ => {} + } + let c2s = s.chars().nth(2); + match (c0s, c1s, c2s) { + (c0, Some(c1), None) if c0.is_whitespace() && c1 == ':' => Ok(s_next(s_next(s))), + (c0, Some(_), None) if c0.is_whitespace() => Ok(s_next(s)), + (c0, Some(c1), Some(c2)) if c0.is_whitespace() && c1 == ':' && !c2.is_whitespace() => { + Ok(s_next(s_next(s))) + } + (c0, Some(c1), Some(c2)) if c0.is_whitespace() && c1 == ':' && c2.is_whitespace() => { + Ok(s_next(s_next(s_next(s)))) + } + (c0, Some(_), Some(_)) if c0.is_whitespace() => Ok(s_next(s)), + _ => Ok(s), + } } /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible. 
@@ -464,3 +503,39 @@ fn test_trim1() { assert_eq!(trim1("😼"), "😼"); assert_eq!(trim1("😼b"), "😼b"); } + +#[test] +fn test_colon_or_space() { + assert_eq!(colon_or_space(""), Ok("")); + assert_eq!(colon_or_space(" "), Ok("")); + assert_eq!(colon_or_space("\n"), Ok("")); + assert_eq!(colon_or_space(" "), Ok(" ")); + assert_eq!(colon_or_space(" "), Ok(" ")); + assert_eq!(colon_or_space(" "), Ok(" ")); + assert_eq!(colon_or_space("\t\t\t\t"), Ok("\t\t\t")); + assert_eq!(colon_or_space(":"), Ok("")); + assert_eq!(colon_or_space(" :"), Ok("")); + assert_eq!(colon_or_space(": "), Ok("")); + assert_eq!(colon_or_space(" : "), Ok("")); + assert_eq!(colon_or_space(" : "), Ok(" ")); + assert_eq!(colon_or_space(" :"), Ok(" :")); + assert_eq!(colon_or_space(" : "), Ok(" : ")); + assert_eq!(colon_or_space(" :: "), Ok(": ")); + assert_eq!(colon_or_space(" : : "), Ok(": ")); + assert_eq!(colon_or_space("😸"), Ok("😸")); + assert_eq!(colon_or_space("😸😸"), Ok("😸😸")); + assert_eq!(colon_or_space("😸:"), Ok("😸:")); + assert_eq!(colon_or_space("😸 "), Ok("😸 ")); + assert_eq!(colon_or_space(" 😸"), Ok("😸")); + assert_eq!(colon_or_space(":😸"), Ok("😸")); + assert_eq!(colon_or_space(":😸 "), Ok("😸 ")); + assert_eq!(colon_or_space(" :😸"), Ok("😸")); + assert_eq!(colon_or_space(" :😸 "), Ok("😸 ")); + assert_eq!(colon_or_space(" :😸:"), Ok("😸:")); + assert_eq!(colon_or_space(": 😸"), Ok("😸")); + assert_eq!(colon_or_space(": 😸"), Ok(" 😸")); + assert_eq!(colon_or_space(": :😸"), Ok(":😸")); + assert_eq!(colon_or_space(" : 😸"), Ok("😸")); + assert_eq!(colon_or_space(" ::😸"), Ok(":😸")); + assert_eq!(colon_or_space(" :: 😸"), Ok(": 😸")); +}