From cae08ebe0d60d0115d7a51fa38d736236687a963 Mon Sep 17 00:00:00 2001 From: Michael Miller Date: Fri, 3 May 2019 10:27:13 -0600 Subject: [PATCH] Change String#to_i to parse octals with prefix 0o (#7691) * Change String#to_i to parse octals with prefix 0o This is a breaking change. Previously, with prefix: true, numbers starting with 0 were parsed as octals. An earlier change in Crystal requires octals to start with 0o. With this change, numbers with only a leading 0 are no longer treated as octals and are parsed as base-10. The 0o prefix must be present for a number to be treated as an octal. This also fixes parsing "0" with prefix: true, which previously raised an error. Additionally, YAML parsing used the old style (a leading 0). This updates the spec to handle the new parsing. There is no official YAML documentation for octals (that I'm aware of). * Add leading_zero_is_octal to String#to_i variations This allows parsing of octals in strings that use just 0 as a prefix instead of 0o. When this flag is true, the prior behavior for parsing octals is used. YAML parsing has been updated to accept both the 0 and 0o prefixes for octals. 
* Resolve merge issues with b1666f2 --- spec/std/string_spec.cr | 14 +++- spec/std/yaml/schema/core_spec.cr | 8 ++- src/string.cr | 115 +++++++++++++++++------------- src/yaml/schema/core.cr | 8 ++- 4 files changed, 88 insertions(+), 57 deletions(-) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 63ce0020f488..be5cedd2cbe7 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -235,7 +235,16 @@ describe "String" do it { "0x123abc".to_i(prefix: true).should eq(1194684) } it { "0b1101".to_i(prefix: true).should eq(13) } it { "0b001101".to_i(prefix: true).should eq(13) } - it { "0123".to_i(prefix: true).should eq(83) } + it { "0123".to_i(prefix: true).should eq(123) } + it { "0o123".to_i(prefix: true).should eq(83) } + it { "0123".to_i(leading_zero_is_octal: true).should eq(83) } + it { "123".to_i(leading_zero_is_octal: true).should eq(123) } + it { "0o755".to_i(prefix: true, leading_zero_is_octal: true).should eq(493) } + it { "5".to_i(prefix: true).should eq(5) } + it { "0".to_i(prefix: true).should eq(0) } + it { "00".to_i(prefix: true).should eq(0) } + it { "00".to_i(leading_zero_is_octal: true).should eq(0) } + it { "00".to_i(prefix: true, leading_zero_is_octal: true).should eq(0) } it { "123hello".to_i(strict: false).should eq(123) } it { "99 red balloons".to_i(strict: false).should eq(99) } it { " 99 red balloons".to_i(strict: false).should eq(99) } @@ -247,7 +256,10 @@ describe "String" do it { expect_raises(ArgumentError) { "0b123".to_i } } it { expect_raises(ArgumentError) { "000b123".to_i(prefix: true) } } it { expect_raises(ArgumentError) { "000x123".to_i(prefix: true) } } + it { expect_raises(ArgumentError) { "000o89a".to_i(prefix: true) } } it { expect_raises(ArgumentError) { "123hello".to_i } } + it { expect_raises(ArgumentError) { "0".to_i(leading_zero_is_octal: true) } } + it { expect_raises(ArgumentError) { "0o755".to_i(leading_zero_is_octal: true) } } it { "z".to_i(36).should eq(35) } it { "Z".to_i(36).should 
eq(35) } it { "0".to_i(62).should eq(0) } diff --git a/spec/std/yaml/schema/core_spec.cr b/spec/std/yaml/schema/core_spec.cr index 2938c8b87780..0201f8904428 100644 --- a/spec/std/yaml/schema/core_spec.cr +++ b/spec/std/yaml/schema/core_spec.cr @@ -105,7 +105,9 @@ describe YAML::Schema::Core do # integer (octal) it_parses_scalar "00", 0 - it_parses_scalar "0123", 0o123 + it_parses_scalar "0o0", 0 + it_parses_scalar "0o123", 0o123 + it_parses_scalar "0755", 0o755 # integer (hex) it_parses_scalar "0x0", 0 @@ -207,9 +209,11 @@ describe YAML::Schema::Core do it_raises_on_parse "!!float 'hello'", "Invalid float" # !!int + it_parses "!!int 0", 0 it_parses "!!int 123", 123 it_parses "!!int 0b10", 0b10 - it_parses "!!int 0123", 0o123 + it_parses "!!int 0o123", 0o123 + it_parses "!!int 0755", 0o755 it_parses "!!int 0xabc", 0xabc it_parses "!!int -123", -123 it_raises_on_parse "!!int 'hello'", "Invalid int" diff --git a/src/string.cr b/src/string.cr index f0388efe0e36..be144a261418 100644 --- a/src/string.cr +++ b/src/string.cr @@ -289,8 +289,9 @@ class String # Options: # * **whitespace**: if `true`, leading and trailing whitespaces are allowed # * **underscore**: if `true`, underscores in numbers are allowed - # * **prefix**: if `true`, the prefixes `"0x"`, `"0"` and `"0b"` override the base + # * **prefix**: if `true`, the prefixes `"0x"`, `"0o"` and `"0b"` override the base # * **strict**: if `true`, extraneous characters past the end of the number are disallowed + # * **leading_zero_is_octal**: if `true`, then a number prefixed with `"0"` will be treated as an octal # # ``` # "12345".to_i # => 12345 @@ -313,9 +314,12 @@ class String # # "99 red balloons".to_i # raises ArgumentError # "99 red balloons".to_i(strict: false) # => 99 + # + # "0755".to_i # => 755 + # "0755".to_i(leading_zero_is_octal: true) # => 493 # ``` - def to_i(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) - to_i32(base, whitespace, 
underscore, prefix, strict) + def to_i(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) + to_i32(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) end # Same as `#to_i`, but returns `nil` if there is not a valid number at the start @@ -327,8 +331,8 @@ class String # "0a".to_i?(strict: false) # => 0 # "hello".to_i? # => nil # ``` - def to_i?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) - to_i32?(base, whitespace, underscore, prefix, strict) + def to_i?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) + to_i32?(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) end # Same as `#to_i`, but returns the block's value if there is not a valid number at the start @@ -338,127 +342,127 @@ class String # "12345".to_i { 0 } # => 12345 # "hello".to_i { 0 } # => 0 # ``` - def to_i(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) - to_i32(base, whitespace, underscore, prefix, strict) { yield } + def to_i(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) + to_i32(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { yield } end # Same as `#to_i` but returns an `Int8`. 
- def to_i8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int8 - to_i8(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid Int8: #{self}") } + def to_i8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int8 + to_i8(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid Int8: #{self}") } end # Same as `#to_i` but returns an `Int8` or `nil`. - def to_i8?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int8? - to_i8(base, whitespace, underscore, prefix, strict) { nil } + def to_i8?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int8? + to_i8(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `Int8` or the block's value. - def to_i8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_i8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ i8, 127, 128 end # Same as `#to_i` but returns an `UInt8`. 
- def to_u8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt8 - to_u8(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid UInt8: #{self}") } + def to_u8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt8 + to_u8(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid UInt8: #{self}") } end # Same as `#to_i` but returns an `UInt8` or `nil`. - def to_u8?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt8? - to_u8(base, whitespace, underscore, prefix, strict) { nil } + def to_u8?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt8? + to_u8(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `UInt8` or the block's value. - def to_u8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_u8(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ u8, 255 end # Same as `#to_i` but returns an `Int16`. 
- def to_i16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int16 - to_i16(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid Int16: #{self}") } + def to_i16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int16 + to_i16(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid Int16: #{self}") } end # Same as `#to_i` but returns an `Int16` or `nil`. - def to_i16?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int16? - to_i16(base, whitespace, underscore, prefix, strict) { nil } + def to_i16?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int16? + to_i16(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `Int16` or the block's value. - def to_i16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_i16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ i16, 32767, 32768 end # Same as `#to_i` but returns an `UInt16`. 
- def to_u16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt16 - to_u16(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid UInt16: #{self}") } + def to_u16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt16 + to_u16(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid UInt16: #{self}") } end # Same as `#to_i` but returns an `UInt16` or `nil`. - def to_u16?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt16? - to_u16(base, whitespace, underscore, prefix, strict) { nil } + def to_u16?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt16? + to_u16(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `UInt16` or the block's value. - def to_u16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_u16(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ u16, 65535 end # Same as `#to_i`. 
- def to_i32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int32 - to_i32(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid Int32: #{self}") } + def to_i32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int32 + to_i32(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid Int32: #{self}") } end # Same as `#to_i`. - def to_i32?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int32? - to_i32(base, whitespace, underscore, prefix, strict) { nil } + def to_i32?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int32? + to_i32(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i`. - def to_i32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_i32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ i32, 2147483647, 2147483648 end # Same as `#to_i` but returns an `UInt32`. 
- def to_u32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt32 - to_u32(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid UInt32: #{self}") } + def to_u32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt32 + to_u32(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid UInt32: #{self}") } end # Same as `#to_i` but returns an `UInt32` or `nil`. - def to_u32?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt32? - to_u32(base, whitespace, underscore, prefix, strict) { nil } + def to_u32?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt32? + to_u32(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `UInt32` or the block's value. - def to_u32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_u32(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ u32, 4294967295 end # Same as `#to_i` but returns an `Int64`. 
- def to_i64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int64 - to_i64(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid Int64: #{self}") } + def to_i64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int64 + to_i64(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid Int64: #{self}") } end # Same as `#to_i` but returns an `Int64` or `nil`. - def to_i64?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : Int64? - to_i64(base, whitespace, underscore, prefix, strict) { nil } + def to_i64?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int64? + to_i64(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `Int64` or the block's value. - def to_i64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_i64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ i64, 9223372036854775807, 9223372036854775808 end # Same as `#to_i` but returns an `UInt64`. 
- def to_u64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt64 - to_u64(base, whitespace, underscore, prefix, strict) { raise ArgumentError.new("Invalid UInt64: #{self}") } + def to_u64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt64 + to_u64(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid UInt64: #{self}") } end # Same as `#to_i` but returns an `UInt64` or `nil`. - def to_u64?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true) : UInt64? - to_u64(base, whitespace, underscore, prefix, strict) { nil } + def to_u64?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt64? + to_u64(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } end # Same as `#to_i` but returns an `UInt64` or the block's value. 
- def to_u64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, &block) + def to_u64(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) gen_to_ u64 end @@ -491,7 +495,7 @@ class String invalid : Bool private macro gen_to_(method, max_positive = nil, max_negative = nil) - info = to_u64_info(base, whitespace, underscore, prefix, strict) + info = to_u64_info(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) return yield if info.invalid if info.negative @@ -509,7 +513,7 @@ class String end end - private def to_u64_info(base, whitespace, underscore, prefix, strict) + private def to_u64_info(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) raise ArgumentError.new("Invalid base #{base}") unless 2 <= base <= 36 || base == 62 ptr = to_unsafe @@ -546,10 +550,19 @@ class String when 'x' base = 16 ptr += 1 - else + when 'o' base = 8 + ptr += 1 + else + if leading_zero_is_octal + base = 8 + else + base = 10 + found_digit = true + end end - found_digit = false + elsif leading_zero_is_octal + base = 8 else found_digit = true end diff --git a/src/yaml/schema/core.cr b/src/yaml/schema/core.cr index cbc517619562..cc22ab2df5e8 100644 --- a/src/yaml/schema/core.cr +++ b/src/yaml/schema/core.cr @@ -87,7 +87,7 @@ module YAML::Schema::Core return value || string when .starts_with?('0') return 0_i64 if string.size == 1 - value = string.to_i64?(base: 8, prefix: true) + value = string.to_i64?(base: 8, prefix: true, leading_zero_is_octal: true) return value || string when .starts_with?('-'), .starts_with?('+') @@ -241,7 +241,9 @@ module YAML::Schema::Core end protected def self.parse_int(string, location) : Int64 - string.to_i64?(underscore: true, prefix: true) || + return 0_i64 if string == "0" + + string.to_i64?(underscore: true, prefix: true, leading_zero_is_octal: true) || 
raise(YAML::ParseException.new("Invalid int", *location)) end @@ -314,7 +316,7 @@ module YAML::Schema::Core end private def self.parse_int?(string) - string.to_i64?(underscore: true) + string.to_i64?(underscore: true, leading_zero_is_octal: true) end private def self.parse_float?(string)