diff --git a/spec/std/base64_spec.cr b/spec/std/base64_spec.cr index ba601955677b..1a5b021b2956 100644 --- a/spec/std/base64_spec.cr +++ b/spec/std/base64_spec.cr @@ -87,7 +87,7 @@ describe "Base64" do it "works for most characters" do a = String.build(65536 * 4) do |buf| - 65536.times { |i| buf << (i + 1).chr } + 65536.times { |i| buf << (i + 1).unsafe_chr } end b = Base64.encode(a) Crystal::Digest::MD5.hexdigest(Base64.decode_string(b)).should eq(Crystal::Digest::MD5.hexdigest(a)) diff --git a/spec/std/char_spec.cr b/spec/std/char_spec.cr index d995b44ff79e..f0c81f8d1723 100644 --- a/spec/std/char_spec.cr +++ b/spec/std/char_spec.cr @@ -286,12 +286,6 @@ describe "Char" do it "does for unicode" do '青'.bytesize.should eq(3) end - - it "raises on codepoint bigger than 0x10ffff" do - expect_raises InvalidByteSequenceError do - (0x10ffff + 1).unsafe_chr.bytesize - end - end end describe "in_set?" do @@ -338,12 +332,6 @@ describe "Char" do end end - it "raises on codepoint bigger than 0x10ffff when doing each_byte" do - expect_raises InvalidByteSequenceError do - (0x10ffff + 1).unsafe_chr.each_byte { |b| } - end - end - it "does each_byte" do 'a'.each_byte(&.should eq('a'.ord)).should be_nil end diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index 67ae40774793..e2aed420ed7d 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -789,9 +789,17 @@ describe "Int" do it "#chr" do 65.chr.should eq('A') - expect_raises(ArgumentError, "#{0x10ffff + 1} out of char range") do + expect_raises(ArgumentError, "0x110000 out of char range") do (0x10ffff + 1).chr end + + expect_raises(ArgumentError, "0xd800 out of char range") do + 0xd800.chr + end + + expect_raises(ArgumentError, "0xdfff out of char range") do + 0xdfff.chr + end end it "#unsafe_chr" do diff --git a/src/char.cr b/src/char.cr index 319de3188e73..24b1effd2c8d 100644 --- a/src/char.cr +++ b/src/char.cr @@ -724,14 +724,12 @@ struct Char yield (0xe0 | (c >> 12)).to_u8 yield (0x80 | ((c >> 6) & 0x3f)).to_u8 yield (0x80 | (c & 0x3f)).to_u8 - elsif c <= MAX_CODEPOINT + else # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx yield (0xf0 | (c >> 18)).to_u8 yield (0x80 | ((c >> 12) & 0x3f)).to_u8 yield (0x80 | ((c >> 6) & 0x3f)).to_u8 yield (0x80 | (c & 0x3f)).to_u8 - else - raise InvalidByteSequenceError.new("Invalid char value #{dump}") end end @@ -754,11 +752,9 @@ struct Char elsif c <= 0xffff # 1110xxxx 10xxxxxx 10xxxxxx 3 - elsif c <= MAX_CODEPOINT + else # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 4 - else - raise InvalidByteSequenceError.new("Invalid char value #{dump}") end end diff --git a/src/http/common.cr b/src/http/common.cr index 3bacbb3a3d5a..18a3a062467d 100644 --- a/src/http/common.cr +++ b/src/http/common.cr @@ -387,7 +387,7 @@ module HTTP String.build do |io| while quoted_pair_index io.write(data[0, quoted_pair_index]) - io << data[quoted_pair_index + 1].chr + io << data[quoted_pair_index + 1].unsafe_chr data += quoted_pair_index + 2 quoted_pair_index = data.index('\\'.ord) diff --git a/src/int.cr b/src/int.cr index 13c455f1a25f..2695b92beea3 100644 --- a/src/int.cr +++ b/src/int.cr @@ -64,14 +64,15 @@ struct Int # Returns a `Char` that has the unicode codepoint of `self`. # - # Raises `ArgumentError` if this integer's value doesn't fit a char's range (`0..0x10ffff`). + # Raises `ArgumentError` if this integer's value doesn't fit a char's range + # (`0..0xd7ff` and `0xe000..0x10ffff`). # # ``` # 97.chr # => 'a' # ``` def chr : Char - unless 0 <= self <= Char::MAX_CODEPOINT - raise ArgumentError.new("#{self} out of char range") + unless 0 <= self <= 0xd7ff || 0xe000 <= self <= Char::MAX_CODEPOINT + raise ArgumentError.new("0x#{self.to_s(16)} out of char range") end unsafe_chr end diff --git a/src/primitives.cr b/src/primitives.cr index 038de34818ba..46497d9a11d2 100644 --- a/src/primitives.cr +++ b/src/primitives.cr @@ -331,7 +331,7 @@ end struct {{int.id}} # Returns a `Char` that has the unicode codepoint of `self`, # without checking if this integer is in the range valid for - # chars (`0..0x10ffff`). + # chars (`0..0xd7ff` and `0xe000..0x10ffff`). # # You should never use this method unless `chr` turns out to # be a bottleneck. diff --git a/src/string.cr b/src/string.cr index 2e7a90fdb05e..fa337155e66a 100644 --- a/src/string.cr +++ b/src/string.cr @@ -713,18 +713,18 @@ class String unless v.finite? startptr = to_unsafe if whitespace - while startptr.value.chr.ascii_whitespace? + while startptr.value.unsafe_chr.ascii_whitespace? startptr += 1 end end - if startptr.value.chr.in?('+', '-') + if startptr.value.unsafe_chr.in?('+', '-') startptr += 1 end if v.nan? - return unless startptr.value.chr.in?('n', 'N') + return unless startptr.value.unsafe_chr.in?('n', 'N') else - return unless startptr.value.chr.in?('i', 'I') + return unless startptr.value.unsafe_chr.in?('i', 'I') end end @@ -735,7 +735,7 @@ class String if strict if whitespace - while endptr < string_end && endptr.value.chr.ascii_whitespace? + while endptr < string_end && endptr.value.unsafe_chr.ascii_whitespace? endptr += 1 end end @@ -744,7 +744,7 @@ class String else ptr = to_unsafe if whitespace - while ptr < string_end && ptr.value.chr.ascii_whitespace? + while ptr < string_end && ptr.value.unsafe_chr.ascii_whitespace? ptr += 1 end end