diff --git a/core/encoding/list_spec.rb b/core/encoding/list_spec.rb
index 2a2078974e..c7897c1958 100644
--- a/core/encoding/list_spec.rb
+++ b/core/encoding/list_spec.rb
@@ -36,6 +36,16 @@
     Encoding.list.select {|e| e.dummy?}.should_not == []
   end
 
+  it 'includes UTF-8 encoding' do
+    Encoding.list.should include(Encoding::UTF_8)
+  end
+
+  ruby_version_is "2.7" do
+    it 'includes CESU-8 encoding' do
+      Encoding.list.should include(Encoding::CESU_8)
+    end
+  end
+
   # TODO: Find example that illustrates this
   it "updates the list when #find is used to load a new encoding"
 end
diff --git a/core/integer/chr_spec.rb b/core/integer/chr_spec.rb
index a8755eeb84..9f105e4241 100644
--- a/core/integer/chr_spec.rb
+++ b/core/integer/chr_spec.rb
@@ -240,4 +240,17 @@
       -> { integer.chr(encoding_name) }.should raise_error(RangeError)
     end
   end
+
+  ruby_version_is "2.7" do
+    it 'returns a String encoding self interpreted as a codepoint in the CESU-8 encoding' do
+      # For background on the encoding see https://en.wikipedia.org/wiki/CESU-8
+      # Code points from U+0000 to U+FFFF are encoded the same way as in UTF-8.
+      0x0045.chr(Encoding::CESU_8).bytes.should == 0x0045.chr(Encoding::UTF_8).bytes
+
+      # Code points from U+10000 to U+10FFFF are encoded in CESU-8 as a 6-byte surrogate pair,
+      # whereas UTF-8 encodes the same code point in 4 bytes, so the byte sequences must differ.
+      0x10400.chr(Encoding::CESU_8).bytes.should_not == 0x10400.chr(Encoding::UTF_8).bytes
+      0x10400.chr(Encoding::CESU_8).bytes.should == [0xED, 0xA0, 0x81, 0xED, 0xB0, 0x80]
+    end
+  end
 end