crystal-lang · straight-shoota · Apr 6, 2024 · Apr 4, 2024
diff --git a/spec/std/char/reader_spec.cr b/spec/std/char/reader_spec.cr
@@ -1,5 +1,6 @@
 require "spec"
 require "char/reader"
+require "../../support/string"
 
 private def assert_invalid_byte_sequence(bytes, *, file = __FILE__, line = __LINE__)
   reader = Char::Reader.new(String.new bytes)
@@ -8,11 +9,11 @@ private def assert_invalid_byte_sequence(bytes, *, file = __FILE__, line = __LIN
   reader.error.should eq(bytes[0]), file: file, line: line
 end
 
-private def assert_reads_at_end(bytes, *, file = __FILE__, line = __LINE__)
+private def assert_reads_at_end(bytes, char, *, file = __FILE__, line = __LINE__)
   str = String.new bytes
   reader = Char::Reader.new(str, pos: bytes.size)
-  reader.previous_char
-  reader.current_char.should eq(str[0]), file: file, line: line
+  reader.previous_char.should eq(char), file: file, line: line
+  reader.current_char.should eq(char), file: file, line: line
   reader.current_char_width.should eq(bytes.size), file: file, line: line
   reader.pos.should eq(0), file: file, line: line
   reader.error.should be_nil, file: file, line: line
@@ -214,100 +215,17 @@ describe "Char::Reader" do
     reader.pos.should eq(0)
   end
 
-  it "errors if 0x80 <= first_byte < 0xC2" do
-    assert_invalid_byte_sequence Bytes[0x80]
-    assert_invalid_byte_sequence Bytes[0xC1]
-  end
-
-  it "errors if (second_byte & 0xC0) != 0x80" do
-    assert_invalid_byte_sequence Bytes[0xd0]
-  end
-
-  it "errors if first_byte == 0xE0 && second_byte < 0xA0" do
-    assert_invalid_byte_sequence Bytes[0xe0, 0x9F, 0xA0]
-  end
-
-  it "errors if first_byte == 0xED && second_byte >= 0xA0" do
-    assert_invalid_byte_sequence Bytes[0xed, 0xB0, 0xA0]
-  end
-
-  it "errors if first_byte < 0xF0 && (third_byte & 0xC0) != 0x80" do
-    assert_invalid_byte_sequence Bytes[0xe0, 0xA0, 0]
-  end
-
-  it "errors if first_byte == 0xF0 && second_byte < 0x90" do
-    assert_invalid_byte_sequence Bytes[0xf0, 0x8F, 0xA0]
-  end
-
-  it "errors if first_byte == 0xF4 && second_byte >= 0x90" do
-    assert_invalid_byte_sequence Bytes[0xf4, 0x90, 0xA0]
-  end
-
-  it "errors if first_byte < 0xF5 && (fourth_byte & 0xC0) != 0x80" do
-    assert_invalid_byte_sequence Bytes[0xf4, 0x8F, 0xA0, 0]
-  end
-
-  it "errors if first_byte >= 0xF5" do
-    assert_invalid_byte_sequence Bytes[0xf5, 0x8F, 0xA0, 0xA0]
-  end
-
-  it "errors if second_byte is out of bounds" do
-    assert_invalid_byte_sequence Bytes[0xf4]
-  end
-
-  it "errors if third_byte is out of bounds" do
-    assert_invalid_byte_sequence Bytes[0xf4, 0x8f]
-  end
-
-  it "errors if fourth_byte is out of bounds" do
-    assert_invalid_byte_sequence Bytes[0xf4, 0x8f, 0xa0]
+  it "errors on invalid UTF-8" do
+    {% for bytes in INVALID_UTF8_BYTE_SEQUENCES %}
+      assert_invalid_byte_sequence Bytes{{ bytes }}
+    {% end %}
   end
 
   describe "#previous_char" do
     it "reads on valid UTF-8" do
-      assert_reads_at_end Bytes[0x00]
-      assert_reads_at_end Bytes[0x7f]
-
-      assert_reads_at_end Bytes[0xc2, 0x80]
-      assert_reads_at_end Bytes[0xc2, 0xbf]
-      assert_reads_at_end Bytes[0xdf, 0x80]
-      assert_reads_at_end Bytes[0xdf, 0xbf]
-
-      assert_reads_at_end Bytes[0xe1, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xe1, 0x80, 0xbf]
-      assert_reads_at_end Bytes[0xe1, 0x9f, 0x80]
-      assert_reads_at_end Bytes[0xe1, 0x9f, 0xbf]
-      assert_reads_at_end Bytes[0xed, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xed, 0x80, 0xbf]
-      assert_reads_at_end Bytes[0xed, 0x9f, 0x80]
-      assert_reads_at_end Bytes[0xed, 0x9f, 0xbf]
-      assert_reads_at_end Bytes[0xef, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xef, 0x80, 0xbf]
-      assert_reads_at_end Bytes[0xef, 0x9f, 0x80]
-      assert_reads_at_end Bytes[0xef, 0x9f, 0xbf]
-
-      assert_reads_at_end Bytes[0xe0, 0xa0, 0x80]
-      assert_reads_at_end Bytes[0xe0, 0xa0, 0xbf]
-      assert_reads_at_end Bytes[0xe0, 0xbf, 0x80]
-      assert_reads_at_end Bytes[0xe0, 0xbf, 0xbf]
-      assert_reads_at_end Bytes[0xe1, 0xa0, 0x80]
-      assert_reads_at_end Bytes[0xe1, 0xa0, 0xbf]
-      assert_reads_at_end Bytes[0xe1, 0xbf, 0x80]
-      assert_reads_at_end Bytes[0xe1, 0xbf, 0xbf]
-      assert_reads_at_end Bytes[0xef, 0xa0, 0x80]
-      assert_reads_at_end Bytes[0xef, 0xa0, 0xbf]
-      assert_reads_at_end Bytes[0xef, 0xbf, 0x80]
-      assert_reads_at_end Bytes[0xef, 0xbf, 0xbf]
-
-      assert_reads_at_end Bytes[0xf1, 0x80, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xf1, 0x8f, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xf4, 0x80, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xf4, 0x8f, 0x80, 0x80]
-
-      assert_reads_at_end Bytes[0xf0, 0x90, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xf0, 0xbf, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xf3, 0x90, 0x80, 0x80]
-      assert_reads_at_end Bytes[0xf3, 0xbf, 0x80, 0x80]
+      {% for bytes, char in VALID_UTF8_BYTE_SEQUENCES %}
+        assert_reads_at_end Bytes{{ bytes }}, {{ char }}
+      {% end %}
     end
 
     it "errors on invalid UTF-8" do

diff --git a/spec/std/io/buffered_spec.cr b/spec/std/io/buffered_spec.cr
@@ -1,4 +1,5 @@
 require "../spec_helper"
+require "../../support/string"
 
 private class BufferedWrapper < IO
   include IO::Buffered
@@ -176,22 +177,15 @@ describe "IO::Buffered" do
     io.read_char.should eq('界')
     io.read_char.should be_nil
 
-    io = IO::Memory.new
-    io.write Bytes[0xf8, 0xff, 0xff, 0xff]
-    io.rewind
-    io = BufferedWrapper.new(io)
-
-    expect_raises(InvalidByteSequenceError) do
-      io.read_char
-    end
+    {% for bytes, char in VALID_UTF8_BYTE_SEQUENCES %}
+      BufferedWrapper.new(IO::Memory.new(Bytes{{ bytes }})).read_char.should eq({{ char }})
+    {% end %}
 
-    io = IO::Memory.new
-    io.write_byte 0x81_u8
-    io.rewind
-    io = BufferedWrapper.new(io)
-    expect_raises(InvalidByteSequenceError) do
-      p io.read_char
-    end
+    {% for bytes in INVALID_UTF8_BYTE_SEQUENCES %}
+      expect_raises(InvalidByteSequenceError) do
+        BufferedWrapper.new(IO::Memory.new(Bytes{{ bytes }})).read_char
+      end
+    {% end %}
   end
 
   it "reads byte" do

diff --git a/spec/std/io/io_spec.cr b/spec/std/io/io_spec.cr
@@ -1,5 +1,6 @@
 require "../spec_helper"
 require "../../support/channel"
+require "../../support/string"
 require "spec/helpers/iterate"
 
 require "socket"
@@ -338,29 +339,13 @@ describe IO do
       io.read_char.should eq('界')
       io.read_char.should be_nil
 
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xc4, 0x70]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xc4, 0x70, 0x00, 0x00]).read_char }
-
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xf8]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xf8, 0x00, 0x00, 0x00]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0x81]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0x81, 0x00, 0x00, 0x00]).read_char }
-
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xed, 0xa0, 0x80]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xed, 0xa0, 0x80, 0x00]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xed, 0xbf, 0xbf]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xed, 0xbf, 0xbf, 0x00]).read_char }
-
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xc0, 0x80]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xc0, 0x80, 0x00, 0x00]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xc1, 0xbf]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xc1, 0xbf, 0x00, 0x00]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xe0, 0x80, 0x80]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xe0, 0x80, 0x80, 0x00]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xe0, 0x9f, 0xbf]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xe0, 0x9f, 0xbf, 0x00]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xf0, 0x80, 0x80, 0x80]).read_char }
-      expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes[0xf0, 0x8f, 0xbf, 0xbf]).read_char }
+      {% for bytes, char in VALID_UTF8_BYTE_SEQUENCES %}
+        SimpleIOMemory.new(Bytes{{ bytes }}).read_char.should eq({{ char }})
+      {% end %}
+
+      {% for bytes in INVALID_UTF8_BYTE_SEQUENCES %}
+        expect_raises(InvalidByteSequenceError) { SimpleIOMemory.new(Bytes{{ bytes }}).read_char }
+      {% end %}
     end
 
     it "reads byte" do

diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
@@ -1,4 +1,5 @@
 require "./spec_helper"
+require "../support/string"
 require "spec/helpers/iterate"
 require "spec/helpers/string"
 
@@ -2946,65 +2947,13 @@ describe "String" do
       "hello".valid_encoding?.should be_true
       "hello\u{80}\u{7FF}\u{800}\u{FFFF}\u{10000}\u{10FFFF}".valid_encoding?.should be_true
 
-      # non-starters
-      String.new(Bytes[0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0x8F]).valid_encoding?.should be_false
-      String.new(Bytes[0x90]).valid_encoding?.should be_false
-      String.new(Bytes[0x9F]).valid_encoding?.should be_false
-      String.new(Bytes[0xA0]).valid_encoding?.should be_false
-      String.new(Bytes[0xAF]).valid_encoding?.should be_false
-
-      # incomplete, 2-byte
-      String.new(Bytes[0xC2]).valid_encoding?.should be_false
-      String.new(Bytes[0xC2, 0x00]).valid_encoding?.should be_false
-      String.new(Bytes[0xC2, 0xC2]).valid_encoding?.should be_false
-
-      # overlong, 2-byte
-      String.new(Bytes[0xC0, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xC1, 0xBF]).valid_encoding?.should be_false
-      String.new(Bytes[0xC2, 0x80]).valid_encoding?.should be_true
-
-      # incomplete, 3-byte
-      String.new(Bytes[0xE1]).valid_encoding?.should be_false
-      String.new(Bytes[0xE1, 0x00]).valid_encoding?.should be_false
-      String.new(Bytes[0xE1, 0xC2]).valid_encoding?.should be_false
-      String.new(Bytes[0xE1, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xE1, 0x80, 0x00]).valid_encoding?.should be_false
-      String.new(Bytes[0xE1, 0x80, 0xC2]).valid_encoding?.should be_false
-
-      # overlong, 3-byte
-      String.new(Bytes[0xE0, 0x80, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xE0, 0x9F, 0xBF]).valid_encoding?.should be_false
-      String.new(Bytes[0xE0, 0xA0, 0x80]).valid_encoding?.should be_true
-
-      # surrogate pairs
-      String.new(Bytes[0xED, 0x9F, 0xBF]).valid_encoding?.should be_true
-      String.new(Bytes[0xED, 0xA0, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xED, 0xBF, 0xBF]).valid_encoding?.should be_false
-      String.new(Bytes[0xEE, 0x80, 0x80]).valid_encoding?.should be_true
-
-      # incomplete, 4-byte
-      String.new(Bytes[0xF1]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x00]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0xC2]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x80, 0x00]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x80, 0xC2]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x80, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x80, 0x80, 0x00]).valid_encoding?.should be_false
-      String.new(Bytes[0xF1, 0x80, 0x80, 0xC2]).valid_encoding?.should be_false
-
-      # overlong, 4-byte
-      String.new(Bytes[0xF0, 0x80, 0x80, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xF0, 0x8F, 0xBF, 0xBF]).valid_encoding?.should be_false
-      String.new(Bytes[0xF0, 0x90, 0x80, 0x80]).valid_encoding?.should be_true
-
-      # upper boundary, 4-byte
-      String.new(Bytes[0xF4, 0x8F, 0xBF, 0xBF]).valid_encoding?.should be_true
-      String.new(Bytes[0xF4, 0x90, 0x80, 0x80]).valid_encoding?.should be_false
-      String.new(Bytes[0xF5]).valid_encoding?.should be_false
-      String.new(Bytes[0xF8]).valid_encoding?.should be_false
-      String.new(Bytes[0xFF]).valid_encoding?.should be_false
+      {% for bytes in VALID_UTF8_BYTE_SEQUENCES %}
+        String.new(Bytes{{ bytes }}).valid_encoding?.should be_true
+      {% end %}
+
+      {% for bytes in INVALID_UTF8_BYTE_SEQUENCES %}
+        String.new(Bytes{{ bytes }}).valid_encoding?.should be_false
+      {% end %}
     end
 
     it "scrubs" do
@@ -3114,44 +3063,13 @@ end
 class String
   describe String do
     it ".char_bytesize_at" do
-      String.char_bytesize_at(Bytes[0x00, 0].to_unsafe).should eq 1
-      String.char_bytesize_at(Bytes[0x7F, 0].to_unsafe).should eq 1
-      String.char_bytesize_at(Bytes[0x80, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xBF, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xC2, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xC3, 0].to_unsafe).should eq 1 # malformed
-
-      String.char_bytesize_at(Bytes[0xC2, 0x7F, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xC2, 0x80, 0].to_unsafe).should eq 2
-      String.char_bytesize_at(Bytes[0xDF, 0xBF, 0].to_unsafe).should eq 2
-      String.char_bytesize_at(Bytes[0xDF, 0xC0, 0].to_unsafe).should eq 1 # malformed
-
-      String.char_bytesize_at(Bytes[0xE0, 0xA0, 0x7F, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xE0, 0x9F, 0x8F, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xE0, 0xA0, 0x80, 0].to_unsafe).should eq 3
-      String.char_bytesize_at(Bytes[0xED, 0x9F, 0xBF, 0].to_unsafe).should eq 3
-      String.char_bytesize_at(Bytes[0xED, 0x9F, 0xC0, 0].to_unsafe).should eq 1 # surrogate
-      String.char_bytesize_at(Bytes[0xED, 0xBF, 0xBF, 0].to_unsafe).should eq 1 # surrogate
-      String.char_bytesize_at(Bytes[0xEE, 0x80, 0x80, 0].to_unsafe).should eq 3
-      String.char_bytesize_at(Bytes[0xEF, 0xBF, 0xBD, 0].to_unsafe).should eq 3
-      String.char_bytesize_at(Bytes[0xEF, 0xBF, 0xBF, 0].to_unsafe).should eq 3
-      String.char_bytesize_at(Bytes[0xEF, 0xBF, 0xC0, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xEF, 0xC0, 0xBF, 0].to_unsafe).should eq 1 # malformed
-
-      String.char_bytesize_at(Bytes[0xF0, 0x90, 0x80, 0x7F, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xF0, 0x90, 0x7F, 0x80, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xF0, 0x8F, 0x80, 0x80, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xF0, 0x90, 0x80, 0x80, 0].to_unsafe).should eq 4
-      String.char_bytesize_at(Bytes[0xF0, 0x9F, 0xBF, 0xBF, 0].to_unsafe).should eq 4
-      String.char_bytesize_at(Bytes[0xF3, 0x90, 0x80, 0x80, 0].to_unsafe).should eq 4
-      String.char_bytesize_at(Bytes[0xF4, 0x8F, 0xBD, 0xBF, 0].to_unsafe).should eq 4
-      String.char_bytesize_at(Bytes[0xF4, 0x8F, 0xBF, 0xBF, 0].to_unsafe).should eq 4
-      String.char_bytesize_at(Bytes[0xF4, 0x8F, 0xBF, 0xC0, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xF4, 0x8F, 0xC0, 0xBF, 0].to_unsafe).should eq 1 # malformed
-      String.char_bytesize_at(Bytes[0xF4, 0x90, 0xBF, 0xBF, 0].to_unsafe).should eq 1 # malformed
-
-      String.char_bytesize_at(Bytes[0xF5, 0].to_unsafe).should eq 1 # out of codepoint range
-      String.char_bytesize_at(Bytes[0xFF, 0].to_unsafe).should eq 1 # out of codepoint range
+      {% for bytes in VALID_UTF8_BYTE_SEQUENCES %}
+        String.char_bytesize_at(Bytes[{{ bytes.splat }}, 0].to_unsafe).should eq({{ bytes.size }}) # a valid sequence is expected to report its full byte count
+      {% end %}
+
+      {% for bytes in INVALID_UTF8_BYTE_SEQUENCES %}
+        String.char_bytesize_at(Bytes[{{ bytes.splat }}, 0].to_unsafe).should eq 1
+      {% end %}
     end
   end
 end