From b8f0857655d3f05146f96d7bb33e6d3ad889c725 Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Thu, 9 Dec 2021 19:34:03 +0800 Subject: [PATCH] Add `Slice#unsafe_slice_of`, `#to_unsafe_bytes` (#11379) --- spec/std/slice_spec.cr | 119 +++++++++++++++++++++++++++-------------- src/slice.cr | 85 +++++++++++++++++++++++++---- 2 files changed, 155 insertions(+), 49 deletions(-) diff --git a/spec/std/slice_spec.cr b/spec/std/slice_spec.cr index 93ad06bc3b40..5df21f5e43b5 100644 --- a/spec/std/slice_spec.cr +++ b/spec/std/slice_spec.cr @@ -347,45 +347,86 @@ describe "Slice" do end end - it "does hexstring" do - slice = Bytes.new(4) { |i| i.to_u8 + 1 } - slice.hexstring.should eq("01020304") - end - - it "does hexdump for empty slice" do - Bytes.empty.hexdump.should eq("") - - io = IO::Memory.new - Bytes.empty.hexdump(io).should eq(0) - io.to_s.should eq("") - end - - it "does hexdump" do - slice = Bytes.new(96) { |i| i.to_u8 + 32 } - assert_prints slice.hexdump, <<-EOF - 00000000 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f !"#$%&'()*+,-./ - 00000010 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 0123456789:;<=>? - 00000020 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO - 00000030 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f PQRSTUVWXYZ[\\]^_ - 00000040 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f `abcdefghijklmno - 00000050 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f pqrstuvwxyz{|}~.\n - EOF - - plus = Bytes.new(101) { |i| i.to_u8 + 32 } - assert_prints plus.hexdump, <<-EOF - 00000000 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f !"#$%&'()*+,-./ - 00000010 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 0123456789:;<=>? - 00000020 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO - 00000030 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f PQRSTUVWXYZ[\\]^_ - 00000040 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f `abcdefghijklmno - 00000050 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f pqrstuvwxyz{|}~. - 00000060 80 81 82 83 84 .....\n - EOF - - num = Bytes.new(10) { |i| i.to_u8 + 48 } - assert_prints num.hexdump, <<-EOF - 00000000 30 31 32 33 34 35 36 37 38 39 0123456789\n - EOF + describe "#unsafe_slice_of" do + it "reinterprets a slice's elements" do + slice = Bytes.new(10) { |i| i.to_u8 + 1 } + + {% if IO::ByteFormat::SystemEndian == IO::ByteFormat::LittleEndian %} + slice.unsafe_slice_of(Int16).should eq(Int16.slice(0x0201, 0x0403, 0x0605, 0x0807, 0x0A09)) + slice.unsafe_slice_of(Int32).should eq(Int32.slice(0x04030201, 0x08070605)) + + slice.unsafe_slice_of(UInt64)[0] = 0x1122_3344_5566_7788_u64 + slice.should eq(Bytes[0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x09, 0x0A]) + {% else %} + slice.unsafe_slice_of(Int16).should eq(Int16.slice(0x0102, 0x0304, 0x0506, 0x0708, 0x090A)) + slice.unsafe_slice_of(Int32).should eq(Int32.slice(0x01020304, 0x05060708)) + + slice.unsafe_slice_of(UInt64)[0] = 0x1122_3344_5566_7788_u64 + slice.should eq(Bytes[0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x09, 0x0A]) + {% end %} + end + end + + describe "#to_unsafe_bytes" do + it "reinterprets a slice's elements as bytes" do + slice = Slice[0x01020304, -0x01020304] + bytes = slice.to_unsafe_bytes + + {% if IO::ByteFormat::SystemEndian == IO::ByteFormat::LittleEndian %} + bytes.should eq(Bytes[0x04, 0x03, 0x02, 0x01, 0xFC, 0xFC, 0xFD, 0xFE]) + bytes[3] = 0x55 + slice[0].should eq(0x55020304) + {% else %} + bytes.should eq(Bytes[0x01, 0x02, 0x03, 0x04, 0xFE, 0xFD, 0xFC, 0xFC]) + bytes[3] = 0x55 + slice[0].should eq(0x01020355) + {% end %} + end + end + + describe "#hexstring" do + it "works for Bytes" do + slice = Bytes.new(4) { |i| i.to_u8 + 1 } + slice.hexstring.should eq("01020304") + end + end + + describe "#hexdump" do + it "works for empty slice" do + Bytes.empty.hexdump.should eq("") + + io = IO::Memory.new + Bytes.empty.hexdump(io).should eq(0) + io.to_s.should eq("") + end + + it "works for Bytes" do + slice = Bytes.new(96) { |i| i.to_u8 + 32 } + assert_prints slice.hexdump, <<-EOF + 00000000 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f !"#$%&'()*+,-./ + 00000010 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 0123456789:;<=>? + 00000020 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO + 00000030 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f PQRSTUVWXYZ[\\]^_ + 00000040 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f `abcdefghijklmno + 00000050 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f pqrstuvwxyz{|}~.\n + EOF + + plus = Bytes.new(101) { |i| i.to_u8 + 32 } + assert_prints plus.hexdump, <<-EOF + 00000000 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f !"#$%&'()*+,-./ + 00000010 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 0123456789:;<=>? + 00000020 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO + 00000030 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f PQRSTUVWXYZ[\\]^_ + 00000040 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f `abcdefghijklmno + 00000050 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f pqrstuvwxyz{|}~. + 00000060 80 81 82 83 84 .....\n + EOF + + num = Bytes.new(10) { |i| i.to_u8 + 48 } + assert_prints num.hexdump, <<-EOF + 00000000 30 31 32 33 34 35 36 37 38 39 0123456789\n + EOF + end end it_iterates "#each", [1, 2, 3], Slice[1, 2, 3].each diff --git a/src/slice.cr b/src/slice.cr index 6c3117a5cc30..234dfe524fc0 100644 --- a/src/slice.cr +++ b/src/slice.cr @@ -531,15 +531,62 @@ struct Slice(T) to_s(io) end - # Returns a hexstring representation of this slice, assuming it's - # a `Slice(UInt8)`. + # Returns a new `Slice` pointing at the same contents as `self`, but + # reinterpreted as elements of the given *type*. + # + # The returned slice never refers to more memory than `self`; if the last + # bytes of `self` do not fit into a `U`, they are excluded from the returned + # slice. + # + # WARNING: This method is **unsafe**: elements are reinterpreted using + # `#unsafe_as`, and the resulting slice may not be properly aligned. + # Additionally, the same elements may produce different results depending on + # the system endianness. # # ``` - # slice = UInt8.slice(97, 62, 63, 8, 255) - # slice.hexstring # => "613e3f08ff" + # # assume little-endian system + # bytes = Bytes[0x01, 0x02, 0x03, 0x04, 0xFF, 0xFE] + # bytes.unsafe_slice_of(Int8) # => Slice[1_i8, 2_i8, 3_i8, 4_i8, -1_i8, -2_i8] + # bytes.unsafe_slice_of(Int16) # => Slice[513_i16, 1027_i16, -257_i16] + # bytes.unsafe_slice_of(Int32) # => Slice[0x04030201] + # ``` + def unsafe_slice_of(type : U.class) : Slice(U) forall U + Slice.new(to_unsafe.unsafe_as(Pointer(U)), bytesize // sizeof(U), read_only: @read_only) + end + + # Returns a new `Bytes` pointing at the same contents as `self`. + # + # WARNING: This method is **unsafe**: the returned slice is writable if `self` + # is also writable, and modifications through the returned slice may violate + # the binary representations of Crystal objects. Additionally, the same + # elements may produce different results depending on the system endianness. + # + # ``` + # # assume little-endian system + # ints = Slice[0x01020304, 0x05060708] + # bytes = ints.to_unsafe_bytes # => Bytes[0x04, 0x03, 0x02, 0x01, 0x08, 0x07, 0x06, 0x05] + # bytes[2] = 0xAD + # ints # => Slice[0x01AD0304, 0x05060708] + # ``` + def to_unsafe_bytes : Bytes + unsafe_slice_of(UInt8) + end + + # Returns a hexstring representation of this slice. + # + # `self` must be a `Slice(UInt8)`. To call this method on other `Slice`s, + # `#to_unsafe_bytes` should be used first. + # + # ``` + # UInt8.slice(97, 62, 63, 8, 255).hexstring # => "613e3f08ff" + # + # # assume little-endian system + # Int16.slice(97, 62, 1000, -2).to_unsafe_bytes.hexstring # => "61003e00e803feff" # ``` def hexstring : String - self.as(Slice(UInt8)) + {% unless T == UInt8 %} + {% raise "Can only call `#hexstring` on Slice(UInt8), not #{@type}" %} + {% end %} str_size = size * 2 String.new(str_size) do |buffer| @@ -550,7 +597,9 @@ struct Slice(T) # :nodoc: def hexstring(buffer) : Nil - self.as(Slice(UInt8)) + {% unless T == UInt8 %} + {% raise "Can only call `#hexstring` on Slice(UInt8), not #{@type}" %} + {% end %} offset = 0 each do |v| @@ -562,16 +611,26 @@ struct Slice(T) nil end - # Returns a hexdump of this slice, assuming it's a `Slice(UInt8)`. + # Returns a hexdump of this slice. + # + # `self` must be a `Slice(UInt8)`. To call this method on other `Slice`s, + # `#to_unsafe_bytes` should be used first. + # # This method is specially useful for debugging binary data and # incoming/outgoing data in protocols. # # ``` # slice = UInt8.slice(97, 62, 63, 8, 255) # slice.hexdump # => "00000000 61 3e 3f 08 ff a>?..\n" + # + # # assume little-endian system + # slice = Int16.slice(97, 62, 1000, -2) + # slice.to_unsafe_bytes.hexdump # => "00000000 61 00 3e 00 e8 03 fe ff a.>.....\n" # ``` def hexdump : String - self.as(Slice(UInt8)) + {% unless T == UInt8 %} + {% raise "Can only call `#hexdump` on Slice(UInt8), not #{@type}" %} + {% end %} return "" if empty? @@ -598,7 +657,11 @@ struct Slice(T) end end - # Writes a hexdump of this slice, assuming it's a `Slice(UInt8)`, to the given *io*. + # Writes a hexdump of this slice to the given *io*. + # + # `self` must be a `Slice(UInt8)`. To call this method on other `Slice`s, + # `#to_unsafe_bytes` should be used first. + # # This method is specially useful for debugging binary data and # incoming/outgoing data in protocols. # @@ -615,7 +678,9 @@ struct Slice(T) # 00000000 61 3e 3f 08 ff a>?.. # ``` def hexdump(io : IO) - self.as(Slice(UInt8)) + {% unless T == UInt8 %} + {% raise "Can only call `#hexdump` on Slice(UInt8), not #{@type}" %} + {% end %} return 0 if empty?