From 942b0f6dc9316ca76881ca4ed818fe752332b368 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig <aborenszweig@manas.com.ar> Date: Thu, 15 Dec 2016 12:10:13 -0300 Subject: [PATCH] Add chomp option to gets, lines, each_line --- spec/compiler/macro/macro_methods_spec.cr | 2 +- spec/std/file_spec.cr | 28 +++++++ spec/std/io/argf_spec.cr | 22 ++++-- spec/std/io/buffered_spec.cr | 55 ++++++++++--- spec/std/io/io_spec.cr | 96 +++++++++++++++++------ spec/std/io/memory_spec.cr | 30 +++++-- spec/std/io/sized_spec.cr | 13 ++- spec/std/logger_spec.cr | 12 +-- spec/std/string_spec.cr | 41 +++++++++- spec/std/tempfile_spec.cr | 4 +- src/file.cr | 12 +-- src/http/common.cr | 2 +- src/http/content.cr | 4 +- src/io.cr | 83 ++++++++++++++------ src/io/argf.cr | 8 +- src/io/buffered.cr | 15 +++- src/io/encoding.cr | 27 +++++-- src/io/memory.cr | 14 +++- src/io/sized.cr | 9 ++- src/string.cr | 42 +++++++--- src/string/builder.cr | 27 ++++++- 21 files changed, 417 insertions(+), 129 deletions(-) diff --git a/spec/compiler/macro/macro_methods_spec.cr b/spec/compiler/macro/macro_methods_spec.cr index 8152b0804bd6..07cdc9d4ee44 100644 --- a/spec/compiler/macro/macro_methods_spec.cr +++ b/spec/compiler/macro/macro_methods_spec.cr @@ -280,7 +280,7 @@ describe "macro methods" do end it "executes lines" do - assert_macro "x", %({{x.lines}}), [StringLiteral.new("1\n2\n3")] of ASTNode, %(["1\\n", "2\\n", "3"]) + assert_macro "x", %({{x.lines}}), [StringLiteral.new("1\n2\n3")] of ASTNode, %(["1", "2", "3"]) end it "executes size" do diff --git a/spec/std/file_spec.cr b/spec/std/file_spec.cr index 2a4328a44ec9..7c424f0019f9 100644 --- a/spec/std/file_spec.cr +++ b/spec/std/file_spec.cr @@ -52,12 +52,29 @@ describe "File" do it "reads lines from file" do lines = File.read_lines "#{__DIR__}/data/test_file.txt" lines.size.should eq(20) + lines.first.should eq("Hello World") + end + + it "reads lines from file with chomp = false" do + lines = File.read_lines "#{__DIR__}/data/test_file.txt", chomp: false + lines.size.should eq(20) lines.first.should eq("Hello World\n") end it "reads lines from file with each" do idx = 0 File.each_line("#{__DIR__}/data/test_file.txt") do |line| + if idx == 0 + line.should eq("Hello World") + end + idx += 1 + end + idx.should eq(20) + end + + it "reads lines from file with each, chomp = false" do + idx = 0 + File.each_line("#{__DIR__}/data/test_file.txt", chomp: false) do |line| if idx == 0 line.should eq("Hello World\n") end @@ -69,6 +86,17 @@ describe "File" do it "reads lines from file with each as iterator" do idx = 0 File.each_line("#{__DIR__}/data/test_file.txt").each do |line| + if idx == 0 + line.should eq("Hello World") + end + idx += 1 + end + idx.should eq(20) + end + + it "reads lines from file with each as iterator, chomp = false" do + idx = 0 + File.each_line("#{__DIR__}/data/test_file.txt", chomp: false).each do |line| if idx == 0 line.should eq("Hello World\n") end diff --git a/spec/std/io/argf_spec.cr b/spec/std/io/argf_spec.cr index 10e27b7889e8..1b9c074f2f3d 100644 --- a/spec/std/io/argf_spec.cr +++ b/spec/std/io/argf_spec.cr @@ -44,11 +44,21 @@ describe IO::ARGF do stdin = IO::Memory.new("hello\nworld\n") argf = IO::ARGF.new argv, stdin - argf.gets.should eq("hello\n") - argf.gets.should eq("world\n") + argf.gets.should eq("hello") + argf.gets.should eq("world") argf.gets.should be_nil end + it "reads from STDIN if ARGV isn't specified, chomp = false" do + argv = [] of String + stdin = IO::Memory.new("hello\nworld\n") + + argf = IO::ARGF.new argv, stdin + argf.gets(chomp: false).should eq("hello\n") + argf.gets(chomp: false).should eq("world\n") + argf.gets(chomp: false).should be_nil + end + it "reads from ARGV if specified" do path1 = "#{__DIR__}/../data/argf_test_file_1.txt" path2 = "#{__DIR__}/../data/argf_test_file_2.txt" @@ -58,16 +68,16 @@ describe IO::ARGF do argf = IO::ARGF.new argv, stdin argv.should eq([path1, path2]) - argf.gets.should eq("12345\n") + argf.gets(chomp: false).should eq("12345\n") argv.should eq([path2]) - argf.gets.should eq("67890\n") + argf.gets(chomp: false).should eq("67890\n") argv.empty?.should be_true - argf.gets.should be_nil + argf.gets(chomp: false).should be_nil argv << path1 - str = argf.gets + str = argf.gets(chomp: false) str.should eq("12345\n") end end diff --git a/spec/std/io/buffered_spec.cr b/spec/std/io/buffered_spec.cr index cb7c14204188..2e93f2228704 100644 --- a/spec/std/io/buffered_spec.cr +++ b/spec/std/io/buffered_spec.cr @@ -51,16 +51,35 @@ end describe "IO::Buffered" do it "does gets" do - io = BufferedWrapper.new(IO::Memory.new("hello\nworld\n")) - io.gets.should eq("hello\n") - io.gets.should eq("world\n") + io = BufferedWrapper.new(IO::Memory.new("hello\r\nworld\n")) + io.gets.should eq("hello") + io.gets.should eq("world") io.gets.should be_nil end + it "does gets with chomp = false" do + io = BufferedWrapper.new(IO::Memory.new("hello\nworld\n")) + io.gets(chomp: false).should eq("hello\n") + io.gets(chomp: false).should eq("world\n") + io.gets(chomp: false).should be_nil + end + it "does gets with big line" do big_line = "a" * 20_000 io = BufferedWrapper.new(IO::Memory.new("#{big_line}\nworld\n")) - io.gets.should eq("#{big_line}\n") + io.gets.should eq(big_line) + end + + it "does gets with big line and \\r\\n" do + big_line = "a" * 20_000 + io = BufferedWrapper.new(IO::Memory.new("#{big_line}\r\nworld\n")) + io.gets.should eq(big_line) + end + + it "does gets with big line and chomp = false" do + big_line = "a" * 20_000 + io = BufferedWrapper.new(IO::Memory.new("#{big_line}\nworld\n")) + io.gets(chomp: false).should eq("#{big_line}\n") end it "does gets with char delimiter" do @@ -174,9 +193,9 @@ describe "IO::Buffered" do it "rewinds" do str = IO::Memory.new("hello\nworld\n") io = BufferedWrapper.new str - io.gets.should eq("hello\n") + io.gets.should eq("hello") io.rewind - io.gets.should eq("hello\n") + io.gets.should eq("hello") end it "reads more than the buffer's internal capacity" do @@ -302,9 +321,19 @@ describe "IO::Buffered" do base_io = IO::Memory.new(str.encode("UCS-2LE")) io = BufferedWrapper.new(base_io) io.set_encoding("UCS-2LE") - io.gets.should eq("Hello world\n") - io.gets.should eq("Foo\n") - io.gets.should eq("Bar\n") + io.gets.should eq("Hello world") + io.gets.should eq("Foo") + io.gets.should eq("Bar") + end + + it "gets with chomp = false" do + str = "Hello world\nFoo\nBar\n" + ("1234567890" * 1000) + base_io = IO::Memory.new(str.encode("UCS-2LE")) + io = BufferedWrapper.new(base_io) + io.set_encoding("UCS-2LE") + io.gets(chomp: false).should eq("Hello world\n") + io.gets(chomp: false).should eq("Foo\n") + io.gets(chomp: false).should eq("Bar\n") end it "gets big string" do @@ -313,8 +342,8 @@ describe "IO::Buffered" do io = BufferedWrapper.new(base_io) io.set_encoding("UCS-2LE") 10_000.times do |i| - io.gets.should eq("Hello\n") - io.gets.should eq("World\n") + io.gets(chomp: false).should eq("Hello\n") + io.gets(chomp: false).should eq("World\n") end end @@ -324,7 +353,7 @@ describe "IO::Buffered" do io = BufferedWrapper.new(base_io) io.set_encoding("GB2312") 1000.times do - io.gets.should eq("你好我是人\n") + io.gets(chomp: false).should eq("你好我是人\n") end end @@ -333,7 +362,7 @@ describe "IO::Buffered" do base_io = IO::Memory.new(str.encode("UCS-2LE")) io = BufferedWrapper.new(base_io) io.set_encoding("UCS-2LE") - io.gets.should eq("x\n") + io.gets(chomp: false).should eq("x\n") str = str[2..-1] str.each_char do |char| io.read_char.should eq(char) diff --git a/spec/std/io/io_spec.cr b/spec/std/io/io_spec.cr index 8db82348a2d4..db1baf7dae18 100644 --- a/spec/std/io/io_spec.cr +++ b/spec/std/io/io_spec.cr @@ -61,6 +61,15 @@ private class SimpleIOMemory Slice.new(@buffer, @bytesize) end + def to_s + String.new @buffer, @bytesize + end + + def rewind + @pos = 0 + self + end + private def check_needs_resize resize_to_capacity(@capacity * 2) if @bytesize == @capacity end @@ -113,8 +122,19 @@ describe IO do describe "IO iterators" do it "iterates by line" do - io = IO::Memory.new("hello\nbye\n") + io = SimpleIOMemory.new("hello\nbye\n") lines = io.each_line + lines.next.should eq("hello") + lines.next.should eq("bye") + lines.next.should be_a(Iterator::Stop) + + lines.rewind + lines.next.should eq("hello") + end + + it "iterates by line with chomp false" do + io = SimpleIOMemory.new("hello\nbye\n") + lines = io.each_line(chomp: false) lines.next.should eq("hello\n") lines.next.should eq("bye\n") lines.next.should be_a(Iterator::Stop) @@ -124,7 +144,7 @@ describe IO do end it "iterates by char" do - io = IO::Memory.new("abあぼ") + io = SimpleIOMemory.new("abあぼ") chars = io.each_char chars.next.should eq('a') chars.next.should eq('b') @@ -137,7 +157,7 @@ describe IO do end it "iterates by byte" do - io = IO::Memory.new("ab") + io = SimpleIOMemory.new("ab") bytes = io.each_byte bytes.next.should eq('a'.ord) bytes.next.should eq('b'.ord) @@ -150,24 +170,24 @@ describe IO do it "copies" do string = "abあぼ" - src = IO::Memory.new(string) - dst = IO::Memory.new + src = SimpleIOMemory.new(string) + dst = SimpleIOMemory.new IO.copy(src, dst).should eq(string.bytesize) dst.to_s.should eq(string) end it "copies with limit" do string = "abcあぼ" - src = IO::Memory.new(string) - dst = IO::Memory.new + src = SimpleIOMemory.new(string) + dst = SimpleIOMemory.new IO.copy(src, dst, 3).should eq(3) dst.to_s.should eq("abc") end it "raises on copy with negative limit" do string = "abcあぼ" - src = IO::Memory.new(string) - dst = IO::Memory.new + src = SimpleIOMemory.new(string) + dst = SimpleIOMemory.new expect_raises(ArgumentError, "negative limit") do IO.copy(src, dst, -10) end @@ -177,7 +197,7 @@ describe IO do File.open("#{__DIR__}/../data/test_file.txt") do |file1| File.open("#{__DIR__}/../data/test_file.ini") do |file2| file2.reopen(file1) - file2.gets.should eq("Hello World\n") + file2.gets.should eq("Hello World") end end end @@ -185,15 +205,30 @@ describe IO do describe "read operations" do it "does gets" do io = SimpleIOMemory.new("hello\nworld\n") - io.gets.should eq("hello\n") - io.gets.should eq("world\n") + io.gets.should eq("hello") + io.gets.should eq("world") + io.gets.should be_nil + end + + it "does gets with \\r\\n" do + io = SimpleIOMemory.new("hello\r\nworld\r\nfoo\rbar\n") + io.gets.should eq("hello") + io.gets.should eq("world") + io.gets.should eq("foo\rbar") io.gets.should be_nil end + it "does gets with chomp false" do + io = SimpleIOMemory.new("hello\nworld\n") + io.gets(chomp: false).should eq("hello\n") + io.gets(chomp: false).should eq("world\n") + io.gets(chomp: false).should be_nil + end + it "does gets with big line" do big_line = "a" * 20_000 io = SimpleIOMemory.new("#{big_line}\nworld\n") - io.gets.should eq("#{big_line}\n") + io.gets.should eq(big_line) end it "does gets with char delimiter" do @@ -218,6 +253,13 @@ describe IO do io.gets("foo").should eq("d") end + it "gets with string as delimiter and chomp = true" do + io = SimpleIOMemory.new("hello world") + io.gets("lo", chomp: true).should eq("hel") + io.gets("rl", chomp: true).should eq(" wo") + io.gets("foo", chomp: true).should eq("d") + end + it "gets with empty string as delimiter" do io = SimpleIOMemory.new("hello\nworld\n") io.gets("").should eq("hello\nworld\n") @@ -271,7 +313,7 @@ describe IO do it "reads all remaining content" do io = SimpleIOMemory.new("foo\nbar\nbaz\n") - io.gets.should eq("foo\n") + io.gets.should eq("foo") io.gets_to_end.should eq("bar\nbaz\n") end @@ -311,9 +353,9 @@ describe IO do io.each_line do |line| case counter when 0 - line.should eq("a\n") + line.should eq("a") when 1 - line.should eq("bb\n") + line.should eq("bb") when 2 line.should eq("cc") end @@ -428,22 +470,32 @@ describe IO do end it "gets" do - str = "Hello world\nFoo\nBar" + str = "Hello world\r\nFoo\nBar" io = SimpleIOMemory.new(str.encode("UCS-2LE")) io.set_encoding("UCS-2LE") - io.gets.should eq("Hello world\n") - io.gets.should eq("Foo\n") + io.gets.should eq("Hello world") + io.gets.should eq("Foo") io.gets.should eq("Bar") io.gets.should be_nil end + it "gets with chomp = false" do + str = "Hello world\r\nFoo\nBar" + io = SimpleIOMemory.new(str.encode("UCS-2LE")) + io.set_encoding("UCS-2LE") + io.gets(chomp: false).should eq("Hello world\r\n") + io.gets(chomp: false).should eq("Foo\n") + io.gets(chomp: false).should eq("Bar") + io.gets(chomp: false).should be_nil + end + it "gets big string" do str = "Hello\nWorld\n" * 10_000 io = SimpleIOMemory.new(str.encode("UCS-2LE")) io.set_encoding("UCS-2LE") 10_000.times do |i| - io.gets.should eq("Hello\n") - io.gets.should eq("World\n") + io.gets.should eq("Hello") + io.gets.should eq("World") end end @@ -453,7 +505,7 @@ describe IO do io = SimpleIOMemory.new(str) io.set_encoding("GB2312") 1000.times do - io.gets.should eq("你好我是人\n") + io.gets.should eq("你好我是人") end end end diff --git a/spec/std/io/memory_spec.cr b/spec/std/io/memory_spec.cr index 4712aa055a4a..a2c433fd09e9 100644 --- a/spec/std/io/memory_spec.cr +++ b/spec/std/io/memory_spec.cr @@ -57,12 +57,19 @@ describe IO::Memory do end it "reads each line" do - io = IO::Memory.new("foo\r\nbar\r\n") - io.gets.should eq("foo\r\n") - io.gets.should eq("bar\r\n") + io = IO::Memory.new("foo\r\nbar\n") + io.gets.should eq("foo") + io.gets.should eq("bar") io.gets.should eq(nil) end + it "reads each line with chomp = false" do + io = IO::Memory.new("foo\r\nbar\r\n") + io.gets(chomp: false).should eq("foo\r\n") + io.gets(chomp: false).should eq("bar\r\n") + io.gets(chomp: false).should eq(nil) + end + it "gets with char as delimiter" do io = IO::Memory.new("hello world") io.gets('w').should eq("hello w") @@ -301,16 +308,25 @@ describe IO::Memory do str = "Hello world\nFoo\nBar\n" + ("1234567890" * 1000) io = IO::Memory.new(str.encode("UCS-2LE")) io.set_encoding("UCS-2LE") - io.gets.should eq("Hello world\n") - io.gets.should eq("Foo\n") - io.gets.should eq("Bar\n") + io.gets(chomp: false).should eq("Hello world\n") + io.gets(chomp: false).should eq("Foo\n") + io.gets(chomp: false).should eq("Bar\n") + end + + it "gets with chomp = false" do + str = "Hello world\nFoo\nBar\n" + ("1234567890" * 1000) + io = IO::Memory.new(str.encode("UCS-2LE")) + io.set_encoding("UCS-2LE") + io.gets.should eq("Hello world") + io.gets.should eq("Foo") + io.gets.should eq("Bar") end it "reads char" do str = "x\nHello world" + ("1234567890" * 1000) io = IO::Memory.new(str.encode("UCS-2LE")) io.set_encoding("UCS-2LE") - io.gets.should eq("x\n") + io.gets(chomp: false).should eq("x\n") str = str[2..-1] str.each_char do |char| io.read_char.should eq(char) diff --git a/spec/std/io/sized_spec.cr b/spec/std/io/sized_spec.cr index c2f8c54056de..4e43018006dc 100644 --- a/spec/std/io/sized_spec.cr +++ b/spec/std/io/sized_spec.cr @@ -94,9 +94,18 @@ describe "IO::Sized" do it "gets" do io = IO::Memory.new "foo\nbar\nbaz" sized = IO::Sized.new(io, read_size: 9) - sized.gets.should eq("foo\n") - sized.gets.should eq("bar\n") + sized.gets.should eq("foo") + sized.gets.should eq("bar") sized.gets.should eq("b") sized.gets.should be_nil end + + it "gets with chomp = false" do + io = IO::Memory.new "foo\nbar\nbaz" + sized = IO::Sized.new(io, read_size: 9) + sized.gets(chomp: false).should eq("foo\n") + sized.gets(chomp: false).should eq("bar\n") + sized.gets(chomp: false).should eq("b") + sized.gets(chomp: false).should be_nil + end end diff --git a/spec/std/logger_spec.cr b/spec/std/logger_spec.cr index b8ac57a6ed25..33d20f28c34f 100644 --- a/spec/std/logger_spec.cr +++ b/spec/std/logger_spec.cr @@ -37,7 +37,7 @@ describe "Logger" do logger.progname = "crystal" logger.warn "message" - r.gets.should match(/W, \[.+? #\d+\] WARN -- crystal: message\n/) + r.gets(chomp: false).should match(/W, \[.+? #\d+\] WARN -- crystal: message\n/) end end @@ -49,7 +49,7 @@ describe "Logger" do end logger.warn "message", "prog" - r.gets.should eq("W prog: message\n") + r.gets(chomp: false).should eq("W prog: message\n") end end @@ -59,8 +59,8 @@ describe "Logger" do logger.error { "message" } logger.unknown { "another message" } - r.gets.should match(/ERROR -- : message\n/) - r.gets.should match(/ ANY -- : another message\n/) + r.gets(chomp: false).should match(/ERROR -- : message\n/) + r.gets(chomp: false).should match(/ ANY -- : another message\n/) end end @@ -70,8 +70,8 @@ describe "Logger" do logger.error("crystal") { "message" } logger.unknown("shard") { "another message" } - r.gets.should match(/ERROR -- crystal: message\n/) - r.gets.should match(/ ANY -- shard: another message\n/) + r.gets(chomp: false).should match(/ERROR -- crystal: message\n/) + r.gets(chomp: false).should match(/ ANY -- shard: another message\n/) end end diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index f5a56e33ca85..8b54667c0e33 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -494,6 +494,7 @@ describe "String" do assert { "かたな\r\n".chomp.should eq("かたな") } assert { "hello\n\n".chomp.should eq("hello\n") } assert { "hello\r\n\n".chomp.should eq("hello\r\n") } + assert { "hello\r\n".chomp('\n').should eq("hello") } assert { "hello".chomp('a').should eq("hello") } assert { "hello".chomp('o').should eq("hell") } @@ -1819,20 +1820,52 @@ describe "String" do end it "gets lines" do + "".lines.should eq([] of String) + "\n".lines.should eq([""] of String) + "\r".lines.should eq(["\r"] of String) + "\r\n".lines.should eq([""] of String) "foo".lines.should eq(["foo"]) - "foo\nbar\nbaz\n".lines.should eq(["foo\n", "bar\n", "baz\n"]) + "foo\n".lines.should eq(["foo"]) + "foo\r\n".lines.should eq(["foo"]) + "foo\nbar\r\nbaz\n".lines.should eq(["foo", "bar", "baz"]) + "foo\nbar\r\nbaz\r\n".lines.should eq(["foo", "bar", "baz"]) + end + + it "gets lines with chomp = false" do + "foo".lines(chomp: false).should eq(["foo"]) + "foo\nbar\r\nbaz\n".lines(chomp: false).should eq(["foo\n", "bar\r\n", "baz\n"]) + "foo\nbar\r\nbaz\r\n".lines(chomp: false).should eq(["foo\n", "bar\r\n", "baz\r\n"]) end it "gets each_line" do lines = [] of String - "foo\n\nbar\nbaz\n".each_line do |line| + "foo\n\nbar\r\nbaz\n".each_line do |line| + lines << line + end + lines.should eq(["foo", "", "bar", "baz"]) + end + + it "gets each_line with chomp = false" do + lines = [] of String + "foo\n\nbar\r\nbaz\r\n".each_line(chomp: false) do |line| lines << line end - lines.should eq(["foo\n", "\n", "bar\n", "baz\n"]) + lines.should eq(["foo\n", "\n", "bar\r\n", "baz\r\n"]) end it "gets each_line iterator" do - iter = "foo\nbar\nbaz\n".each_line + iter = "foo\nbar\r\nbaz\r\n".each_line + iter.next.should eq("foo") + iter.next.should eq("bar") + iter.next.should eq("baz") + iter.next.should be_a(Iterator::Stop) + + iter.rewind + iter.next.should eq("foo") + end + + it "gets each_line iterator with chomp = false" do + iter = "foo\nbar\nbaz\n".each_line(chomp: false) iter.next.should eq("foo\n") iter.next.should eq("bar\n") iter.next.should eq("baz\n") diff --git a/spec/std/tempfile_spec.cr b/spec/std/tempfile_spec.cr index 82440c3ac4e7..7e3cb6b61e08 100644 --- a/spec/std/tempfile_spec.cr +++ b/spec/std/tempfile_spec.cr @@ -48,9 +48,9 @@ describe Tempfile do tempfile.seek(0, IO::Seek::Set) tempfile.tell.should eq(0) tempfile.pos.should eq(0) - tempfile.gets.should eq("Hello!\n") + tempfile.gets(chomp: false).should eq("Hello!\n") tempfile.pos = 0 - tempfile.gets.should eq("Hello!\n") + tempfile.gets(chomp: false).should eq("Hello!\n") tempfile.close end diff --git a/src/file.cr b/src/file.cr index 7e5e06edc478..13c6883cdd10 100644 --- a/src/file.cr +++ b/src/file.cr @@ -440,17 +440,17 @@ class File < IO::FileDescriptor # # loop # end # ``` - def self.each_line(filename, encoding = nil, invalid = nil) + def self.each_line(filename, encoding = nil, invalid = nil, chomp = true) File.open(filename, "r", encoding: encoding, invalid: invalid) do |file| - file.each_line do |line| + file.each_line(chomp: chomp) do |line| yield line end end end # Returns an `Iterator` for each line in *filename*. - def self.each_line(filename, encoding = nil, invalid = nil) - File.open(filename, "r", encoding: encoding, invalid: invalid).each_line + def self.each_line(filename, encoding = nil, invalid = nil, chomp = true) + File.open(filename, "r", encoding: encoding, invalid: invalid).each_line(chomp: chomp) end # Returns all lines in *filename* as an array of strings. @@ -459,9 +459,9 @@ class File < IO::FileDescriptor # File.write("foobar", "foo\nbar") # File.read_lines("foobar") # => ["foo\n", "bar\n"] # ``` - def self.read_lines(filename, encoding = nil, invalid = nil) : Array(String) + def self.read_lines(filename, encoding = nil, invalid = nil, chomp = true) : Array(String) lines = [] of String - each_line(filename, encoding: encoding, invalid: invalid) do |line| + each_line(filename, encoding: encoding, invalid: invalid, chomp: chomp) do |line| lines << line end lines diff --git a/src/http/common.cr b/src/http/common.cr index f2efe0e86015..590af5de6860 100644 --- a/src/http/common.cr +++ b/src/http/common.cr @@ -17,7 +17,7 @@ module HTTP headers = Headers.new while line = io.gets - if line == "\r\n" || line == "\n" + if line.empty? body = nil if body_type.prohibited? body = nil diff --git a/src/http/content.cr b/src/http/content.cr index b9db82bd4a3d..a2312330f112 100644 --- a/src/http/content.cr +++ b/src/http/content.cr @@ -34,10 +34,10 @@ module HTTP @io.read_byte end - def gets(delimiter : Char, limit : Int) : String? + def gets(delimiter : Char, limit : Int, chomp = false) : String? return super if @encoding - @io.gets(delimiter, limit) + @io.gets(delimiter, limit, chomp) end def write(slice : Slice(UInt8)) diff --git a/src/io.cr b/src/io.cr index c2a2c34db610..d0877ae2c567 100644 --- a/src/io.cr +++ b/src/io.cr @@ -534,17 +534,22 @@ module IO # Reads a line from this IO. A line is terminated by the `\n` character. # Returns `nil` if called at the end of this IO. # + # By default the newline is removed from the returned string, + # unless *chomp* is false. + # # ``` - # io = IO::Memory.new "hello\nworld" - # io.gets # => "hello\n" - # io.gets # => "world" - # io.gets # => nil + # io = IO::Memory.new "hello\nworld\nfoo\n" + # io.gets # => "hello" + # io.gets(chomp: false) # => "world\n" + # io.gets # => "foo" + # io.gets # => nil # ``` - def gets : String? - gets '\n' + def gets(chomp = true) : String? + gets '\n', chomp: chomp end - # Reads a line of at most `limit` bytes from this IO. A line is terminated by the `\n` character. + # Reads a line of at most `limit` bytes from this IO. + # A line is terminated by the `\n` character. # Returns `nil` if called at the end of this IO. # # ``` @@ -555,8 +560,8 @@ module IO # io.gets(3) # => "ld" # io.gets(3) # => nil # ``` - def gets(limit : Int) : String? - gets '\n', limit + def gets(limit : Int, chomp = false) : String? + gets '\n', limit: limit, chomp: chomp end # Reads until *delimiter* is found, or the end of the IO is reached. @@ -569,8 +574,8 @@ module IO # io.gets('z') # => "ld" # io.gets('w') # => nil # ``` - def gets(delimiter : Char) : String? - gets delimiter, Int32::MAX + def gets(delimiter : Char, chomp = false) : String? + gets delimiter, Int32::MAX, chomp: chomp end # Reads until *delimiter* is found, `limit` bytes are read, or the end of the IO is reached. @@ -583,15 +588,17 @@ module IO # io.gets('z', 10) # => "ld" # io.gets('w', 10) # => nil # ``` - def gets(delimiter : Char, limit : Int) : String? + def gets(delimiter : Char, limit : Int, chomp = false) : String? raise ArgumentError.new "negative limit" if limit < 0 # # If the char's representation is a single byte and we have an encoding, # search the delimiter in the buffer if delimiter.ascii? && (decoder = decoder()) - return decoder.gets(self, delimiter.ord.to_u8, limit) + return decoder.gets(self, delimiter.ord.to_u8, limit: limit, chomp: chomp) end + chomp_rn = delimiter == '\n' && chomp + buffer = String::Builder.new total = 0 while true @@ -602,8 +609,31 @@ module IO char, char_bytesize = info - buffer << char - break if char == delimiter + # Consider the case of \r\n when the delimiter is \n and chomp = true + if chomp_rn && char == '\r' + info2 = read_char_with_bytesize + unless info2 + buffer << char + break + end + + char2, char_bytesize2 = info2 + if char2 == '\n' + break + end + + buffer << '\r' + total += char_bytesize + break if total >= limit + + buffer << char2 + total += char_bytesize2 + elsif char == delimiter + buffer << char unless chomp + break + else + buffer << char + end total += char_bytesize break if total >= limit @@ -620,7 +650,7 @@ module IO # io.gets("wo") # => "rld" # io.gets("wo") # => nil # ``` - def gets(delimiter : String) : String? + def gets(delimiter : String, chomp = false) : String? # Empty string: read all if delimiter.empty? return gets_to_end @@ -628,12 +658,12 @@ module IO # One byte: use gets(Char) if delimiter.bytesize == 1 - return gets(delimiter.unsafe_byte_at(0).unsafe_chr) + return gets(delimiter.unsafe_byte_at(0).unsafe_chr, chomp: chomp) end # One char: use gets(Char) if delimiter.size == 1 - return gets(delimiter[0]) + return gets(delimiter[0], chomp: chomp) end # The 'hard' case: we read until we match the last byte, @@ -649,9 +679,12 @@ module IO buffer.write_byte(byte) total_bytes += 1 - break if (byte == last_byte) && - (buffer.bytesize >= delimiter.bytesize) && - (buffer.buffer + total_bytes - delimiter.bytesize).memcmp(delimiter.to_unsafe, delimiter.bytesize) == 0 + if (byte == last_byte) && + (buffer.bytesize >= delimiter.bytesize) && + (buffer.buffer + total_bytes - delimiter.bytesize).memcmp(delimiter.to_unsafe, delimiter.bytesize) == 0 + buffer.back(delimiter.bytesize) if chomp + break + end end buffer.to_s end @@ -771,7 +804,7 @@ module IO # iter.next # => "world" # ``` def each_line(*args, **options) - LineIterator.new(self, args, **options) + LineIterator.new(self, args, options) end # Inovkes the given block with each `Char` in this IO. @@ -918,14 +951,14 @@ module IO limit - remaining end - private struct LineIterator(I, A) + private struct LineIterator(I, A, N) include Iterator(String) - def initialize(@io : I, @args : A) + def initialize(@io : I, @args : A, @nargs : N) end def next - @io.gets(*@args) || stop + @io.gets(*@args, **@nargs) || stop end def rewind diff --git a/src/io/argf.cr b/src/io/argf.cr index 13a14b4f7bad..481554e3508c 100644 --- a/src/io/argf.cr +++ b/src/io/argf.cr @@ -32,25 +32,25 @@ class IO::ARGF end # :nodoc: - def gets(delimiter : Char, limit : Int) : String? + def gets(delimiter : Char, limit : Int, chomp = false) : String? return super if @encoding first_initialize unless @initialized if current_io = @current_io - string = current_io.gets(delimiter, limit) + string = current_io.gets(delimiter, limit, chomp) if !string && !@read_from_stdin current_io.close if @argv.empty? @current_io = nil else read_next_argv - string = gets(delimiter, limit) + string = gets(delimiter, limit, chomp) end end elsif !@read_from_stdin && !@argv.empty? read_next_argv - string = gets(delimiter, limit) + string = gets(delimiter, limit, chomp) else string = nil end diff --git a/src/io/buffered.cr b/src/io/buffered.cr index 6a264240ea2b..ce641ad82bba 100644 --- a/src/io/buffered.cr +++ b/src/io/buffered.cr @@ -30,7 +30,7 @@ module IO::Buffered abstract def unbuffered_rewind # :nodoc: - def gets(delimiter : Char, limit : Int) + def gets(delimiter : Char, limit : Int, chomp = false) check_open if delimiter.ord >= 128 || @encoding @@ -61,8 +61,18 @@ module IO::Buffered index += 1 end + advance = index + + if chomp && index > 0 && @in_buffer_rem[index - 1] === delimiter_byte + index -= 1 + + if delimiter == '\n' && index > 0 && @in_buffer_rem[index - 1] === '\r' + index -= 1 + end + end + string = String.new(@in_buffer_rem[0, index]) - @in_buffer_rem += index + @in_buffer_rem += advance return string end @@ -101,6 +111,7 @@ module IO::Buffered break end end + buffer.chomp!(delimiter_byte) if chomp end end diff --git a/src/io/encoding.cr b/src/io/encoding.cr index 012397b67b02..7db14e147bc5 100644 --- a/src/io/encoding.cr +++ b/src/io/encoding.cr @@ -156,7 +156,7 @@ module IO count end - def gets(io, delimiter : UInt8, limit : Int) + def gets(io, delimiter : UInt8, limit : Int, chomp) read(io) return nil if @out_slice.empty? @@ -169,16 +169,12 @@ module IO index += 1 end - string = String.new(@out_slice[0, index]) - advance(index) - return string + return gets_index(index, delimiter, chomp) end # Check if there's limit bytes in the out slice if @out_slice.size >= limit - string = String.new(@out_slice[0, limit]) - advance(limit) - return string + return gets_index(limit, delimiter, chomp) end # We need to read from the out_slice into a String until we find that byte, @@ -208,9 +204,26 @@ module IO end end end + str.chomp!(delimiter) if chomp end end + private def gets_index(index, delimiter, chomp) + advance_increment = index + + if chomp && index > 0 && @out_slice[index - 1] === delimiter + index -= 1 + + if delimiter === '\n' && index > 0 && @out_slice[index - 1] === '\r' + index -= 1 + end + end + + string = String.new(@out_slice[0, index]) + advance(advance_increment) + string + end + def write(io) io.write @out_slice @out_slice = Slice.new(Pointer(UInt8).null, 0) diff --git a/src/io/memory.cr b/src/io/memory.cr index 48f9c671ba31..1d00cef38d5f 100644 --- a/src/io/memory.cr +++ b/src/io/memory.cr @@ -132,7 +132,7 @@ class IO::Memory end # :nodoc: - def gets(delimiter : Char, limit : Int32) + def gets(delimiter : Char, limit : Int32, chomp = false) return super if @encoding || delimiter.ord >= 128 check_open @@ -155,8 +155,18 @@ class IO::Memory end end + advance = index + + if chomp && index > 0 && (@buffer + @pos + index - 1).value === delimiter + index -= 1 + + if delimiter == '\n' && index > 0 && (@buffer + @pos + index - 1).value === '\r' + index -= 1 + end + end + string = String.new(@buffer + @pos, index) - @pos += index + @pos += advance string end diff --git a/src/io/sized.cr b/src/io/sized.cr index dfc0b87191bb..0e1a30bc4085 100644 --- a/src/io/sized.cr +++ b/src/io/sized.cr @@ -49,14 +49,19 @@ module IO end end - def gets(delimiter : Char, limit : Int) : String? + def gets(delimiter : Char, limit : Int, chomp = false) : String? check_open return super if @encoding return nil if @read_remaining == 0 + # We can't pass chomp here, because it will remove part of the delimiter + # and then we won't know how much we consumed from @io, so we chomp later string = @io.gets(delimiter, Math.min(limit, @read_remaining)) - @read_remaining -= string.bytesize if string + if string + @read_remaining -= string.bytesize + string = string.chomp(delimiter) if chomp + end string end diff --git a/src/string.cr b/src/string.cr index b4b6effe8802..b8a7ab212191 100644 --- a/src/string.cr +++ b/src/string.cr @@ -994,7 +994,9 @@ class String # "hello".chomp('a') # => "hello" # ``` def chomp(char : Char) - if ends_with?(char) + if char == '\n' + chomp + elsif ends_with?(char) unsafe_byte_slice_string(0, bytesize - char.bytesize) else self @@ -2657,9 +2659,9 @@ class String ary end - def lines + def lines(chomp = true) lines = [] of String - each_line do |line| + each_line(chomp: chomp) do |line| lines << line end lines @@ -2678,11 +2680,21 @@ class String # # => EVEN THE MONKEY SEEMS TO want # # => A LITTLE COAT OF STRAW # ``` - def each_line + def each_line(chomp = true) + return if empty? + offset = 0 while byte_index = byte_index('\n'.ord.to_u8, offset) - yield unsafe_byte_slice_string(offset, byte_index + 1 - offset) + count = byte_index - offset + 1 + if chomp + count -= 1 + if offset + count > 0 && to_unsafe[offset + count - 1] === '\r' + count -= 1 + end + end + + yield unsafe_byte_slice_string(offset, count) offset = byte_index + 1 end @@ -2692,8 +2704,8 @@ class String end # Returns an `Iterator` which yields each line of this string (see `String#each_line`). - def each_line - LineIterator.new(self) + def each_line(chomp = true) + LineIterator.new(self, chomp) end # Converts camelcase boundaries to underscores. @@ -3452,11 +3464,7 @@ class String private class LineIterator include Iterator(String) - @string : String - @offset : Int32 - @end : Bool - - def initialize(@string) + def initialize(@string : String, @chomp : Bool) @offset = 0 @end = false end @@ -3466,7 +3474,15 @@ class String byte_index = @string.byte_index('\n'.ord.to_u8, @offset) if byte_index - value = @string.unsafe_byte_slice_string(@offset, byte_index + 1 - @offset) + count = byte_index - @offset + 1 + if @chomp + count -= 1 + if @offset + count > 0 && @string.to_unsafe[@offset + count - 1] === '\r' + count -= 1 + end + end + + value = @string.unsafe_byte_slice_string(@offset, count) @offset = byte_index + 1 else if @offset == @string.bytesize diff --git a/src/string/builder.cr b/src/string/builder.cr index bb13678057a4..2f5f691d1557 100644 --- a/src/string/builder.cr +++ b/src/string/builder.cr @@ -7,8 +7,8 @@ class String::Builder include IO getter bytesize : Int32 - @capacity : Int32 - @buffer : Pointer(UInt8) + getter capacity : Int32 + getter buffer : Pointer(UInt8) def initialize(capacity : Int = 64) String.check_capacity_in_bounds(capacity) @@ -61,6 +61,29 @@ class String::Builder @bytesize == 0 end + # Chomps the last byte from the string buffer. + # If the byte is '\n' and there's a '\r' before it, it is + # also removed. + def chomp!(byte : UInt8) + if bytesize > 0 && buffer[bytesize - 1] == byte + back(1) + + if byte === '\n' && bytesize > 0 && buffer[bytesize - 1] === '\r' + back(1) + end + end + end + + # Moves the write pointer, and the resulting string bytesize, + # by the given amount + def back(amount : Int) + unless 0 <= amount < @bytesize + raise ArgumentError.new "invalid back amount" + end + + @bytesize -= amount + end + def to_s raise "can only invoke 'to_s' once on String::Builder" if @finished @finished = true