Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow CSV.open with StringIO argument #302

Merged
merged 16 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 56 additions & 18 deletions lib/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1508,10 +1508,8 @@ def generate_lines(rows, **options)

#
# :call-seq:
# open(file_path, mode = "rb", **options ) -> new_csv
# open(io, mode = "rb", **options ) -> new_csv
# open(file_path, mode = "rb", **options ) { |csv| ... } -> object
# open(io, mode = "rb", **options ) { |csv| ... } -> object
# open(path_or_io, mode = "rb", **options ) -> new_csv
# open(path_or_io, mode = "rb", **options ) { |csv| ... } -> object
#
# possible options elements:
# keyword form:
Expand All @@ -1520,7 +1518,7 @@ def generate_lines(rows, **options)
# :undef => :replace # replace undefined conversion
# :replace => string # replacement string ("?" or "\uFFFD" if not specified)
#
# * Argument +path+, if given, must be the path to a file.
# * Argument +path_or_io+, must be a file path or an \IO stream.
# :include: ../doc/csv/arguments/io.rdoc
# * Argument +mode+, if given, must be a \File mode.
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
Expand All @@ -1544,6 +1542,9 @@ def generate_lines(rows, **options)
# path = 't.csv'
# File.write(path, string)
#
# string_io = StringIO.new
# string_io << "foo,0\nbar,1\nbaz,2\n"
#
# ---
#
# With no block given, returns a new \CSV object.
Expand All @@ -1556,6 +1557,9 @@ def generate_lines(rows, **options)
# csv = CSV.open(File.open(path))
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
#
# Create a \CSV object using a \StringIO:
# csv = CSV.open(string_io)
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
# ---
#
# With a block given, calls the block with the created \CSV object;
Expand All @@ -1573,16 +1577,24 @@ def generate_lines(rows, **options)
# Output:
# #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
#
# Using a \StringIO:
# csv = CSV.open(string_io) {|csv| p csv}
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
# Output:
# #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
# ---
#
# Raises an exception if the argument is not a \String object or \IO object:
# # Raises TypeError (no implicit conversion of Symbol into String)
# CSV.open(:foo)
def open(filename, mode="r", **options)
def open(filename_or_io, mode="r", **options)
# wrap a File opened with the remaining +args+ with no newline
# decorator
file_opts = {}
may_enable_bom_deletection_automatically(mode, options, file_opts)
may_enable_bom_detection_automatically(filename_or_io,
mode,
options,
file_opts)
file_opts.merge!(options)
unless file_opts.key?(:newline)
file_opts[:universal_newline] ||= false
Expand All @@ -1592,14 +1604,19 @@ def open(filename, mode="r", **options)
options.delete(:replace)
options.delete_if {|k, _| /newline\z/.match?(k)}

begin
f = File.open(filename, mode, **file_opts)
rescue ArgumentError => e
raise unless /needs binmode/.match?(e.message) and mode == "r"
mode = "rb"
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
retry
if filename_or_io.is_a?(StringIO)
f = create_stringio(filename_or_io.string, mode, **file_opts)
else
begin
f = File.open(filename_or_io, mode, **file_opts)
rescue ArgumentError => e
raise unless /needs binmode/.match?(e.message) and mode == "r"
mode = "rb"
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
retry
end
end

begin
csv = new(f, **options)
rescue Exception
Expand Down Expand Up @@ -1886,16 +1903,37 @@ def table(path, **options)
private_constant :ON_WINDOWS

private
def may_enable_bom_deletection_automatically(mode, options, file_opts)
# "bom|utf-8" may be buggy on Windows:
# https://bugs.ruby-lang.org/issues/20526
return if ON_WINDOWS
def may_enable_bom_detection_automatically(filename_or_io,
mode,
options,
file_opts)
if filename_or_io.is_a?(StringIO)
# Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support:
# https://github.com/ruby/stringio/pull/47
return if RUBY_VERSION < "2.7"
else
# "bom|utf-8" may be buggy on Windows:
# https://bugs.ruby-lang.org/issues/20526
return if ON_WINDOWS
end
return unless Encoding.default_external == Encoding::UTF_8
return if options.key?(:encoding)
return if options.key?(:external_encoding)
return if mode.include?(":")
file_opts[:encoding] = "bom|utf-8"
end

if RUBY_VERSION < "2.7"
def create_stringio(str, mode, opts)
opts.delete_if { |k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k) }
kou marked this conversation as resolved.
Show resolved Hide resolved
raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty?
StringIO.new(str, mode)
end
else
def create_stringio(str, mode, opts)
StringIO.new(str, mode, **opts)
end
end
end

# :call-seq:
Expand Down
18 changes: 18 additions & 0 deletions test/csv/interface/test_read.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ def test_foreach
assert_equal(@rows, rows)
end

def test_foreach_stringio
string_io = StringIO.new(@data)
rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a
assert_equal(@rows, rows)
end

def test_foreach_stringio_with_bom
if RUBY_VERSION < "2.7"
# Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support:
# https://github.com/ruby/stringio/pull/47
omit("StringIO's BOM support isn't available with Ruby < 2.7")
end

string_io = StringIO.new("\ufeff#{@data}") # U+FEFF ZERO WIDTH NO-BREAK SPACE
rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a
assert_equal(@rows, rows)
end

if respond_to?(:ractor)
ractor
def test_foreach_in_ractor
Expand Down