From e29ffe7f822eb226e7f7a483f1a7be80d5c5b8a0 Mon Sep 17 00:00:00 2001
From: chopraanmol1
Date: Fri, 5 Oct 2018 13:03:30 +0530
Subject: [PATCH] Restore support of URL for CSV

### Benchmark

```
file_name = 'test/files/Bibelbund.csv'
MemoryProfiler.report{ Roo::Spreadsheet.open(file_name).tap{|x|(2..x.last_row).each{|i| x.row(i)}} }
puts Benchmark.measure{ Roo::Spreadsheet.open(file_name).tap{|x|(2..x.last_row).each{|i| x.row(i)}} }
```

Master
```
Total allocated: 39705265 bytes (561479 objects)
Total retained: 768 bytes (4 objects)
0.300000 0.000000 0.300000 ( 0.304877)
```

Modified:
```
Total allocated: 16952085 bytes (234487 objects)
Total retained: 768 bytes (4 objects)
0.190000 0.000000 0.190000 ( 0.181199)
```
---
 lib/roo/csv.rb       | 32 ++++++++++++++++++++------------
 test/roo/test_csv.rb | 28 ++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/lib/roo/csv.rb b/lib/roo/csv.rb
index 51a28d00..516def6a 100644
--- a/lib/roo/csv.rb
+++ b/lib/roo/csv.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "csv"
 require "time"
 
@@ -63,25 +65,31 @@ def celltype_class(value)
     def read_cells(sheet = default_sheet)
       sheet ||= default_sheet
       return if @cells_read[sheet]
-      set_row_count(sheet)
-      set_column_count(sheet)
-      row_num = 1
+      row_num = 0
+      max_col_num = 0
 
       each_row csv_options do |row|
-        row.each_with_index do |elem, col_num|
-          coordinate = [row_num, col_num + 1]
+        row_num += 1
+        col_num = 0
+
+        row.each do |elem|
+          col_num += 1
+          coordinate = [row_num, col_num]
           @cell[coordinate] = elem
           @cell_type[coordinate] = celltype_class(elem)
         end
-        row_num += 1
+
+        max_col_num = col_num if col_num > max_col_num
       end
 
+      set_row_count(sheet, row_num)
+      set_column_count(sheet, max_col_num)
       @cells_read[sheet] = true
     end
 
     def each_row(options, &block)
       if uri?(filename)
-        each_row_using_temp_dir(filename)
+        each_row_using_tempdir(options, &block)
       elsif is_stream?(filename_or_stream)
         ::CSV.new(filename_or_stream, options).each(&block)
       else
@@ -89,24 +97,24 @@ def each_row(options, &block)
       end
     end
 
-    def each_row_using_tempdir
+    def each_row_using_tempdir(options, &block)
       ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir|
         tmp_filename = download_uri(filename, tmpdir)
         ::CSV.foreach(tmp_filename, options, &block)
       end
     end
 
-    def set_row_count(sheet)
+    def set_row_count(sheet, last_row)
       @first_row[sheet] = 1
-      @last_row[sheet] = ::CSV.readlines(@filename, csv_options).size
+      @last_row[sheet] = last_row
       @last_row[sheet] = @first_row[sheet] if @last_row[sheet].zero?
 
       nil
     end
 
-    def set_column_count(sheet)
+    def set_column_count(sheet, last_col)
       @first_column[sheet] = 1
-      @last_column[sheet] = (::CSV.readlines(@filename, csv_options).max_by(&:length) || []).size
+      @last_column[sheet] = last_col
       @last_column[sheet] = @first_column[sheet] if @last_column[sheet].zero?
 
       nil
diff --git a/test/roo/test_csv.rb b/test/roo/test_csv.rb
index c5e251df..16fa6911 100644
--- a/test/roo/test_csv.rb
+++ b/test/roo/test_csv.rb
@@ -13,6 +13,34 @@ def test_sheets
     end
   end
 
+  def test_download_uri_with_query_string
+    file = filename("Bibelbund")
+    port = 12_344
+    url = "#{local_server(port)}/#{file}?query-param=value"
+
+    start_local_server(file, port) do
+      csv = roo_class.new(url)
+      assert_equal "Aktuelle Seite", csv.cell("h", 12)
+      assert_equal 1, csv.first_row
+      assert_equal 3735, csv.last_row
+      assert_equal 1, csv.first_column
+      assert_equal 8, csv.last_column
+    end
+  end
+
+  def test_open_stream
+    file = filename("Bibelbund")
+    file_contents = File.read File.join(TESTDIR, file)
+    stream = StringIO.new(file_contents)
+    csv = roo_class.new(stream)
+
+    assert_equal "Aktuelle Seite", csv.cell("h", 12)
+    assert_equal 1, csv.first_row
+    assert_equal 3735, csv.last_row
+    assert_equal 1, csv.first_column
+    assert_equal 8, csv.last_column
+  end
+
   def test_nil_rows_and_lines_csv
     # x_123
     oo = Roo::CSV.new(File.join(TESTDIR,'Bibelbund.csv'))