From 6ef3352fb81970f925dd33eac52b20d035bc1051 Mon Sep 17 00:00:00 2001 From: Zeljko Predjeskovic Date: Mon, 9 Dec 2024 20:11:13 +0100 Subject: [PATCH] updated parse_to_h and added test cases --- spec/std/csv/csv_parse_spec.cr | 31 +++++++++++++++++++++++++++---- src/csv/lexer.cr | 3 +++ src/csv/parser.cr | 20 +++++++++++++------- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/spec/std/csv/csv_parse_spec.cr b/spec/std/csv/csv_parse_spec.cr index c93ae5be2b53..3e505cd349ea 100644 --- a/spec/std/csv/csv_parse_spec.cr +++ b/spec/std/csv/csv_parse_spec.cr @@ -50,16 +50,39 @@ describe CSV do end it "parses to hashes" do - csv_text = "Index,Customer Id,First Name,Last Name - 1,DD37Cf93aecA6Dc,Sheryl,Baxter - 2,1Ef7b82A4CAAD10,Preston,Lozano - 3,6F94879bDAfE5a6,,Berry" + csv_text = "Index,Customer Id,First Name,Last Name\n\n1,DD37Cf93aecA6Dc,Sheryl,Baxter\n2,1Ef7b82A4CAAD10,Preston,Lozano\n3,6F94879bDAfE5a6,,Berry, Jerry, \n" CSV.parse_to_h(csv_text).should eq([{"Index" => "1", "Customer Id" => "DD37Cf93aecA6Dc", "First Name" => "Sheryl", "Last Name" => "Baxter"}, {"Index" => "2", "Customer Id" => "1Ef7b82A4CAAD10", "First Name" => "Preston", "Last Name" => "Lozano"}, {"Index" => "3", "Customer Id" => "6F94879bDAfE5a6", "First Name" => "", "Last Name" => "Berry"}]) end + it "parses to hashes with no headers" do + csv_text = "\n1,DD37Cf93aecA6Dc,Sheryl,Baxter\n2,1Ef7b82A4CAAD10,Preston,Lozano\n3,6F94879bDAfE5a6,,Berry" + + actual = [{} of String => String, {} of String => String, {} of String => String] + + CSV.parse_to_h(csv_text).should eq(actual) + end + + it "parses to hashes with only headers" do + csv_text = "Index,Customer Id,First Name,Last Name" + + CSV.parse_to_h(csv_text).should eq([] of Hash(String, String)) + end + + it "parses to hashes remaining rows" do + csv_text = "Index,Customer Id,First Name,Last Name\n1,DD37Cf93aecA6Dc,Sheryl,Baxter\n2,1Ef7b82A4CAAD10,Preston,Lozano\n3,6F94879bDAfE5a6,,Berry" + parser = CSV::Parser.new(csv_text) 
+ # skip header + parser.next_row + # skip rows + parser.next_row + parser.next_row + + parser.parse_to_h.should eq([{"Index" => "3", "Customer Id" => "6F94879bDAfE5a6", "First Name" => "", "Last Name" => "Berry"}]) + end + it "raises if single quote in the middle" do expect_raises CSV::MalformedCSVError, "Unexpected quote at line 1, column 4" do CSV.parse(%(hel"lo)) diff --git a/src/csv/lexer.cr b/src/csv/lexer.cr index 9d3d04c68c0f..5109f630a7e0 100644 --- a/src/csv/lexer.cr +++ b/src/csv/lexer.cr @@ -29,6 +29,9 @@ abstract class CSV::Lexer getter separator : Char getter quote_char : Char + # :nodoc: + protected getter line_number : Int32 + # :nodoc: def initialize(@separator : Char = DEFAULT_SEPARATOR, @quote_char : Char = DEFAULT_QUOTE_CHAR) @token = Token.new diff --git a/src/csv/parser.cr b/src/csv/parser.cr index 32d0931d921e..a3460b302fe7 100644 --- a/src/csv/parser.cr +++ b/src/csv/parser.cr @@ -21,9 +21,17 @@ class CSV::Parser def parse_to_h : Array(Hash(String, String)) rows = [] of Hash(String, String) + row_number = @lexer.line_number + + rewind if headers = next_row - while row = next_row - rows << parse_row_to_h_internal(headers, row) + while @lexer.line_number < row_number + next_row + end + each_row do |row| + if parsed_row = parse_row_to_h_internal(headers, row) + rows << parsed_row + end end end rows @@ -81,12 +89,10 @@ class CSV::Parser end end - private def parse_row_to_h_internal(headers : Array(String), row : Array(String)) : Hash(String, String) + private def parse_row_to_h_internal(headers : Array(String), row : Array(String)) : Hash(String, String) | Nil h = {} of String => String - headers.each_with_index do |header, i| - h[header] = row[i].strip || "" - end - h + row.empty? ? return nil : headers.each_with_index { |header, i| h[header] = row[i] } + return h end private struct RowIterator