From ff5ed33cf94fc3304bb18a0237976744772da495 Mon Sep 17 00:00:00 2001
From: zeljko predjeskovic
Date: Sun, 8 Dec 2024 11:17:42 +0100
Subject: [PATCH] CSV.parse - Support Headers

---
 spec/std/csv/csv_parse_spec.cr | 15 +++++++++++++++
 src/csv.cr                     | 21 +++++++++++++++++++++
 src/csv/parser.cr              | 24 ++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/spec/std/csv/csv_parse_spec.cr b/spec/std/csv/csv_parse_spec.cr
index 5f9906f35333..c93ae5be2b53 100644
--- a/spec/std/csv/csv_parse_spec.cr
+++ b/spec/std/csv/csv_parse_spec.cr
@@ -49,6 +49,21 @@ describe CSV do
       CSV.parse(%("","")).should eq([["", ""]])
     end
 
+    it "parses to hashes" do
+      csv_text = "Index,Customer Id,First Name,Last Name
+      1,DD37Cf93aecA6Dc,Sheryl,Baxter
+      2,1Ef7b82A4CAAD10,Preston,Lozano
+      3,6F94879bDAfE5a6,,Berry"
+
+      CSV.parse_to_h(csv_text).should eq([{"Index" => "1", "Customer Id" => "DD37Cf93aecA6Dc", "First Name" => "Sheryl", "Last Name" => "Baxter"},
+                                          {"Index" => "2", "Customer Id" => "1Ef7b82A4CAAD10", "First Name" => "Preston", "Last Name" => "Lozano"},
+                                          {"Index" => "3", "Customer Id" => "6F94879bDAfE5a6", "First Name" => "", "Last Name" => "Berry"}])
+    end
+
+    it "parses to hashes with rows shorter than the header row" do
+      CSV.parse_to_h("a,b,c\n1,2").should eq([{"a" => "1", "b" => "2", "c" => ""}])
+    end
+
     it "raises if single quote in the middle" do
       expect_raises CSV::MalformedCSVError, "Unexpected quote at line 1, column 4" do
         CSV.parse(%(hel"lo))
diff --git a/src/csv.cr b/src/csv.cr
index 6751085d28cc..2a3c5dd79f67 100644
--- a/src/csv.cr
+++ b/src/csv.cr
@@ -78,6 +78,27 @@ class CSV
     Parser.new(string_or_io, separator, quote_char).parse
   end
 
+  # Parses a CSV or `IO` into an array of hashes using the first row as headers.
+  #
+  # Values are stripped of surrounding whitespace. Fields missing from a row
+  # shorter than the header row are set to an empty string.
+  #
+  # Takes optional *separator* and *quote_char* arguments for defining
+  # non-standard csv cell separators and quote characters.
+  #
+  # ```
+  # require "csv"
+  #
+  # CSV.parse_to_h("name,age,city\nJohn,30,New York\nJane,25,San Francisco")
+  # # => [
+  # #   {"name" => "John", "age" => "30", "city" => "New York"},
+  # #   {"name" => "Jane", "age" => "25", "city" => "San Francisco"}
+  # # ]
+  # ```
+  def self.parse_to_h(string_or_io : String | IO, separator : Char = DEFAULT_SEPARATOR, quote_char : Char = DEFAULT_QUOTE_CHAR) : Array(Hash(String, String))
+    Parser.new(string_or_io, separator, quote_char).parse_to_h
+  end
+
   # Yields each of a CSV's rows as an `Array(String)`.
   #
   # See `CSV.parse` about the *separator* and *quote_char* arguments.
diff --git a/src/csv/parser.cr b/src/csv/parser.cr
index 57491b726dce..32d0931d921e 100644
--- a/src/csv/parser.cr
+++ b/src/csv/parser.cr
@@ -19,6 +19,18 @@ class CSV::Parser
     rows
   end
 
+  # Parses the remaining rows into an array of hashes, using the next row
+  # as the headers. Returns an empty array if no rows remain.
+  def parse_to_h : Array(Hash(String, String))
+    rows = [] of Hash(String, String)
+    if headers = next_row
+      while row = next_row
+        rows << parse_row_to_h_internal(headers, row)
+      end
+    end
+    rows
+  end
+
   # Yields each of the remaining rows as an `Array(String)`.
   def each_row(&) : Nil
     while row = next_row
@@ -71,5 +83,17 @@ class CSV::Parser
     end
   end
 
+  # Maps each header to the stripped value at the same position in *row*.
+  # Rows shorter than *headers* yield "" for the missing fields; fields
+  # beyond the last header are ignored.
+  private def parse_row_to_h_internal(headers : Array(String), row : Array(String)) : Hash(String, String)
+    h = {} of String => String
+    headers.each_with_index do |header, i|
+      # `row[i]?` returns nil instead of raising IndexError on a short row.
+      h[header] = row[i]?.try(&.strip) || ""
+    end
+    h
+  end
+
   private struct RowIterator
     include Iterator(Array(String))