Skip to content

Commit

Permalink
Merge pull request #1486 from bonjune/csv-schema-multiline
Browse files Browse the repository at this point in the history
  • Loading branch information
cartermp authored May 28, 2023
2 parents bbe54ed + 21c5f26 commit 5088a47
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 9 deletions.
44 changes: 36 additions & 8 deletions src/FSharp.Data.Csv.Core/CsvInference.fs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module FSharp.Data.Runtime.CsvInference

open System
open System.IO
open System.Text
open System.Text.RegularExpressions
open FSharp.Data
open FSharp.Data.Runtime
Expand Down Expand Up @@ -145,28 +146,55 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider
header)
| None -> Array.init numberOfColumns (fun i -> "Column" + (i + 1).ToString())

/// Splits an explicit schema string into its comma-separated column entries.
///
/// The reader is consumed one character at a time:
///   * a double quote is never emitted; it only toggles "inside quotes" mode,
///   * a comma outside quotes ends the current entry,
///   * a comma inside quotes is kept as part of the entry, so a quoted column
///     name may itself contain commas,
///   * CR and LF are dropped everywhere, which lets a schema span several lines.
///
/// Returns the entries in order of appearance. Entries are not trimmed here.
/// The text remaining at end of input is always committed, so an empty input
/// yields a single empty entry and a trailing comma yields a trailing empty entry.
let readSchema (reader: StringReader) =
    let schemas = ResizeArray<string>()
    let chars = StringBuilder()

    // Commits whatever has been accumulated so far as one schema entry.
    let commit () =
        schemas.Add(chars.ToString())
        chars.Clear() |> ignore

    // The quoted state is threaded through as an argument because a closure
    // cannot capture a `let mutable` local; every recursive call is in tail position.
    let rec iter insideQuotes =
        match reader.Read() with
        | -1 ->
            // End of input: flush the last entry, even if it is empty.
            commit ()
        | code ->
            match char code with
            // The quote character itself is dropped; it only flips the mode.
            | '"' -> iter (not insideQuotes)
            // Only an unquoted comma separates entries.
            | ',' when not insideQuotes ->
                commit ()
                iter false
            // Line breaks are skipped so multi-line schemas collapse to one line.
            | '\r'
            | '\n' -> iter insideQuotes
            | c ->
                chars.Append(c) |> ignore
                iter insideQuotes

    iter false
    schemas

// If the schema is specified explicitly, then parse the schema
// (This can specify just types, names of columns or a mix of both)
let schema =
if String.IsNullOrWhiteSpace schema then
Array.zeroCreate headers.Length
else
use reader = new StringReader(schema.Replace("\n", ""))
use reader = new StringReader(schema)

let schemaStr =
CsvReader.readCsvFile reader "," '"'
|> Seq.exactlyOne
|> fst
let schemaStr = readSchema reader

if schemaStr.Length > headers.Length then
if schemaStr.Count > headers.Length then
failwithf
"The provided schema contains %d columns, the inference found %d columns - please check the number of columns and the separator "
schemaStr.Length
schemaStr.Count
headers.Length

let schema = Array.zeroCreate headers.Length

for index = 0 to schemaStr.Length - 1 do
for index = 0 to schemaStr.Count - 1 do
let item = schemaStr.[index].Trim()

match item with
Expand Down
3 changes: 2 additions & 1 deletion tests/FSharp.Data.Tests/CsvProvider.fs
Original file line number Diff line number Diff line change
Expand Up @@ -670,5 +670,6 @@ let ``Can infer from a multiline schema`` () =
ProductQuantity (string),
ProductPrice (string)">.GetSample ()
let firstRow = csv.Rows |> Seq.head
csv.NumberOfColumns |> should equal 16
firstRow.OrderCreated |> should equal "2022-01-01 10:00:00"

firstRow.FioFull |> should equal "John Smith"

0 comments on commit 5088a47

Please sign in to comment.