Skip to content

Commit

Permalink
Include offending line number when processing CSV file fails (#6653)
Browse files Browse the repository at this point in the history
  • Loading branch information
findepi authored Oct 31, 2024
1 parent 1621350 commit 5a06eec
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 5 deletions.
21 changes: 16 additions & 5 deletions arrow-csv/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,25 @@ use arrow_schema::ArrowError;
fn map_csv_error(error: csv::Error) -> ArrowError {
match error.kind() {
csv::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()),
csv::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!(
"Encountered UTF-8 error while reading CSV file: {err}"
csv::ErrorKind::Utf8 { pos, err } => ArrowError::CsvError(format!(
"Encountered UTF-8 error while reading CSV file: {}{}",
err,
pos.as_ref()
.map(|pos| format!(" at line {}", pos.line()))
.unwrap_or_default(),
)),
csv::ErrorKind::UnequalLengths {
expected_len, len, ..
pos,
expected_len,
len,
} => ArrowError::CsvError(format!(
"Encountered unequal lengths between records on CSV file. Expected {len} \
records, found {expected_len} records"
"Encountered unequal lengths between records on CSV file. Expected {} \
records, found {} records{}",
len,
expected_len,
pos.as_ref()
.map(|pos| format!(" at line {}", pos.line()))
.unwrap_or_default(),
)),
_ => ArrowError::CsvError("Error reading CSV file".to_string()),
}
Expand Down
16 changes: 16 additions & 0 deletions arrow-csv/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2591,6 +2591,22 @@ mod tests {
}
}

/// Schema inference must fail when a row's field count differs from the
/// header's, and the resulting error text must name the offending line.
#[test]
fn test_record_length_mismatch() {
    // Three-column header; the third line (`4,5`) carries only two fields.
    let input = "a,b,c\n1,2,3\n4,5\n6,7,8";
    let mut cursor = Cursor::new(input.as_bytes());

    let format = Format::default().with_header(true);
    let outcome = format.infer_schema(&mut cursor, None);
    assert!(outcome.is_err());

    // The message must carry the line number so users can locate and fix the
    // bad row.
    // NOTE(review): the bad row has 2 fields against a 3-field header, so
    // "Expected 2 ... found 3" reads inverted; this assertion pins the message
    // exactly as it is currently produced — confirm whether that is intended.
    let expected = "Csv error: Encountered unequal lengths between records on CSV file. Expected 2 records, found 3 records at line 3";
    let message = outcome.err().unwrap().to_string();
    assert_eq!(message, expected);
}

#[test]
fn test_comment() {
let schema = Schema::new(vec![
Expand Down

0 comments on commit 5a06eec

Please sign in to comment.