Skip to content

Commit

Permalink
Fix issue with inline strings. (#11907)
Browse files Browse the repository at this point in the history
Inline strings in Excel files were not processed correctly.
Now fixed and test added.

(cherry picked from commit bfa52b7)
  • Loading branch information
jdunkerley committed Dec 18, 2024
1 parent 6075dd7 commit 38f2582
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ public void startElement(String uri, String localName, String qName, Attributes
break;
case "is": // Inline String
isIsOpen = true;
value.setLength(0);
break;
}
}
Expand Down
Binary file added test/Table_Tests/data/Sales_Sample_Data.xlsx
Binary file not shown.
23 changes: 23 additions & 0 deletions test/Table_Tests/src/IO/Excel_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,29 @@ add_specs suite_builder =
workbook.close . should_equal Nothing
workbook.read "Sheet1" . should_fail_with Illegal_State

# Regression test for reading an XLSX workbook whose text cells are stored as
# inline strings (per the test name). It reads `Sales_Sample_Data.xlsx` from
# the project's data directory and checks the sheet list plus the full
# contents of the first and last sheets.
group_builder.specify "should be able to read a XLSX with inline strings" <|
workbook = (enso_project.data / "Sales_Sample_Data.xlsx") . read
# The sheet names look like one sheet per month (YYYYMM), 202201 through 202405 - 29 in total.
workbook.sheet_count . should_equal 29
workbook.sheet_names . should_equal ['202201', '202202', '202203', '202204', '202205', '202206', '202207', '202208', '202209', '202210', '202211', '202212', '202301', '202302', '202303', '202304', '202305', '202306', '202307', '202308', '202309', '202310', '202311', '202312', '202401', '202402', '202403', '202404', '202405']

# Both sheets checked below are expected to share this header row.
column_names = ['TERRITORY', 'DEALSIZE', 'PRODUCTLINE', 'OrderCount', 'SALES']

# First sheet: 10 data rows; three text columns and two numeric columns.
first_sheet = workbook.read '202201'
first_sheet.column_names . should_equal column_names
first_sheet.at 'TERRITORY' . to_vector . should_equal ['EMEA', 'NA', 'EMEA', 'EMEA', 'EMEA', 'EMEA', 'NA', 'EMEA', 'EMEA', 'EMEA']
first_sheet.at 'DEALSIZE' . to_vector . should_equal ['Medium', 'Medium', 'Small', 'Small', 'Small', 'Medium', 'Small', 'Medium', 'Small', 'Medium']
first_sheet.at 'PRODUCTLINE' . to_vector . should_equal ['Classic Cars', 'Vintage Cars', 'Classic Cars', 'Vintage Cars', 'Trucks and Buses', 'Trains', 'Vintage Cars', 'Vintage Cars', 'Trains', 'Trucks and Buses']
first_sheet.at 'OrderCount' . to_vector . should_equal [7, 3, 3, 7, 2, 1, 3, 3, 1, 9]
first_sheet.at 'SALES' . to_vector . should_equal [34585.61, 13349.31, 6606.17, 15482.34, 2856.68, 3227.63, 5647.99, 12347.2, 1705.92, 33944.75]

# Last sheet: 18 data rows, verified the same way as the first sheet.
last_sheet = workbook.read '202405'
last_sheet.column_names . should_equal column_names
last_sheet.at 'TERRITORY' . to_vector . should_equal ['NA', 'EMEA', 'EMEA', 'NA', 'EMEA', 'EMEA', 'NA', 'EMEA', 'EMEA', 'EMEA', 'NA', 'NA', 'EMEA', 'EMEA', 'EMEA', 'NA', 'EMEA', 'NA']
last_sheet.at 'DEALSIZE' . to_vector . should_equal ['Large', 'Medium', 'Small', 'Medium', 'Medium', 'Small', 'Small', 'Medium', 'Medium', 'Small', 'Medium', 'Small', 'Small', 'Small', 'Large', 'Medium', 'Large', 'Small']
last_sheet.at 'PRODUCTLINE' . to_vector . should_equal ['Classic Cars', 'Planes', 'Trucks and Buses', 'Classic Cars', 'Trucks and Buses', 'Vintage Cars', 'Vintage Cars', 'Classic Cars', 'Motorcycles', 'Classic Cars', 'Trucks and Buses', 'Trains', 'Planes', 'Motorcycles', 'Classic Cars', 'Trains', 'Trucks and Buses', 'Trucks and Buses']
last_sheet.at 'OrderCount' . to_vector . should_equal [2, 4, 2, 5, 3, 2, 1, 10, 1, 6, 3, 1, 6, 1, 3, 1, 1, 2]
last_sheet.at 'SALES' . to_vector . should_equal [16914.55, 17742.36, 3838.83, 22429.29, 14910.28, 3513.13, 2095.45, 46915.8, 4764.6, 8776.53, 14680.72, 1193.04, 12686.65, 2803.2, 28823.4, 3256.35, 8498.0, 4772.04]

group_builder.specify "should be able to read a mixed Date and DateTime column" <|
table = (enso_project.data / "MixedExcel.xlsx") . read ..Sheet
table.row_count . should_equal 5
Expand Down

0 comments on commit 38f2582

Please sign in to comment.