-
Notifications
You must be signed in to change notification settings - Fork 9
/
DataSourceExamples.java
159 lines (127 loc) · 6.59 KB
/
DataSourceExamples.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
package io.zulia.data.test;
import io.zulia.data.input.FileDataInputStream;
import io.zulia.data.source.spreadsheet.SpreadsheetRecord;
import io.zulia.data.source.spreadsheet.SpreadsheetSource;
import io.zulia.data.source.spreadsheet.SpreadsheetSourceFactory;
import io.zulia.data.source.spreadsheet.csv.CSVRecord;
import io.zulia.data.source.spreadsheet.csv.CSVSource;
import io.zulia.data.source.spreadsheet.csv.CSVSourceConfig;
import io.zulia.data.source.spreadsheet.excel.DefaultExcelCellHandler;
import io.zulia.data.source.spreadsheet.excel.ExcelRecord;
import io.zulia.data.source.spreadsheet.excel.ExcelSource;
import io.zulia.data.source.spreadsheet.excel.ExcelSourceConfig;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import java.io.IOException;
import java.util.Date;
import java.util.List;
import java.util.SequencedSet;
/**
 * Usage examples for the spreadsheet data sources in {@code io.zulia.data}.
 *
 * <p>Each static method is a self-contained walkthrough: generic (format-agnostic) reading via
 * {@link SpreadsheetSourceFactory}, manually configured CSV reading via {@link CSVSourceConfig},
 * and manually configured Excel reading via {@link ExcelSourceConfig}. The local variables inside
 * the loops are intentionally unused; they exist to show which typed accessors are available.
 */
public class DataSourceExamples {

    /**
     * Shows format-agnostic reading through {@link SpreadsheetSourceFactory}: first from an
     * explicitly opened stream (CSV, then TSV), then through the file-path shortcut with two
     * iteration passes over the same source.
     *
     * @throws IOException propagated from opening, reading, or closing the sources
     */
    public static void genericSpreadsheetHandling() throws IOException {

        // xls, xlsx and tsv also supported by SpreadsheetSourceFactory
        readTypedColumns(FileDataInputStream.from("/data/test.csv"));

        // xls, xlsx and csv also supported by SpreadsheetSourceFactory
        readTypedColumns(FileDataInputStream.from("/data/test.tsv"));

        // concise version of above: the factory opens the file itself
        try (SpreadsheetSource<?> dataSource = SpreadsheetSourceFactory.fromFileWithHeaders("/data/test.csv")) {

            // two (or more) passes are supported by the iterator
            int count = 0;
            for (SpreadsheetRecord spreadsheetRecord : dataSource) {
                count++;
            }

            // second more in depth pass using the count for progress for example
            for (SpreadsheetRecord spreadsheetRecord : dataSource) {

            }
        }
    }

    /**
     * Opens a header-aware generic source over the given stream and reads every record using
     * both positional and header-name access. Shared by the CSV and TSV examples in
     * {@link #genericSpreadsheetHandling()}, which differ only in the file they open.
     *
     * @param dataInputStream stream over the spreadsheet file to read
     * @throws IOException propagated from opening, reading, or closing the source
     */
    private static void readTypedColumns(FileDataInputStream dataInputStream) throws IOException {
        // reads first line as headers
        try (SpreadsheetSource<?> dataSource = SpreadsheetSourceFactory.fromStreamWithHeaders(dataInputStream)) {

            // optionally do something with the headers but not required to use headers below
            SequencedSet<String> headers = dataSource.getHeaders();

            for (SpreadsheetRecord spreadsheetRecord : dataSource) {
                // access value in first column not relying on headers
                String firstColumn = spreadsheetRecord.getString(0);

                // can access by header name because headers were read on open
                String title = spreadsheetRecord.getString("title");
                Integer year = spreadsheetRecord.getInt("year");
                Float rating = spreadsheetRecord.getFloat("rating");
                Boolean recommended = spreadsheetRecord.getBoolean("recommended");
                Date dateAdded = spreadsheetRecord.getDate("dateAdded");
                List<String> labels = spreadsheetRecord.getList("labels", String.class);
            }
        }
    }

    /**
     * Shows explicit CSV configuration: headers, an alternate column delimiter, a list
     * delimiter, and custom date/boolean parsers, followed by typed reads from each record.
     *
     * @throws IOException propagated from opening, reading, or closing the source
     */
    public static void manualConfigurationOfCSV() throws IOException {
        // manual configuration allows more flexibility than generic but more verbose
        FileDataInputStream dataInputStream = FileDataInputStream.from("/data/test.csv");
        CSVSourceConfig csvSourceConfig = CSVSourceConfig.from(dataInputStream);
        csvSourceConfig.withHeaders();

        // optionally configure these below
        csvSourceConfig.withDelimiter('|'); // set alternate delimiter
        csvSourceConfig.withListDelimiter(';'); // if reading a cell as a list, split on this, defaults to ;
        csvSourceConfig.withDateParser(s -> {
            // by default dates in format yyyy-mm-dd are supported;
            // implement specialized date parsing here (this placeholder just returns null)
            return null;
        });
        csvSourceConfig.withBooleanParser(s -> {
            // by default true,t,1,yes,y,false,f,0,no,n are supported
            // implement specialized boolean parsing here (this placeholder just returns null)
            return null;
        });

        try (CSVSource csvSource = CSVSource.withConfig(csvSourceConfig)) {
            for (CSVRecord csvRecord : csvSource) {
                // Standard handling
                // access value in first column not relying on headers
                String firstColumn = csvRecord.getString(0);

                // can access by header name because headers were read on open
                String title = csvRecord.getString("title");
                Integer year = csvRecord.getInt("year");
                Float rating = csvRecord.getFloat("rating");
                Boolean recommended = csvRecord.getBoolean("recommended");
                Date dateAdded = csvRecord.getDate("dateAdded");
                List<String> labels = csvRecord.getList("labels", String.class);

                // no special handling for CSV
            }
        }
    }

    /**
     * Shows explicit Excel configuration: headers, a list delimiter, and a cell handler that
     * overrides individual conversions of {@link DefaultExcelCellHandler}, followed by typed
     * reads and access to the underlying POI {@link Row} and {@link Cell}.
     *
     * @throws IOException propagated from opening, reading, or closing the source
     */
    public static void manualConfigurationWithExcel() throws IOException {
        // xlsx and xls are supported
        FileDataInputStream dataInputStream = FileDataInputStream.from("/data/test.xlsx");
        ExcelSourceConfig excelSourceConfig = ExcelSourceConfig.from(dataInputStream).withHeaders();
        excelSourceConfig.withListDelimiter(';');

        // default is DefaultExcelCellHandler but a complete custom implementation can be given
        // or individual methods can be overridden
        excelSourceConfig.withExcelCellHandler(new DefaultExcelCellHandler() {
            @Override
            public Boolean cellToBoolean(Cell cell) {
                // override boolean handling
                return false;
            }

            @Override
            public Float cellToFloat(Cell cell) {
                // override float handling
                return 0f;
            }
        });

        try (ExcelSource excelSource = ExcelSource.withConfig(excelSourceConfig)) {
            for (ExcelRecord excelRecord : excelSource) {
                // Standard handling
                // access value in first column not relying on headers
                String firstColumn = excelRecord.getString(0);

                // can access by header name because headers were read on open
                String title = excelRecord.getString("title");
                Integer year = excelRecord.getInt("year");
                Float rating = excelRecord.getFloat("rating");
                Boolean recommended = excelRecord.getBoolean("recommended");
                Date dateAdded = excelRecord.getDate("dateAdded");
                List<String> labels = excelRecord.getList("labels", String.class);

                // Excel specific
                Row nativeRow = excelRecord.getNativeRow();
                Cell titleCell = excelRecord.getCell("title");
            }
        }
    }

    /**
     * Entry point placeholder; intentionally empty. Call one of the example methods from here
     * to run it against local files.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared so example methods can be invoked without extra handling
     */
    public static void main(String[] args) throws IOException {

    }
}