forked from linkedin/avro-util
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[fast-avro] FastGenericDatumReader forwards setSchema() to coldDeserializer (linkedin#534)
* TDD approach - adding a unit test which should pass but currently fails. * Some minor code cleanup. * [fast-avro][bugfix] Delegating the setSchema() call from FastGenericDatumReader to coldDeserializer. This is needed to deserialize the first record(s) from a file using DataFileStream.
- Loading branch information
1 parent
58c9203
commit 9487a8b
Showing
9 changed files
with
145 additions
and
18 deletions.
There are no files selected for viewing
22 changes: 22 additions & 0 deletions
22
fastserde/avro-fastserde-tests-common/src/test/avro/simpleTestRecord.avsc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"type": "record", | ||
"name": "SimpleTestRecord", | ||
"namespace": "com.linkedin.avro.fastserde.generated.avro", | ||
"doc": "Used in tests of fast-serde to verify writing records by DataFileWriter and reading by DataFileReader/DataFileStream", | ||
"fields": [ | ||
{ | ||
"name": "text", | ||
"type": "string", | ||
"default": "" | ||
}, | ||
{ | ||
"name": "fiveBytes", | ||
"type": { | ||
"name": "Fixed5", | ||
"type": "fixed", | ||
"size": 5 | ||
}, | ||
"default": "Fizyk" | ||
} | ||
] | ||
} |
88 changes: 88 additions & 0 deletions
88
...ommon/src/test/java/com/linkedin/avro/fastserde/file/FastSerdeWithDataFileStreamTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
package com.linkedin.avro.fastserde.file; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.ByteArrayOutputStream; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.avro.Schema; | ||
import org.apache.avro.file.DataFileStream; | ||
import org.apache.avro.file.DataFileWriter; | ||
import org.apache.avro.generic.GenericRecord; | ||
import org.apache.avro.generic.IndexedRecord; | ||
import org.apache.avro.io.DatumReader; | ||
import org.testng.Assert; | ||
import org.testng.annotations.DataProvider; | ||
import org.testng.annotations.Test; | ||
|
||
import com.linkedin.avro.fastserde.FastGenericDatumReader; | ||
import com.linkedin.avro.fastserde.FastSpecificDatumReader; | ||
import com.linkedin.avro.fastserde.FastSpecificDatumWriter; | ||
import com.linkedin.avro.fastserde.generated.avro.Fixed5; | ||
import com.linkedin.avro.fastserde.generated.avro.SimpleTestRecord; | ||
import com.linkedin.avroutil1.compatibility.AvroRecordUtil; | ||
|
||
public class FastSerdeWithDataFileStreamTest { | ||
|
||
@DataProvider | ||
private Object[][] dataFileStreamDeserializationTestCases() { | ||
Schema readerSchema = SimpleTestRecord.SCHEMA$; | ||
return new Object[][]{ | ||
new Object[]{11, new FastSpecificDatumReader<>(null, readerSchema)}, | ||
new Object[]{12, new FastGenericDatumReader<GenericRecord>(null, readerSchema)}, | ||
}; | ||
} | ||
|
||
@Test(groups = "deserializationTest", dataProvider = "dataFileStreamDeserializationTestCases") | ||
<D extends IndexedRecord> void dataFileStreamShouldReadDataUsingSpecificReader(int recordsToWrite, | ||
DatumReader<D> datumReader) throws IOException { | ||
// given: records to be written to one file | ||
List<SimpleTestRecord> records = new ArrayList<>(recordsToWrite); | ||
for (byte i = 0; i < recordsToWrite; i++) { | ||
Fixed5 fiveBytes = new Fixed5(); | ||
fiveBytes.bytes(new byte[]{'K', 'r', 'i', 's', i}); | ||
|
||
SimpleTestRecord simpleTestRecord = new SimpleTestRecord(); | ||
AvroRecordUtil.setField(simpleTestRecord, "fiveBytes", fiveBytes); | ||
AvroRecordUtil.setField(simpleTestRecord, "text", "text-" + i); | ||
|
||
records.add(simpleTestRecord); | ||
} | ||
|
||
// given: bytes array representing content of persistent file with schema and multiple records | ||
byte[] bytes = writeTestRecordsToFile(records); | ||
|
||
// when: pre-populated bytes array is consumed by DataFileStream (in tests more convenient than DataFileReader | ||
// because SeekableByteArrayInput is not available for older Avro versions) | ||
ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes); | ||
DataFileStream<D> dataFileStream = new DataFileStream<>(inputStream, datumReader); | ||
|
||
// then: records read from file are the same as records sent to file | ||
int idx = 0; | ||
for (IndexedRecord recordReadFromFile : dataFileStream) { | ||
Assert.assertEquals(recordReadFromFile.toString(), records.get(idx++).toString()); | ||
} | ||
} | ||
|
||
/** | ||
* @return bytes array representing file content | ||
*/ | ||
private static byte[] writeTestRecordsToFile(List<SimpleTestRecord> records) throws IOException { | ||
Schema schema = SimpleTestRecord.SCHEMA$; | ||
FastSpecificDatumWriter<SimpleTestRecord> datumWriter = new FastSpecificDatumWriter<>(schema); | ||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); | ||
|
||
try (DataFileWriter<SimpleTestRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) { | ||
dataFileWriter.create(schema, outputStream); | ||
|
||
for (SimpleTestRecord record : records) { | ||
dataFileWriter.append(record); | ||
} | ||
|
||
dataFileWriter.flush(); | ||
} | ||
|
||
return outputStream.toByteArray(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters