Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-1741: Respect decimal reader isRepeating flag #1960

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -1551,7 +1551,6 @@ private void nextVector(DecimalColumnVector result,
HiveDecimalWritable[] vector = result.vector;
HiveDecimalWritable decWritable;
if (result.noNulls) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
decWritable = vector[r];
if (!decWritable.serializationUtilsRead(
Expand All @@ -1563,7 +1562,6 @@ private void nextVector(DecimalColumnVector result,
setIsRepeatingIfNeeded(result, r);
}
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
if (!result.isNull[r]) {
decWritable = vector[r];
Expand Down Expand Up @@ -1595,7 +1593,6 @@ private void nextVector(DecimalColumnVector result,
HiveDecimalWritable[] vector = result.vector;
HiveDecimalWritable decWritable;
if (result.noNulls) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); ++r) {
int idx = filterContext.getSelected()[r];
Expand All @@ -1614,7 +1611,6 @@ private void nextVector(DecimalColumnVector result,
}
skipStreamRows(batchSize - previousIdx);
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); ++r) {
int idx = filterContext.getSelected()[r];
Expand Down Expand Up @@ -1651,14 +1647,12 @@ private void nextVector(Decimal64ColumnVector result,
// read the scales
scaleReader.nextVector(result, scratchScaleVector, batchSize);
if (result.noNulls) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
final long scaleFactor = powerOfTenTable[scale - scratchScaleVector[r]];
result.vector[r] = SerializationUtils.readVslong(valueStream) * scaleFactor;
setIsRepeatingIfNeeded(result, r);
}
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
if (!result.isNull[r]) {
final long scaleFactor = powerOfTenTable[scale - scratchScaleVector[r]];
Expand Down Expand Up @@ -1686,7 +1680,6 @@ private void nextVector(Decimal64ColumnVector result,
// Read all the scales
scaleReader.nextVector(result, scratchScaleVector, batchSize);
if (result.noNulls) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); r++) {
int idx = filterContext.getSelected()[r];
Expand All @@ -1702,7 +1695,6 @@ private void nextVector(Decimal64ColumnVector result,
}
skipStreamRows(batchSize - previousIdx);
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); r++) {
int idx = filterContext.getSelected()[r];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ private void readDecimalInNullStripe(String typeString, Class<?> expectedColumnT
assertTrue(batch.cols[0].isRepeating);
StringBuilder sb = new StringBuilder();
batch.cols[0].stringifyValue(sb, 1023);
assertEquals(sb.toString(), expectedResult[0]);
assertEquals(expectedResult[0], sb.toString());

rows.nextBatch(batch);
assertEquals(1024, batch.size);
Expand All @@ -717,17 +717,17 @@ private void readDecimalInNullStripe(String typeString, Class<?> expectedColumnT
assertFalse(batch.cols[0].isRepeating);
StringBuilder sb2 = new StringBuilder();
batch.cols[0].stringifyValue(sb2, 1023);
assertEquals(sb2.toString(), expectedResult[1]);
assertEquals(expectedResult[1], sb2.toString());

rows.nextBatch(batch);
assertEquals(1024, batch.size);
assertEquals(expected, options.toString());
assertEquals(batch.cols.length, 1);
assertEquals(batch.cols[0].getClass(), expectedColumnType);
assertTrue(batch.cols[0].isRepeating);
assertFalse(batch.cols[0].isRepeating);
StringBuilder sb3 = new StringBuilder();
batch.cols[0].stringifyValue(sb3, 1023);
assertEquals(sb3.toString(), expectedResult[2]);
assertEquals(expectedResult[2], sb3.toString());
}

private void testDecimalConvertToLongInNullStripe() throws Exception {
Expand Down
68 changes: 68 additions & 0 deletions java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
Expand Down Expand Up @@ -2732,4 +2733,71 @@ public void testHadoopVectoredIO() throws Exception {

verify(spyFSDataInputStream, atLeastOnce()).readVectored(any(), any());
}

@Test
public void testDecimalIsRepeatingFlag() throws IOException {
  // Regression test for ORC-1741: decimal tree readers must leave
  // isRepeating == false on batches that contain row-by-row values,
  // even when every row in the batch happens to hold the same decimal.
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path testFilePath = new Path(workDir, "testDecimalIsRepeatingFlag.orc");
  fs.delete(testFilePath, true);

  // Small stripes so each 1024-row batch is flushed independently.
  Configuration decimalConf = new Configuration(conf);
  decimalConf.set(OrcConf.STRIPE_ROW_COUNT.getAttribute(), "1024");
  decimalConf.set(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "1");
  TypeDescription schema =
      TypeDescription.fromString("struct<col1:decimal(20,10)>");

  // One constant value per 1024-row batch; three batches total.
  String[] batchValues = {
      "-119.4594594595", "9318.4351351351", "-4298.1513513514"
  };

  Writer w = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(decimalConf).setSchema(schema));
  VectorizedRowBatch b = schema.createRowBatch();
  DecimalColumnVector f1 = (DecimalColumnVector) b.cols[0];
  for (String value : batchValues) {
    HiveDecimal decimal = HiveDecimal.create(value);
    for (int i = 0; i < 1024; i++) {
      f1.set(i, decimal);
    }
    b.size = 1024;
    w.addRowBatch(b);
    b.reset();
  }
  w.close();

  Reader.Options options = new Reader.Options();
  try (Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
       RecordReader rows = reader.rows(options)) {
    VectorizedRowBatch batch = schema.createRowBatch();
    for (String value : batchValues) {
      rows.nextBatch(batch);
      assertEquals(1024, batch.size);
      // Values were materialized per row, so the flag must stay cleared.
      assertFalse(batch.cols[0].isRepeating);
      HiveDecimal expected = HiveDecimal.create(value);
      for (HiveDecimalWritable writable : ((DecimalColumnVector) batch.cols[0]).vector) {
        assertEquals(expected, writable.getHiveDecimal());
      }
    }
  }
}
}
Loading