Skip to content

Commit

Permalink
ORC-1741: Respect decimal reader isRepeating flag
Browse files Browse the repository at this point in the history
For decimal types, when the column vector's `isRepeating` flag is already false, the reader should not try to set it back to true.

apache/hive#5218 (comment)

[ORC-1266](https://issues.apache.org/jira/browse/ORC-1266): DecimalColumnVector resets the isRepeating flag in the nextVector method

Add UT

No

Closes #1960 from cxzl25/decimal_isRepeating.

Authored-by: sychen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit e818d56)
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
cxzl25 authored and dongjoon-hyun committed Jul 24, 2024
1 parent 343efc0 commit 71e3407
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 12 deletions.
8 changes: 0 additions & 8 deletions java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -1551,7 +1551,6 @@ private void nextVector(DecimalColumnVector result,
HiveDecimalWritable[] vector = result.vector;
HiveDecimalWritable decWritable;
if (result.noNulls) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
decWritable = vector[r];
if (!decWritable.serializationUtilsRead(
Expand All @@ -1563,7 +1562,6 @@ private void nextVector(DecimalColumnVector result,
setIsRepeatingIfNeeded(result, r);
}
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
if (!result.isNull[r]) {
decWritable = vector[r];
Expand Down Expand Up @@ -1595,7 +1593,6 @@ private void nextVector(DecimalColumnVector result,
HiveDecimalWritable[] vector = result.vector;
HiveDecimalWritable decWritable;
if (result.noNulls) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); ++r) {
int idx = filterContext.getSelected()[r];
Expand All @@ -1614,7 +1611,6 @@ private void nextVector(DecimalColumnVector result,
}
skipStreamRows(batchSize - previousIdx);
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); ++r) {
int idx = filterContext.getSelected()[r];
Expand Down Expand Up @@ -1651,14 +1647,12 @@ private void nextVector(Decimal64ColumnVector result,
// read the scales
scaleReader.nextVector(result, scratchScaleVector, batchSize);
if (result.noNulls) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
final long scaleFactor = powerOfTenTable[scale - scratchScaleVector[r]];
result.vector[r] = SerializationUtils.readVslong(valueStream) * scaleFactor;
setIsRepeatingIfNeeded(result, r);
}
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
for (int r = 0; r < batchSize; ++r) {
if (!result.isNull[r]) {
final long scaleFactor = powerOfTenTable[scale - scratchScaleVector[r]];
Expand Down Expand Up @@ -1686,7 +1680,6 @@ private void nextVector(Decimal64ColumnVector result,
// Read all the scales
scaleReader.nextVector(result, scratchScaleVector, batchSize);
if (result.noNulls) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); r++) {
int idx = filterContext.getSelected()[r];
Expand All @@ -1702,7 +1695,6 @@ private void nextVector(Decimal64ColumnVector result,
}
skipStreamRows(batchSize - previousIdx);
} else if (!result.isRepeating || !result.isNull[0]) {
result.isRepeating = true;
int previousIdx = 0;
for (int r = 0; r != filterContext.getSelectedSize(); r++) {
int idx = filterContext.getSelected()[r];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ private void readDecimalInNullStripe(String typeString, Class<?> expectedColumnT
assertTrue(batch.cols[0].isRepeating);
StringBuilder sb = new StringBuilder();
batch.cols[0].stringifyValue(sb, 1023);
assertEquals(sb.toString(), expectedResult[0]);
assertEquals(expectedResult[0], sb.toString());

rows.nextBatch(batch);
assertEquals(1024, batch.size);
Expand All @@ -717,17 +717,17 @@ private void readDecimalInNullStripe(String typeString, Class<?> expectedColumnT
assertFalse(batch.cols[0].isRepeating);
StringBuilder sb2 = new StringBuilder();
batch.cols[0].stringifyValue(sb2, 1023);
assertEquals(sb2.toString(), expectedResult[1]);
assertEquals(expectedResult[1], sb2.toString());

rows.nextBatch(batch);
assertEquals(1024, batch.size);
assertEquals(expected, options.toString());
assertEquals(batch.cols.length, 1);
assertEquals(batch.cols[0].getClass(), expectedColumnType);
assertTrue(batch.cols[0].isRepeating);
assertFalse(batch.cols[0].isRepeating);
StringBuilder sb3 = new StringBuilder();
batch.cols[0].stringifyValue(sb3, 1023);
assertEquals(sb3.toString(), expectedResult[2]);
assertEquals(expectedResult[2], sb3.toString());
}

private void testDecimalConvertToLongInNullStripe() throws Exception {
Expand Down
68 changes: 68 additions & 0 deletions java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
Expand Down Expand Up @@ -2711,4 +2712,71 @@ public void testRowIndexStrideNegativeFilter() throws Exception {
base += batch.size;
}
}

@Test
public void testDecimalIsRepeatingFlag() throws IOException {
  // Regression test for ORC-1741: when a decimal column actually contains
  // distinct values across stripes, the decimal readers must leave the
  // vector's isRepeating flag false and materialize every row, instead of
  // forcing isRepeating back to true inside nextVector.
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path testFilePath = new Path(workDir, "testDecimalIsRepeatingFlag.orc");
  fs.delete(testFilePath, true);

  // Small stripes (1024 rows, check every row) so each batch below lands in
  // its own stripe and each stripe holds a single repeated decimal value.
  Configuration decimalConf = new Configuration(conf);
  decimalConf.set(OrcConf.STRIPE_ROW_COUNT.getAttribute(), "1024");
  decimalConf.set(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "1");
  TypeDescription schema = TypeDescription.fromString("struct<col1:" + "decimal(20,10)" + ">");

  // One batch per value; every row within a batch carries the same decimal.
  String[] values = {"-119.4594594595", "9318.4351351351", "-4298.1513513514"};

  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(decimalConf).setSchema(schema));
  VectorizedRowBatch writeBatch = schema.createRowBatch();
  // reset() reuses (does not replace) the column vectors, so this reference
  // stays valid across iterations.
  DecimalColumnVector decimalCol = (DecimalColumnVector) writeBatch.cols[0];
  for (String value : values) {
    writeBatch.reset();
    HiveDecimal decimal = HiveDecimal.create(value);
    for (int i = 0; i < 1024; i++) {
      decimalCol.set(i, decimal);
    }
    writeBatch.size = 1024;
    writer.addRowBatch(writeBatch);
  }
  writer.close();

  // Read the batches back: each must be fully populated (isRepeating == false)
  // with every row equal to the value written for that stripe.
  Reader.Options options = new Reader.Options();
  try (Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
      RecordReader rows = reader.rows(options)) {
    VectorizedRowBatch readBatch = schema.createRowBatch();
    for (String value : values) {
      rows.nextBatch(readBatch);
      assertEquals(1024, readBatch.size);
      assertFalse(readBatch.cols[0].isRepeating);
      HiveDecimal expected = HiveDecimal.create(value);
      for (HiveDecimalWritable actual : ((DecimalColumnVector) readBatch.cols[0]).vector) {
        assertEquals(expected, actual.getHiveDecimal());
      }
    }
  }
}
}

0 comments on commit 71e3407

Please sign in to comment.