From 6018880919b77caadf98aa9c05a08e06243113a2 Mon Sep 17 00:00:00 2001 From: James Petty Date: Tue, 21 Dec 2021 16:40:16 -0500 Subject: [PATCH 1/4] Extend Block interface to include fixed width size information Adds a positionCount argument to Block#getPositionsSizeInBytes and adds a new method: Block#fixedSizeInBytesPerPosition() to reduce the overhead associated with calculating DictionaryBlock size in bytes when the underlying dictionary size in bytes can be calculated without specific information about which positions are referenced. --- .../io/trino/operator/GroupByIdBlock.java | 11 +- .../io/trino/block/AbstractTestBlock.java | 6 +- .../io/trino/block/TestDictionaryBlock.java | 4 +- .../block/TestRunLengthEncodedBlock.java | 21 ++++ .../trino/spi/block/AbstractArrayBlock.java | 53 +++++--- .../io/trino/spi/block/AbstractMapBlock.java | 79 ++++++++---- .../io/trino/spi/block/AbstractRowBlock.java | 119 +++++++++++++++--- .../spi/block/AbstractSingleArrayBlock.java | 2 +- .../spi/block/AbstractSingleMapBlock.java | 2 +- .../spi/block/AbstractSingleRowBlock.java | 2 +- .../main/java/io/trino/spi/block/Block.java | 19 +-- .../java/io/trino/spi/block/BlockUtil.java | 26 +++- .../io/trino/spi/block/ByteArrayBlock.java | 13 +- .../spi/block/ByteArrayBlockBuilder.java | 20 +-- .../io/trino/spi/block/DictionaryBlock.java | 79 ++++++++++-- .../io/trino/spi/block/Int128ArrayBlock.java | 19 +-- .../spi/block/Int128ArrayBlockBuilder.java | 20 +-- .../io/trino/spi/block/Int96ArrayBlock.java | 19 +-- .../spi/block/Int96ArrayBlockBuilder.java | 16 ++- .../io/trino/spi/block/IntArrayBlock.java | 19 +-- .../trino/spi/block/IntArrayBlockBuilder.java | 20 +-- .../java/io/trino/spi/block/LazyBlock.java | 14 ++- .../io/trino/spi/block/LongArrayBlock.java | 19 +-- .../spi/block/LongArrayBlockBuilder.java | 20 +-- .../spi/block/RunLengthEncodedBlock.java | 11 +- .../io/trino/spi/block/ShortArrayBlock.java | 19 +-- .../spi/block/ShortArrayBlockBuilder.java | 20 +-- 
.../spi/block/SingleArrayBlockWriter.java | 7 ++ .../io/trino/spi/block/SingleMapBlock.java | 7 ++ .../trino/spi/block/SingleMapBlockWriter.java | 7 ++ .../io/trino/spi/block/SingleRowBlock.java | 7 ++ .../trino/spi/block/SingleRowBlockWriter.java | 7 ++ .../trino/spi/block/VariableWidthBlock.java | 19 ++- .../spi/block/VariableWidthBlockBuilder.java | 13 +- 34 files changed, 552 insertions(+), 187 deletions(-) diff --git a/core/trino-main/src/main/java/io/trino/operator/GroupByIdBlock.java b/core/trino-main/src/main/java/io/trino/operator/GroupByIdBlock.java index 46fa4b9093c3..1f6124b18b84 100644 --- a/core/trino-main/src/main/java/io/trino/operator/GroupByIdBlock.java +++ b/core/trino-main/src/main/java/io/trino/operator/GroupByIdBlock.java @@ -19,6 +19,7 @@ import org.openjdk.jol.info.ClassLayout; import java.util.List; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static com.google.common.base.MoreObjects.toStringHelper; @@ -64,9 +65,15 @@ public long getRegionSizeInBytes(int positionOffset, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public OptionalInt fixedSizeInBytesPerPosition() { - return block.getPositionsSizeInBytes(positions); + return block.fixedSizeInBytesPerPosition(); + } + + @Override + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionCount) + { + return block.getPositionsSizeInBytes(positions, selectedPositionCount); } @Override diff --git a/core/trino-main/src/test/java/io/trino/block/AbstractTestBlock.java b/core/trino-main/src/test/java/io/trino/block/AbstractTestBlock.java index 294e2294437f..9728cb10b83f 100644 --- a/core/trino-main/src/test/java/io/trino/block/AbstractTestBlock.java +++ b/core/trino-main/src/test/java/io/trino/block/AbstractTestBlock.java @@ -221,11 +221,11 @@ private void assertBlockSize(Block block) boolean[] positions = new boolean[block.getPositionCount()]; fill(positions, 0, firstHalf.getPositionCount(), true); - 
assertEquals(block.getPositionsSizeInBytes(positions), expectedFirstHalfSize); + assertEquals(block.getPositionsSizeInBytes(positions, firstHalf.getPositionCount()), expectedFirstHalfSize); fill(positions, true); - assertEquals(block.getPositionsSizeInBytes(positions), expectedBlockSize); + assertEquals(block.getPositionsSizeInBytes(positions, positions.length), expectedBlockSize); fill(positions, 0, firstHalf.getPositionCount(), false); - assertEquals(block.getPositionsSizeInBytes(positions), expectedSecondHalfSize); + assertEquals(block.getPositionsSizeInBytes(positions, positions.length - firstHalf.getPositionCount()), expectedSecondHalfSize); } // expectedValueType is required since otherwise the expected value type is unknown when expectedValue is null. diff --git a/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java b/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java index 0f947964fcfe..3be65538a01e 100644 --- a/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java +++ b/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java @@ -252,12 +252,12 @@ public void testNestedCompact() assertEquals( dictionary.getSizeInBytes(), - valuesBlock.getPositionsSizeInBytes(new boolean[] {true, false, true, false, false, false}) + 4 * Integer.BYTES); + valuesBlock.getPositionsSizeInBytes(new boolean[] {true, false, true, false, false, false}, 2) + 4 * Integer.BYTES); assertFalse(dictionary.isCompact()); assertEquals( dictionaryWithAllPositionsUsed.getSizeInBytes(), - valuesBlock.getPositionsSizeInBytes(new boolean[] {true, true, true, false, true, true}) + 6 * Integer.BYTES); + valuesBlock.getPositionsSizeInBytes(new boolean[] {true, true, true, false, true, true}, 5) + 6 * Integer.BYTES); // dictionary is not compact (even though all positions were used) because it's unnested assertFalse(dictionaryWithAllPositionsUsed.isCompact()); diff --git 
a/core/trino-main/src/test/java/io/trino/block/TestRunLengthEncodedBlock.java b/core/trino-main/src/test/java/io/trino/block/TestRunLengthEncodedBlock.java index 3c0c3a89ed05..800a4ebefba4 100644 --- a/core/trino-main/src/test/java/io/trino/block/TestRunLengthEncodedBlock.java +++ b/core/trino-main/src/test/java/io/trino/block/TestRunLengthEncodedBlock.java @@ -26,6 +26,7 @@ import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; public class TestRunLengthEncodedBlock extends AbstractTestBlock @@ -61,6 +62,26 @@ private static BlockBuilder createBlockBuilder() return new VariableWidthBlockBuilder(null, 1, 1); } + @Test + public void testPositionsSizeInBytes() + { + Block valueBlock = createSingleValueBlock(createExpectedValue(10)); + Block rleBlock = new RunLengthEncodedBlock(valueBlock, 10); + // Size in bytes is not fixed per position + assertTrue(rleBlock.fixedSizeInBytesPerPosition().isEmpty()); + // Accepts specific position selection + boolean[] positions = new boolean[rleBlock.getPositionCount()]; + positions[0] = true; + positions[1] = true; + assertEquals(rleBlock.getPositionsSizeInBytes(positions, 2), valueBlock.getSizeInBytes()); + // Accepts null positions array with count only + assertEquals(rleBlock.getPositionsSizeInBytes(null, 2), valueBlock.getSizeInBytes()); + // Always reports the same size in bytes regardless of positions + for (int positionCount = 0; positionCount < rleBlock.getPositionCount(); positionCount++) { + assertEquals(rleBlock.getPositionsSizeInBytes(null, positionCount), valueBlock.getSizeInBytes()); + } + } + @Test public void testBuildingFromLongArrayBlockBuilder() { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractArrayBlock.java index 10e95c45d695..ed4eb76b5cc4 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractArrayBlock.java +++ 
b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractArrayBlock.java @@ -16,6 +16,7 @@ import javax.annotation.Nullable; import java.util.List; +import java.util.OptionalInt; import static io.trino.spi.block.ArrayBlock.createArrayBlockInternal; import static io.trino.spi.block.BlockUtil.checkArrayRange; @@ -23,6 +24,8 @@ import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; import static io.trino.spi.block.BlockUtil.compactOffsets; +import static io.trino.spi.block.BlockUtil.countAndMarkSelectedPositionsFromOffsets; +import static io.trino.spi.block.BlockUtil.countSelectedPositionsFromOffsets; import static java.util.Collections.singletonList; public abstract class AbstractArrayBlock @@ -103,6 +106,12 @@ public Block getRegion(int position, int length) getRawElementBlock()); } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); // size per position is variable based on the number of entries in each array + } + @Override public long getRegionSizeInBytes(int position, int length) { @@ -116,22 +125,36 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) - { - checkValidPositions(positions, getPositionCount()); - boolean[] used = new boolean[getRawElementBlock().getPositionCount()]; - int usedPositionCount = 0; - for (int i = 0; i < positions.length; ++i) { - if (positions[i]) { - usedPositionCount++; - int valueStart = getOffsets()[getOffsetBase() + i]; - int valueEnd = getOffsets()[getOffsetBase() + i + 1]; - for (int j = valueStart; j < valueEnd; ++j) { - used[j] = true; - } - } + public final long getPositionsSizeInBytes(boolean[] positions, int selectedArrayPositions) + { + int positionCount = getPositionCount(); + checkValidPositions(positions, positionCount); + if (selectedArrayPositions == 0) { + return 0; + } + if (selectedArrayPositions == positionCount) { + return 
getSizeInBytes(); + } + + Block rawElementBlock = getRawElementBlock(); + OptionalInt fixedPerElementSizeInBytes = rawElementBlock.fixedSizeInBytesPerPosition(); + int[] offsets = getOffsets(); + int offsetBase = getOffsetBase(); + long elementsSizeInBytes; + + if (fixedPerElementSizeInBytes.isPresent()) { + elementsSizeInBytes = fixedPerElementSizeInBytes.getAsInt() * (long) countSelectedPositionsFromOffsets(positions, offsets, offsetBase); + } + else if (rawElementBlock instanceof RunLengthEncodedBlock) { + // RLE blocks don't have fixed size per position, but accept null for the positions array + elementsSizeInBytes = rawElementBlock.getPositionsSizeInBytes(null, countSelectedPositionsFromOffsets(positions, offsets, offsetBase)); + } + else { + boolean[] selectedElements = new boolean[rawElementBlock.getPositionCount()]; + int selectedElementCount = countAndMarkSelectedPositionsFromOffsets(positions, offsets, offsetBase, selectedElements); + elementsSizeInBytes = rawElementBlock.getPositionsSizeInBytes(selectedElements, selectedElementCount); } - return getRawElementBlock().getPositionsSizeInBytes(used) + ((Integer.BYTES + Byte.BYTES) * (long) usedPositionCount); + return elementsSizeInBytes + ((Integer.BYTES + Byte.BYTES) * (long) selectedArrayPositions); } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractMapBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractMapBlock.java index e410348fe3e8..f77ea74eb132 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractMapBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractMapBlock.java @@ -21,12 +21,15 @@ import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.OptionalInt; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidPositions; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static 
io.trino.spi.block.BlockUtil.compactArray; import static io.trino.spi.block.BlockUtil.compactOffsets; +import static io.trino.spi.block.BlockUtil.countAndMarkSelectedPositionsFromOffsets; +import static io.trino.spi.block.BlockUtil.countSelectedPositionsFromOffsets; import static io.trino.spi.block.MapBlock.createMapBlockInternal; import static io.trino.spi.block.MapHashTables.HASH_MULTIPLIER; import static java.util.Objects.requireNonNull; @@ -180,33 +183,63 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); // size per row is variable on the number of entries in each row + } + + private OptionalInt keyAndValueFixedSizeInBytesPerRow() + { + OptionalInt keyFixedSizePerRow = getRawKeyBlock().fixedSizeInBytesPerPosition(); + if (!keyFixedSizePerRow.isPresent()) { + return OptionalInt.empty(); + } + OptionalInt valueFixedSizePerRow = getRawValueBlock().fixedSizeInBytesPerPosition(); + if (!valueFixedSizePerRow.isPresent()) { + return OptionalInt.empty(); + } + + return OptionalInt.of(keyFixedSizePerRow.getAsInt() + valueFixedSizePerRow.getAsInt()); + } + + @Override + public final long getPositionsSizeInBytes(boolean[] positions, int selectedMapPositions) { - // We can use either the getRegionSizeInBytes or getPositionsSizeInBytes - // from the underlying raw blocks to implement this function. We chose - // getPositionsSizeInBytes with the assumption that constructing a - // positions array is cheaper than calling getRegionSizeInBytes for each - // used position. 
int positionCount = getPositionCount(); checkValidPositions(positions, positionCount); - boolean[] entryPositions = new boolean[getRawKeyBlock().getPositionCount()]; - int usedEntryCount = 0; - int usedPositionCount = 0; - for (int i = 0; i < positions.length; ++i) { - if (positions[i]) { - usedPositionCount++; - int entriesStart = getOffsets()[getOffsetBase() + i]; - int entriesEnd = getOffsets()[getOffsetBase() + i + 1]; - for (int j = entriesStart; j < entriesEnd; j++) { - entryPositions[j] = true; - } - usedEntryCount += (entriesEnd - entriesStart); - } + if (selectedMapPositions == 0) { + return 0; } - return getRawKeyBlock().getPositionsSizeInBytes(entryPositions) + - getRawValueBlock().getPositionsSizeInBytes(entryPositions) + - (Integer.BYTES + Byte.BYTES) * (long) usedPositionCount + - Integer.BYTES * HASH_MULTIPLIER * (long) usedEntryCount; + if (selectedMapPositions == positionCount) { + return getSizeInBytes(); + } + + int[] offsets = getOffsets(); + int offsetBase = getOffsetBase(); + OptionalInt fixedKeyAndValueSizePerRow = keyAndValueFixedSizeInBytesPerRow(); + + int selectedEntryCount; + long keyAndValuesSizeInBytes; + if (fixedKeyAndValueSizePerRow.isPresent()) { + // no new positions array need be created, we can just count the number of elements + selectedEntryCount = countSelectedPositionsFromOffsets(positions, offsets, offsetBase); + keyAndValuesSizeInBytes = fixedKeyAndValueSizePerRow.getAsInt() * (long) selectedEntryCount; + } + else { + // We can use either the getRegionSizeInBytes or getPositionsSizeInBytes + // from the underlying raw blocks to implement this function. We chose + // getPositionsSizeInBytes with the assumption that constructing a + // positions array is cheaper than calling getRegionSizeInBytes for each + // used position. 
+ boolean[] entryPositions = new boolean[getRawKeyBlock().getPositionCount()]; + selectedEntryCount = countAndMarkSelectedPositionsFromOffsets(positions, offsets, offsetBase, entryPositions); + keyAndValuesSizeInBytes = getRawKeyBlock().getPositionsSizeInBytes(entryPositions, selectedEntryCount) + + getRawValueBlock().getPositionsSizeInBytes(entryPositions, selectedEntryCount); + } + + return keyAndValuesSizeInBytes + + (Integer.BYTES + Byte.BYTES) * (long) selectedMapPositions + + Integer.BYTES * HASH_MULTIPLIER * (long) selectedEntryCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractRowBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractRowBlock.java index 6523e7019ca5..5a9dc49ce003 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractRowBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractRowBlock.java @@ -16,6 +16,7 @@ import javax.annotation.Nullable; import java.util.List; +import java.util.OptionalInt; import static io.trino.spi.block.BlockUtil.arraySame; import static io.trino.spi.block.BlockUtil.checkArrayRange; @@ -130,6 +131,41 @@ public Block getRegion(int position, int length) return createRowBlockInternal(position + getOffsetBase(), length, getRowIsNull(), getFieldBlockOffsets(), getRawFieldBlocks()); } + @Override + public final OptionalInt fixedSizeInBytesPerPosition() + { + if (!mayHaveNull()) { + // when null rows are present, we can't use the fixed field sizes to infer the correct + // size for arbitrary position selection + OptionalInt fieldSize = fixedSizeInBytesPerFieldPosition(); + if (fieldSize.isPresent()) { + // must include the row block overhead in addition to the per position size in bytes + return OptionalInt.of(fieldSize.getAsInt() + (Integer.BYTES + Byte.BYTES)); // offsets + rowIsNull + } + } + return OptionalInt.empty(); + } + + /** + * Returns the combined {@link Block#fixedSizeInBytesPerPosition()} value for all fields, assuming all + * are 
fixed size. If any field is not fixed size, then no value will be returned. This does not + * include the size-per-position overhead associated with the {@link AbstractRowBlock} itself, only of + * the constituent field members. + */ + private OptionalInt fixedSizeInBytesPerFieldPosition() + { + Block[] rawFieldBlocks = getRawFieldBlocks(); + int fixedSizePerRow = 0; + for (int i = 0; i < numFields; i++) { + OptionalInt fieldFixedSize = rawFieldBlocks[i].fixedSizeInBytesPerPosition(); + if (fieldFixedSize.isEmpty()) { + return OptionalInt.empty(); // found a block without a single per-position size + } + fixedSizePerRow += fieldFixedSize.getAsInt(); + } + return OptionalInt.of(fixedSizePerRow); + } + @Override public long getRegionSizeInBytes(int position, int length) { @@ -148,27 +184,80 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public final long getPositionsSizeInBytes(boolean[] positions, int selectedRowPositions) + { + int positionCount = getPositionCount(); + checkValidPositions(positions, positionCount); + if (selectedRowPositions == 0) { + return 0; + } + if (selectedRowPositions == positionCount) { + return getSizeInBytes(); + } + + OptionalInt fixedSizePerFieldPosition = fixedSizeInBytesPerFieldPosition(); + if (fixedSizePerFieldPosition.isPresent()) { + // All field blocks are fixed size per position, no specific position mapping is necessary + int selectedFieldPositionCount = selectedRowPositions; + boolean[] rowIsNull = getRowIsNull(); + if (rowIsNull != null) { + // Some positions in usedPositions may be null which must be removed from the selectedFieldPositionCount + int offsetBase = getOffsetBase(); + for (int i = 0; i < positions.length; i++) { + if (positions[i] && rowIsNull[i + offsetBase]) { + selectedFieldPositionCount--; // selected row is null, don't include it in the selected field positions + } + } + if (selectedFieldPositionCount < 0) { + throw new 
IllegalStateException("Invalid field position selection after nulls removed: " + selectedFieldPositionCount); + } + } + return ((Integer.BYTES + Byte.BYTES) * (long) selectedRowPositions) + (fixedSizePerFieldPosition.getAsInt() * (long) selectedFieldPositionCount); + } + + // Fall back to specific position size calculations + return getSpecificPositionsSizeInBytes(positions, selectedRowPositions); + } + + private long getSpecificPositionsSizeInBytes(boolean[] positions, int selectedRowPositions) { - checkValidPositions(positions, getPositionCount()); - - int usedPositionCount = 0; - boolean[] fieldPositions = new boolean[getRawFieldBlocks()[0].getPositionCount()]; - for (int i = 0; i < positions.length; i++) { - if (positions[i]) { - usedPositionCount++; - int startFieldBlockOffset = getFieldBlockOffset(i); - int endFieldBlockOffset = getFieldBlockOffset(i + 1); - for (int j = startFieldBlockOffset; j < endFieldBlockOffset; j++) { - fieldPositions[j] = true; + int positionCount = getPositionCount(); + int offsetBase = getOffsetBase(); + boolean[] rowIsNull = getRowIsNull(); + // No fixed width size per row, specific positions used must be tracked + int totalFieldPositions = getRawFieldBlocks()[0].getPositionCount(); + boolean[] fieldPositions; + int selectedFieldPositionCount; + if (rowIsNull == null) { + // No nulls, so the same number of positions are used + selectedFieldPositionCount = selectedRowPositions; + if (offsetBase == 0 && positionCount == totalFieldPositions) { + // No need to adapt the positions array at all, reuse it directly + fieldPositions = positions; + } + else { + // no nulls present, so we can just shift the positions array into alignment with the elements block with other positions unused + fieldPositions = new boolean[totalFieldPositions]; + System.arraycopy(positions, 0, fieldPositions, offsetBase, positions.length); + } + } + else { + fieldPositions = new boolean[totalFieldPositions]; + selectedFieldPositionCount = 0; + for (int i = 0; i < 
positions.length; i++) { + if (positions[i] && !rowIsNull[offsetBase + i]) { + selectedFieldPositionCount++; + fieldPositions[getFieldBlockOffset(i)] = true; } } } - long sizeInBytes = 0; + + Block[] rawFieldBlocks = getRawFieldBlocks(); + long sizeInBytes = ((Integer.BYTES + Byte.BYTES) * (long) selectedRowPositions); // offsets + rowIsNull for (int j = 0; j < numFields; j++) { - sizeInBytes += getRawFieldBlocks()[j].getPositionsSizeInBytes(fieldPositions); + sizeInBytes += rawFieldBlocks[j].getPositionsSizeInBytes(fieldPositions, selectedFieldPositionCount); } - return sizeInBytes + (Integer.BYTES + Byte.BYTES) * (long) usedPositionCount; + return sizeInBytes; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleArrayBlock.java index ed4928cd679d..77df57233577 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleArrayBlock.java @@ -182,7 +182,7 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { throw new UnsupportedOperationException(); } diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleMapBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleMapBlock.java index d585d28267a3..0121df7ba258 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleMapBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleMapBlock.java @@ -243,7 +243,7 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { throw new UnsupportedOperationException(); 
} diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleRowBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleRowBlock.java index f4dc616d34b7..22d736dd54da 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleRowBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/AbstractSingleRowBlock.java @@ -159,7 +159,7 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { throw new UnsupportedOperationException(); } diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/Block.java b/core/trino-spi/src/main/java/io/trino/spi/block/Block.java index 99ece8231dfc..4df4877740f5 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/Block.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/Block.java @@ -17,6 +17,7 @@ import java.util.Collections; import java.util.List; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.trino.spi.block.BlockUtil.checkArrayRange; @@ -188,21 +189,23 @@ default long getLogicalSizeInBytes() long getRegionSizeInBytes(int position, int length); /** - * Returns the size of all positions marked true in the positions array. - * This is equivalent to multiple calls of {@code block.getRegionSizeInBytes(position, length)} - * where you mark all positions for the regions first. + * Returns the number of bytes (in terms of {@link Block#getSizeInBytes()}) required per position + * that this block contains, assuming that the number of bytes required is a known static quantity + * and not dependent on any particular specific position. 
This allows for some complex block wrappings + * to potentially avoid having to call {@link Block#getPositionsSizeInBytes(boolean[], int)} which + * would require computing the specific positions selected + * @return The size in bytes, per position, if this block type does not require specific position information to compute its size */ - long getPositionsSizeInBytes(boolean[] positions); + OptionalInt fixedSizeInBytesPerPosition(); /** * Returns the size of all positions marked true in the positions array. + * This is equivalent to multiple calls of {@code block.getRegionSizeInBytes(position, length)} + * where you mark all positions for the regions first. * The 'selectedPositionsCount' variable may be used to skip iterating through * the positions array in case this is a fixed-width block */ - default long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) - { - return getPositionsSizeInBytes(positions); - } + long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount); /** * Returns the retained size of this block in memory, including over-allocations. diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/BlockUtil.java b/core/trino-spi/src/main/java/io/trino/spi/block/BlockUtil.java index 69d12c5c5121..01a814cf10dd 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/BlockUtil.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/BlockUtil.java @@ -181,13 +181,29 @@ static long[] compactArray(long[] array, int index, int length) return Arrays.copyOfRange(array, index, index + length); } - static int countUsedPositions(boolean[] positions) + static int countSelectedPositionsFromOffsets(boolean[] positions, int[] offsets, int offsetBase) { + checkArrayRange(offsets, offsetBase, positions.length); int used = 0; - for (boolean position : positions) { - // Avoid branching by casting boolean to integer. - // This improves CPU utilization by avoiding branch mispredictions. - used += position ? 
1 : 0; + for (int i = 0; i < positions.length; i++) { + int offsetStart = offsets[offsetBase + i]; + int offsetEnd = offsets[offsetBase + i + 1]; + used += ((positions[i] ? 1 : 0) * (offsetEnd - offsetStart)); + } + return used; + } + + static int countAndMarkSelectedPositionsFromOffsets(boolean[] positions, int[] offsets, int offsetBase, boolean[] elementPositions) + { + checkArrayRange(offsets, offsetBase, positions.length); + int used = 0; + for (int i = 0; i < positions.length; i++) { + int offsetStart = offsets[offsetBase + i]; + int offsetEnd = offsets[offsetBase + i + 1]; + if (positions[i]) { + used += (offsetEnd - offsetStart); + Arrays.fill(elementPositions, offsetStart, offsetEnd, true); + } } return used; } diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlock.java index 6bb663d6e502..27adcaecb925 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlock.java @@ -20,18 +20,19 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; public class ByteArrayBlock implements Block { private static final int INSTANCE_SIZE = ClassLayout.parseClass(ByteArrayBlock.class).instanceSize(); + public static final int SIZE_IN_BYTES_PER_POSITION = Byte.BYTES + Byte.BYTES; private final int arrayOffset; private final int positionCount; @@ -79,21 +80,21 @@ public long getSizeInBytes() } @Override - public long getRegionSizeInBytes(int position, int length) + public OptionalInt fixedSizeInBytesPerPosition() { - return (Byte.BYTES + 
Byte.BYTES) * (long) length; + return OptionalInt.of(SIZE_IN_BYTES_PER_POSITION); } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getRegionSizeInBytes(int position, int length) { - return getPositionsSizeInBytes(positions, countUsedPositions(positions)); + return SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (long) (Byte.BYTES + Byte.BYTES) * selectedPositionsCount; + return (long) SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlockBuilder.java index afe44b4e1d02..66dac250709a 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlockBuilder.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/ByteArrayBlockBuilder.java @@ -20,13 +20,13 @@ import javax.annotation.Nullable; import java.util.Arrays; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.calculateBlockResetSize; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static java.lang.Math.max; public class ByteArrayBlockBuilder @@ -70,7 +70,7 @@ public BlockBuilder writeByte(int value) hasNonNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Byte.BYTES); + blockBuilderStatus.addBytes(ByteArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -93,7 +93,7 @@ public BlockBuilder appendNull() hasNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Byte.BYTES); + 
blockBuilderStatus.addBytes(ByteArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -137,22 +137,28 @@ private void updateDataSize() } } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.of(ByteArrayBlock.SIZE_IN_BYTES_PER_POSITION); + } + @Override public long getSizeInBytes() { - return (Byte.BYTES + Byte.BYTES) * (long) positionCount; + return ByteArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) positionCount; } @Override public long getRegionSizeInBytes(int position, int length) { - return (Byte.BYTES + Byte.BYTES) * (long) length; + return ByteArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (Byte.BYTES + Byte.BYTES) * (long) countUsedPositions(positions); + return (long) ByteArrayBlock.SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java index 1adb280a52db..043699063a95 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java @@ -19,6 +19,7 @@ import java.util.Arrays; import java.util.List; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; @@ -26,7 +27,6 @@ import static io.trino.spi.block.BlockUtil.checkValidPosition; import static io.trino.spi.block.BlockUtil.checkValidPositions; import static io.trino.spi.block.BlockUtil.checkValidRegion; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static io.trino.spi.block.DictionaryId.randomDictionaryId; import static java.lang.Math.min; import static java.util.Collections.singletonList; @@ -216,6 +216,21 @@ public int getPositionCount() return 
positionCount; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + if (uniqueIds == positionCount) { + // Each position is unique, so the per-position fixed size of the dictionary plus the dictionary id overhead + // is our fixed size per position + OptionalInt dictionarySizePerPosition = dictionary.fixedSizeInBytesPerPosition(); + if (dictionarySizePerPosition.isPresent()) { + // Add overhead for a per-position dictionary id entry + return OptionalInt.of(dictionarySizePerPosition.getAsInt() + Integer.BYTES); + } + } + return OptionalInt.empty(); + } + @Override public long getSizeInBytes() { @@ -289,10 +304,13 @@ private long getCompactedDictionarySizeInBytes() */ private long getCompactedDictionaryPositionsSizeInBytes(boolean[] positions) { + int usedIds = 0; boolean[] used = new boolean[dictionary.getPositionCount()]; for (int i = 0; i < positions.length; i++) { + int id = getId(i); if (positions[i]) { - used[getId(i)] = true; + usedIds += used[id] ? 0 : 1; + used[id] = true; } } @@ -300,7 +318,7 @@ private long getCompactedDictionaryPositionsSizeInBytes(boolean[] positions) return ((DictionaryBlock) dictionary).getCompactedDictionaryPositionsSizeInBytes(used); } - return dictionary.getPositionsSizeInBytes(used); + return dictionary.getPositionsSizeInBytes(used, usedIds); } @Override @@ -336,25 +354,68 @@ public long getRegionSizeInBytes(int positionOffset, int length) return getSizeInBytes(); } + OptionalInt fixedSizeInBytesPerPosition = fixedSizeInBytesPerPosition(); + if (fixedSizeInBytesPerPosition.isPresent()) { + // no ids repeat and the dictionary block has a fixed size per position + return fixedSizeInBytesPerPosition.getAsInt() * (long) length; + } + + int usedIds = 0; boolean[] used = new boolean[dictionary.getPositionCount()]; - for (int i = positionOffset; i < positionOffset + length; i++) { - used[getId(i)] = true; + int startOffset = idsOffset + positionOffset; + for (int i = 0; i < length; i++) { + int id = ids[startOffset + i]; 
+ usedIds += used[id] ? 0 : 1; + used[id] = true; + } + + long dictionarySize; + if (usedIds == used.length) { + // discovered dictionary is compact + dictionarySize = dictionary.getSizeInBytes(); + if (sizeInBytes < 0) { + // save the information about compactness + this.uniqueIds = usedIds; + this.sizeInBytes = dictionarySize + (Integer.BYTES * (long) positionCount); + } + } + else { + dictionarySize = dictionary.getPositionsSizeInBytes(used, usedIds); } - return dictionary.getPositionsSizeInBytes(used) + Integer.BYTES * (long) length; + return dictionarySize + (Integer.BYTES * (long) length); } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { checkValidPositions(positions, positionCount); + if (selectedPositionsCount == 0) { + return 0; + } + if (selectedPositionsCount == positionCount) { + return getSizeInBytes(); + } + OptionalInt fixedSizeInBytesPerPosition = fixedSizeInBytesPerPosition(); + if (fixedSizeInBytesPerPosition.isPresent()) { + // no ids repeat and the dictionary block has a fixed size per position + return fixedSizeInBytesPerPosition.getAsInt() * (long) selectedPositionsCount; + } + int usedIds = 0; boolean[] used = new boolean[dictionary.getPositionCount()]; for (int i = 0; i < positions.length; i++) { + int id = ids[idsOffset + i]; if (positions[i]) { - used[getId(i)] = true; + usedIds += used[id] ?
0 : 1; + used[id] = true; } } - return dictionary.getPositionsSizeInBytes(used) + (Integer.BYTES * (long) countUsedPositions(positions)); + long dictionarySize = dictionary.getPositionsSizeInBytes(used, usedIds); + if (usedIds == used.length) { + // dictionary is discovered to be compact, store updated size information + this.uniqueIds = usedIds; + } + return dictionarySize + (Integer.BYTES * (long) selectedPositionsCount); } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlock.java index bde239a5dfaf..75cd2e4571bf 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlock.java @@ -20,19 +20,20 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; public class Int128ArrayBlock implements Block { private static final int INSTANCE_SIZE = ClassLayout.parseClass(Int128ArrayBlock.class).instanceSize(); public static final int INT128_BYTES = Long.BYTES + Long.BYTES; + public static final int SIZE_IN_BYTES_PER_POSITION = INT128_BYTES + Byte.BYTES; private final int positionOffset; private final int positionCount; @@ -69,32 +70,32 @@ public Int128ArrayBlock(int positionCount, Optional valueIsNull, long } this.valueIsNull = valueIsNull; - sizeInBytes = (INT128_BYTES + Byte.BYTES) * (long) positionCount; + sizeInBytes = SIZE_IN_BYTES_PER_POSITION * (long) positionCount; retainedSizeInBytes = INSTANCE_SIZE + sizeOf(valueIsNull) + sizeOf(values); } @Override - public long getSizeInBytes() + public OptionalInt 
fixedSizeInBytesPerPosition() { - return sizeInBytes; + return OptionalInt.of(SIZE_IN_BYTES_PER_POSITION); } @Override - public long getRegionSizeInBytes(int position, int length) + public long getSizeInBytes() { - return (INT128_BYTES + Byte.BYTES) * (long) length; + return sizeInBytes; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getRegionSizeInBytes(int position, int length) { - return getPositionsSizeInBytes(positions, countUsedPositions(positions)); + return SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (long) (INT128_BYTES + Byte.BYTES) * selectedPositionsCount; + return (long) SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlockBuilder.java index cdbb4d6ccb37..8beac16b1b19 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlockBuilder.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/Int128ArrayBlockBuilder.java @@ -20,6 +20,7 @@ import javax.annotation.Nullable; import java.util.Arrays; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.SIZE_OF_LONG; @@ -28,7 +29,6 @@ import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static io.trino.spi.block.Int128ArrayBlock.INT128_BYTES; import static java.lang.Math.max; @@ -87,7 +87,7 @@ public BlockBuilder closeEntry() positionCount++; entryPositionCount = 0; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + INT128_BYTES); + blockBuilderStatus.addBytes(Int128ArrayBlock.SIZE_IN_BYTES_PER_POSITION); } 
return this; } @@ -107,7 +107,7 @@ public BlockBuilder appendNull() hasNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + INT128_BYTES); + blockBuilderStatus.addBytes(Int128ArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -151,22 +151,28 @@ private void updateDataSize() } } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.of(Int128ArrayBlock.SIZE_IN_BYTES_PER_POSITION); + } + @Override public long getSizeInBytes() { - return (INT128_BYTES + Byte.BYTES) * (long) positionCount; + return Int128ArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) positionCount; } @Override public long getRegionSizeInBytes(int position, int length) { - return (INT128_BYTES + Byte.BYTES) * (long) length; + return Int128ArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (INT128_BYTES + Byte.BYTES) * (long) countUsedPositions(positions); + return Int128ArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlock.java index 70ea24829d7a..d34042bd7518 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlock.java @@ -20,19 +20,20 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; public class 
Int96ArrayBlock implements Block { private static final int INSTANCE_SIZE = ClassLayout.parseClass(Int96ArrayBlock.class).instanceSize(); public static final int INT96_BYTES = Long.BYTES + Integer.BYTES; + public static final int SIZE_IN_BYTES_PER_POSITION = INT96_BYTES + Byte.BYTES; private final int positionOffset; private final int positionCount; @@ -75,32 +76,32 @@ public Int96ArrayBlock(int positionCount, Optional valueIsNull, long[ } this.valueIsNull = valueIsNull; - sizeInBytes = (INT96_BYTES + Byte.BYTES) * (long) positionCount; + sizeInBytes = SIZE_IN_BYTES_PER_POSITION * (long) positionCount; retainedSizeInBytes = INSTANCE_SIZE + sizeOf(valueIsNull) + sizeOf(high) + sizeOf(low); } @Override - public long getSizeInBytes() + public OptionalInt fixedSizeInBytesPerPosition() { - return sizeInBytes; + return OptionalInt.of(SIZE_IN_BYTES_PER_POSITION); } @Override - public long getRegionSizeInBytes(int position, int length) + public long getSizeInBytes() { - return (INT96_BYTES + Byte.BYTES) * (long) length; + return sizeInBytes; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getRegionSizeInBytes(int position, int length) { - return getPositionsSizeInBytes(positions, countUsedPositions(positions)); + return SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (long) (INT96_BYTES + Byte.BYTES) * selectedPositionsCount; + return (long) SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlockBuilder.java index d405a821cbd2..5521fcb0c3f4 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlockBuilder.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/Int96ArrayBlockBuilder.java @@ -20,6 +20,7 @@ import javax.annotation.Nullable; import 
java.util.Arrays; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.SIZE_OF_LONG; @@ -28,7 +29,6 @@ import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static io.trino.spi.block.Int96ArrayBlock.INT96_BYTES; import static java.lang.Math.max; @@ -174,22 +174,28 @@ private void updateDataSize() } } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.of(Int96ArrayBlock.SIZE_IN_BYTES_PER_POSITION); + } + @Override public long getSizeInBytes() { - return (INT96_BYTES + Byte.BYTES) * (long) positionCount; + return Int96ArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) positionCount; } @Override public long getRegionSizeInBytes(int position, int length) { - return (INT96_BYTES + Byte.BYTES) * (long) length; + return Int96ArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (INT96_BYTES + Byte.BYTES) * (long) countUsedPositions(positions); + return Int96ArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlock.java index b187f66a421b..b84917eb5ef0 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlock.java @@ -20,18 +20,19 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.checkArrayRange; import 
static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; public class IntArrayBlock implements Block { private static final int INSTANCE_SIZE = ClassLayout.parseClass(IntArrayBlock.class).instanceSize(); + public static final int SIZE_IN_BYTES_PER_POSITION = Integer.BYTES + Byte.BYTES; private final int arrayOffset; private final int positionCount; @@ -68,32 +69,32 @@ public IntArrayBlock(int positionCount, Optional valueIsNull, int[] v } this.valueIsNull = valueIsNull; - sizeInBytes = (Integer.BYTES + Byte.BYTES) * (long) positionCount; + sizeInBytes = SIZE_IN_BYTES_PER_POSITION * (long) positionCount; retainedSizeInBytes = INSTANCE_SIZE + sizeOf(valueIsNull) + sizeOf(values); } @Override - public long getSizeInBytes() + public OptionalInt fixedSizeInBytesPerPosition() { - return sizeInBytes; + return OptionalInt.of(SIZE_IN_BYTES_PER_POSITION); } @Override - public long getRegionSizeInBytes(int position, int length) + public long getSizeInBytes() { - return (Integer.BYTES + Byte.BYTES) * (long) length; + return sizeInBytes; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getRegionSizeInBytes(int position, int length) { - return getPositionsSizeInBytes(positions, countUsedPositions(positions)); + return SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (long) (Integer.BYTES + Byte.BYTES) * selectedPositionsCount; + return (long) SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlockBuilder.java index 5f612c4ac5d2..064a6910dd35 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlockBuilder.java +++ 
b/core/trino-spi/src/main/java/io/trino/spi/block/IntArrayBlockBuilder.java @@ -20,13 +20,13 @@ import javax.annotation.Nullable; import java.util.Arrays; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.calculateBlockResetSize; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static java.lang.Math.max; public class IntArrayBlockBuilder @@ -70,7 +70,7 @@ public BlockBuilder writeInt(int value) hasNonNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Integer.BYTES); + blockBuilderStatus.addBytes(IntArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -93,7 +93,7 @@ public BlockBuilder appendNull() hasNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Integer.BYTES); + blockBuilderStatus.addBytes(IntArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -137,22 +137,28 @@ private void updateDataSize() } } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.of(IntArrayBlock.SIZE_IN_BYTES_PER_POSITION); + } + @Override public long getSizeInBytes() { - return (Integer.BYTES + Byte.BYTES) * (long) positionCount; + return IntArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) positionCount; } @Override public long getRegionSizeInBytes(int position, int length) { - return (Integer.BYTES + Byte.BYTES) * (long) length; + return IntArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (Integer.BYTES + Byte.BYTES) * (long) countUsedPositions(positions); + return 
IntArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/LazyBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/LazyBlock.java index 0661df2cc01b..6ab6c476b0aa 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/LazyBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/LazyBlock.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.OptionalInt; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -154,6 +155,15 @@ public Block getSingleValueBlock(int position) return getBlock().getSingleValueBlock(position); } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + if (!isLoaded()) { + return OptionalInt.empty(); + } + return getBlock().fixedSizeInBytesPerPosition(); + } + @Override public long getSizeInBytes() { @@ -173,12 +183,12 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { if (!isLoaded()) { return 0; } - return getBlock().getPositionsSizeInBytes(positions); + return getBlock().getPositionsSizeInBytes(positions, selectedPositionsCount); } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlock.java index 14fa6630902e..fda48810c5ba 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlock.java @@ -20,19 +20,20 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; 
import static io.trino.spi.block.BlockUtil.compactArray; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static java.lang.Math.toIntExact; public class LongArrayBlock implements Block { private static final int INSTANCE_SIZE = ClassLayout.parseClass(LongArrayBlock.class).instanceSize(); + public static final int SIZE_IN_BYTES_PER_POSITION = Long.BYTES + Byte.BYTES; private final int arrayOffset; private final int positionCount; @@ -69,32 +70,32 @@ public LongArrayBlock(int positionCount, Optional valueIsNull, long[] } this.valueIsNull = valueIsNull; - sizeInBytes = (Long.BYTES + Byte.BYTES) * (long) positionCount; + sizeInBytes = SIZE_IN_BYTES_PER_POSITION * (long) positionCount; retainedSizeInBytes = INSTANCE_SIZE + sizeOf(valueIsNull) + sizeOf(values); } @Override - public long getSizeInBytes() + public OptionalInt fixedSizeInBytesPerPosition() { - return sizeInBytes; + return OptionalInt.of(SIZE_IN_BYTES_PER_POSITION); } @Override - public long getRegionSizeInBytes(int position, int length) + public long getSizeInBytes() { - return (Long.BYTES + Byte.BYTES) * (long) length; + return sizeInBytes; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getRegionSizeInBytes(int position, int length) { - return getPositionsSizeInBytes(positions, countUsedPositions(positions)); + return SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (long) (Long.BYTES + Byte.BYTES) * selectedPositionsCount; + return (long) SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlockBuilder.java index 041fdf7b70bf..161ee2332a7d 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlockBuilder.java +++ 
b/core/trino-spi/src/main/java/io/trino/spi/block/LongArrayBlockBuilder.java @@ -20,13 +20,13 @@ import javax.annotation.Nullable; import java.util.Arrays; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.calculateBlockResetSize; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static java.lang.Math.max; import static java.lang.Math.toIntExact; @@ -71,7 +71,7 @@ public BlockBuilder writeLong(long value) hasNonNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Long.BYTES); + blockBuilderStatus.addBytes(LongArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -94,7 +94,7 @@ public BlockBuilder appendNull() hasNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Long.BYTES); + blockBuilderStatus.addBytes(LongArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -138,22 +138,28 @@ private void updateDataSize() } } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.of(LongArrayBlock.SIZE_IN_BYTES_PER_POSITION); + } + @Override public long getSizeInBytes() { - return (Long.BYTES + Byte.BYTES) * (long) positionCount; + return LongArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) positionCount; } @Override public long getRegionSizeInBytes(int position, int length) { - return (Long.BYTES + Byte.BYTES) * (long) length; + return LongArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (Long.BYTES + Byte.BYTES) * (long) countUsedPositions(positions); + return 
LongArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/RunLengthEncodedBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/RunLengthEncodedBlock.java index a438690ea79a..1c81f97bcd6e 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/RunLengthEncodedBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/RunLengthEncodedBlock.java @@ -18,7 +18,10 @@ import io.trino.spi.type.Type; import org.openjdk.jol.info.ClassLayout; +import javax.annotation.Nullable; + import java.util.List; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.trino.spi.block.BlockUtil.checkArrayRange; @@ -83,6 +86,12 @@ public int getPositionCount() return positionCount; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); // size does not vary per position selected + } + @Override public long getSizeInBytes() { @@ -154,7 +163,7 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(@Nullable boolean[] positions, int selectedPositionCount) { return value.getSizeInBytes(); } diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlock.java index 5a29e15837fc..9e2ed8dcbd2e 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlock.java @@ -20,18 +20,19 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; import static io.trino.spi.block.BlockUtil.compactArray; 
-import static io.trino.spi.block.BlockUtil.countUsedPositions; public class ShortArrayBlock implements Block { private static final int INSTANCE_SIZE = ClassLayout.parseClass(ShortArrayBlock.class).instanceSize(); + public static final int SIZE_IN_BYTES_PER_POSITION = Short.BYTES + Byte.BYTES; private final int arrayOffset; private final int positionCount; @@ -68,32 +69,32 @@ public ShortArrayBlock(int positionCount, Optional valueIsNull, short } this.valueIsNull = valueIsNull; - sizeInBytes = (Short.BYTES + Byte.BYTES) * (long) positionCount; + sizeInBytes = SIZE_IN_BYTES_PER_POSITION * (long) positionCount; retainedSizeInBytes = INSTANCE_SIZE + sizeOf(valueIsNull) + sizeOf(values); } @Override - public long getSizeInBytes() + public OptionalInt fixedSizeInBytesPerPosition() { - return sizeInBytes; + return OptionalInt.of(SIZE_IN_BYTES_PER_POSITION); } @Override - public long getRegionSizeInBytes(int position, int length) + public long getSizeInBytes() { - return (Short.BYTES + Byte.BYTES) * (long) length; + return sizeInBytes; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getRegionSizeInBytes(int position, int length) { - return getPositionsSizeInBytes(positions, countUsedPositions(positions)); + return SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { - return (long) (Short.BYTES + Byte.BYTES) * selectedPositionsCount; + return (long) SIZE_IN_BYTES_PER_POSITION * selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlockBuilder.java index a04cd9bd0825..7f97b06d074b 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlockBuilder.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/ShortArrayBlockBuilder.java @@ -20,13 +20,13 @@ import javax.annotation.Nullable; import 
java.util.Arrays; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; import static io.trino.spi.block.BlockUtil.calculateBlockResetSize; import static io.trino.spi.block.BlockUtil.checkArrayRange; import static io.trino.spi.block.BlockUtil.checkValidRegion; -import static io.trino.spi.block.BlockUtil.countUsedPositions; import static java.lang.Math.max; public class ShortArrayBlockBuilder @@ -70,7 +70,7 @@ public BlockBuilder writeShort(int value) hasNonNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Short.BYTES); + blockBuilderStatus.addBytes(ShortArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -93,7 +93,7 @@ public BlockBuilder appendNull() hasNullValue = true; positionCount++; if (blockBuilderStatus != null) { - blockBuilderStatus.addBytes(Byte.BYTES + Short.BYTES); + blockBuilderStatus.addBytes(ShortArrayBlock.SIZE_IN_BYTES_PER_POSITION); } return this; } @@ -137,22 +137,28 @@ private void updateDataSize() } } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.of(ShortArrayBlock.SIZE_IN_BYTES_PER_POSITION); + } + @Override public long getSizeInBytes() { - return (Short.BYTES + Byte.BYTES) * (long) positionCount; + return ShortArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) positionCount; } @Override public long getRegionSizeInBytes(int position, int length) { - return (Short.BYTES + Byte.BYTES) * (long) length; + return ShortArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) length; } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionCount) { - return (Short.BYTES + Byte.BYTES) * (long) countUsedPositions(positions); + return ShortArrayBlock.SIZE_IN_BYTES_PER_POSITION * (long) selectedPositionCount; } @Override diff --git 
a/core/trino-spi/src/main/java/io/trino/spi/block/SingleArrayBlockWriter.java b/core/trino-spi/src/main/java/io/trino/spi/block/SingleArrayBlockWriter.java index 1eb9c88bb1d7..0e823affc93f 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/SingleArrayBlockWriter.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/SingleArrayBlockWriter.java @@ -16,6 +16,7 @@ import io.airlift.slice.Slice; import org.openjdk.jol.info.ClassLayout; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static java.lang.String.format; @@ -43,6 +44,12 @@ protected Block getBlock() return blockBuilder; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); + } + @Override public long getSizeInBytes() { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlock.java index d14216d7e022..caa2e8ab09a4 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlock.java @@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandle; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOfIntArray; @@ -56,6 +57,12 @@ public int getPositionCount() return positionCount; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); + } + @Override public long getSizeInBytes() { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlockWriter.java b/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlockWriter.java index 00627e1c3b37..ec4321d03cfb 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlockWriter.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/SingleMapBlockWriter.java @@ -16,6 +16,7 @@ import io.airlift.slice.Slice; import org.openjdk.jol.info.ClassLayout; +import 
java.util.OptionalInt; import java.util.function.BiConsumer; import static java.lang.String.format; @@ -68,6 +69,12 @@ Block getRawValueBlock() return valueBlockBuilder; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); + } + @Override public long getSizeInBytes() { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlock.java index d1159f6a788a..9bf75fdbba5e 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlock.java @@ -16,6 +16,7 @@ import org.openjdk.jol.info.ClassLayout; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.trino.spi.block.BlockUtil.ensureBlocksAreLoaded; @@ -58,6 +59,12 @@ public int getPositionCount() return fieldBlocks.length; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); + } + @Override public long getSizeInBytes() { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlockWriter.java b/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlockWriter.java index 926938dcf16e..16a7a91c9434 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlockWriter.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/SingleRowBlockWriter.java @@ -16,6 +16,7 @@ import io.airlift.slice.Slice; import org.openjdk.jol.info.ClassLayout; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static java.lang.String.format; @@ -73,6 +74,12 @@ protected int getRowIndex() return rowIndex; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); + } + @Override public long getSizeInBytes() { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlock.java 
b/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlock.java index 00e72eab8bd8..c7f65201d9ef 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlock.java @@ -21,6 +21,7 @@ import javax.annotation.Nullable; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.sizeOf; @@ -112,6 +113,12 @@ public int getPositionCount() return positionCount; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); // size varies per element and is not fixed + } + @Override public long getSizeInBytes() { @@ -125,17 +132,21 @@ public long getRegionSizeInBytes(int position, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount) { + if (selectedPositionsCount == 0) { + return 0; + } + if (selectedPositionsCount == positionCount) { + return getSizeInBytes(); + } long sizeInBytes = 0; - int usedPositionCount = 0; for (int i = 0; i < positions.length; ++i) { if (positions[i]) { - usedPositionCount++; sizeInBytes += offsets[arrayOffset + i + 1] - offsets[arrayOffset + i]; } } - return sizeInBytes + (Integer.BYTES + Byte.BYTES) * (long) usedPositionCount; + return sizeInBytes + (Integer.BYTES + Byte.BYTES) * (long) selectedPositionsCount; } @Override diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java index 01460cabb98a..d9ebebb7cecb 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java @@ -22,6 +22,7 @@ import javax.annotation.Nullable; import java.util.Arrays; +import java.util.OptionalInt; import 
java.util.function.BiConsumer; import static io.airlift.slice.SizeOf.SIZE_OF_BYTE; @@ -101,6 +102,12 @@ public int getPositionCount() return positions; } + @Override + public OptionalInt fixedSizeInBytesPerPosition() + { + return OptionalInt.empty(); // size varies per element and is not fixed + } + @Override public long getSizeInBytes() { @@ -118,18 +125,16 @@ public long getRegionSizeInBytes(int positionOffset, int length) } @Override - public long getPositionsSizeInBytes(boolean[] positions) + public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionCount) { checkValidPositions(positions, getPositionCount()); long sizeInBytes = 0; - int usedPositionCount = 0; for (int i = 0; i < positions.length; ++i) { if (positions[i]) { - usedPositionCount++; sizeInBytes += getOffset(i + 1) - getOffset(i); } } - return sizeInBytes + (Integer.BYTES + Byte.BYTES) * (long) usedPositionCount; + return sizeInBytes + (Integer.BYTES + Byte.BYTES) * (long) selectedPositionCount; } @Override From 321330393df68c4ddcd59bdf7be322a33d48a62a Mon Sep 17 00:00:00 2001 From: James Petty Date: Tue, 21 Dec 2021 16:49:26 -0500 Subject: [PATCH 2/4] Populate size and uniqueIds inside of DictionaryBlock#getPositions --- .../io/trino/spi/block/DictionaryBlock.java | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java index 043699063a95..2acafd8cdc47 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java @@ -410,10 +410,19 @@ public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCo used[id] = true; } } - long dictionarySize = dictionary.getPositionsSizeInBytes(used, usedIds); + + long dictionarySize; if (usedIds == used.length) { - // dictionary is discovered to be compact, store 
updated size information - this.uniqueIds = usedIds; + // discovered dictionary is compact + dictionarySize = dictionary.getSizeInBytes(); + if (sizeInBytes < 0) { + // save the information about compactness + this.uniqueIds = usedIds; + this.sizeInBytes = dictionarySize + (Integer.BYTES * (long) positionCount); + } + } + else { + dictionarySize = dictionary.getPositionsSizeInBytes(used, usedIds); } return dictionarySize + (Integer.BYTES * (long) selectedPositionsCount); } @@ -540,20 +549,25 @@ public Block getPositions(int[] positions, int offset, int length) int[] newIds = new int[length]; boolean isCompact = length >= dictionary.getPositionCount() && isCompact(); - boolean[] seen = null; - if (isCompact) { - seen = new boolean[dictionary.getPositionCount()]; - } + boolean[] usedIds = isCompact ? new boolean[dictionary.getPositionCount()] : null; + int uniqueIds = 0; for (int i = 0; i < length; i++) { - newIds[i] = getId(positions[offset + i]); - if (isCompact) { - seen[newIds[i]] = true; + int id = getId(positions[offset + i]); + newIds[i] = id; + if (usedIds != null) { + uniqueIds += usedIds[id] ? 
0 : 1; + usedIds[id] = true; } } - for (int i = 0; i < dictionary.getPositionCount() && isCompact; i++) { - isCompact &= seen[i]; + // All positions must have been referenced in order to be compact + isCompact &= (usedIds != null && usedIds.length == uniqueIds); + DictionaryBlock result = new DictionaryBlock(newIds.length, dictionary, newIds, isCompact, getDictionarySourceId()); + if (usedIds != null && !isCompact) { + // resulting dictionary is not compact, but we know the number of unique ids and which positions are used + result.uniqueIds = uniqueIds; + result.sizeInBytes = dictionary.getPositionsSizeInBytes(usedIds, uniqueIds) + (Integer.BYTES * (long) length); } - return new DictionaryBlock(newIds.length, getDictionary(), newIds, isCompact, getDictionarySourceId()); + return result; } @Override From e42636b6b7c3e4ce881dc9f9159b079d606fad68 Mon Sep 17 00:00:00 2001 From: James Petty Date: Tue, 8 Feb 2022 10:01:45 -0500 Subject: [PATCH 3/4] Add more DictionaryBlock benchmarks --- .../project/BenchmarkDictionaryBlock.java | 111 ++++++++++++++++-- 1 file changed, 103 insertions(+), 8 deletions(-) diff --git a/core/trino-main/src/test/java/io/trino/operator/project/BenchmarkDictionaryBlock.java b/core/trino-main/src/test/java/io/trino/operator/project/BenchmarkDictionaryBlock.java index e2d167e5be5c..4b453116a634 100644 --- a/core/trino-main/src/test/java/io/trino/operator/project/BenchmarkDictionaryBlock.java +++ b/core/trino-main/src/test/java/io/trino/operator/project/BenchmarkDictionaryBlock.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import io.airlift.slice.Slice; import io.trino.spi.block.Block; +import io.trino.spi.block.BlockBuilder; import io.trino.spi.block.DictionaryBlock; import io.trino.spi.type.MapType; import io.trino.spi.type.StandardTypes; @@ -43,6 +44,7 @@ import static io.airlift.slice.Slices.utf8Slice; import static io.trino.block.BlockAssertions.createSlicesBlock; import static io.trino.jmh.Benchmarks.benchmark; 
+import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.util.concurrent.TimeUnit.MICROSECONDS; @@ -62,18 +64,31 @@ public long getSizeInBytes(BenchmarkData data) return data.getDictionaryBlock().getSizeInBytes(); } + @Benchmark + public long getPositionsSizeInBytes(BenchmarkData data) + { + return data.getAllPositionsDictionaryBlock().getPositionsSizeInBytes(data.getSelectedPositionsMask(), data.getSelectedPositionCount()); + } + + @Benchmark + public long getPositionsThenGetSizeInBytes(BenchmarkData data) + { + int[] positionIds = data.getPositionsIds(); + return data.getAllPositionsDictionaryBlock().getPositions(positionIds, 0, positionIds.length).getSizeInBytes(); + } + @Benchmark public Block copyPositions(BenchmarkData data) { int[] positionIds = data.getPositionsIds(); - return data.getAllPositionsDictionaryBlock().copyPositions(data.getPositionsIds(), 0, positionIds.length); + return data.getAllPositionsDictionaryBlock().copyPositions(positionIds, 0, positionIds.length); } @Benchmark public Block copyPositionsCompactDictionary(BenchmarkData data) { int[] positionIds = data.getPositionsIds(); - return data.getAllPositionsCompactDictionaryBlock().copyPositions(data.getPositionsIds(), 0, positionIds.length); + return data.getAllPositionsCompactDictionaryBlock().copyPositions(positionIds, 0, positionIds.length); } @State(Scope.Thread) @@ -83,27 +98,49 @@ public static class BenchmarkData @Param({"100", "1000", "10000", "100000"}) private String selectedPositions = "100"; + @Param({"varchar", "integer"}) + private String valueType = "integer"; + private int[] positionsIds; private DictionaryBlock dictionaryBlock; private DictionaryBlock allPositionsDictionaryBlock; private DictionaryBlock allPositionsCompactDictionaryBlock; + private boolean[] selectedPositionsMask; + private int selectedPositionCount; - 
@Setup(Level.Invocation) + @Setup(Level.Trial) public void setup() { positionsIds = generateIds(Integer.parseInt(selectedPositions), POSITIONS); - Block mapBlock = createMapBlock(POSITIONS); + selectedPositionsMask = new boolean[POSITIONS]; + for (int position : positionsIds) { + if (!selectedPositionsMask[position]) { + selectedPositionsMask[position] = true; + selectedPositionCount++; + } + } + Block mapBlock; + switch (valueType) { + case "varchar": + mapBlock = createVarcharMapBlock(POSITIONS); + break; + case "integer": + mapBlock = createIntMapBlock(POSITIONS); + break; + default: + throw new IllegalArgumentException("Unrecognized value type: " + valueType); + } dictionaryBlock = new DictionaryBlock(mapBlock, positionsIds); int[] allPositions = IntStream.range(0, POSITIONS).toArray(); allPositionsDictionaryBlock = new DictionaryBlock(mapBlock, allPositions); allPositionsCompactDictionaryBlock = new DictionaryBlock(POSITIONS, mapBlock, allPositions, true); } - private static Block createMapBlock(int positionCount) + private static Block createVarcharMapBlock(int positionCount) { MapType mapType = (MapType) TESTING_TYPE_MANAGER.getType(new TypeSignature(StandardTypes.MAP, TypeSignatureParameter.typeParameter(VARCHAR.getTypeSignature()), TypeSignatureParameter.typeParameter(VARCHAR.getTypeSignature()))); - Block keyBlock = createDictionaryBlock(generateList("key", positionCount)); - Block valueBlock = createDictionaryBlock(generateList("value", positionCount)); + Block keyBlock = createVarcharDictionaryBlock(generateList("key", positionCount)); + Block valueBlock = createVarcharDictionaryBlock(generateList("value", positionCount)); int[] offsets = new int[positionCount + 1]; int mapSize = keyBlock.getPositionCount() / positionCount; for (int i = 0; i < offsets.length; i++) { @@ -112,7 +149,7 @@ private static Block createMapBlock(int positionCount) return mapType.createBlockFromKeyValue(Optional.empty(), offsets, keyBlock, valueBlock); } - private static Block 
createDictionaryBlock(List values) + private static Block createVarcharDictionaryBlock(List values) { Block dictionary = createSliceArrayBlock(values); int[] ids = new int[values.size()]; @@ -122,6 +159,38 @@ private static Block createDictionaryBlock(List values) return new DictionaryBlock(dictionary, ids); } + private static Block createIntMapBlock(int positionCount) + { + MapType mapType = (MapType) TESTING_TYPE_MANAGER.getType(new TypeSignature(StandardTypes.MAP, TypeSignatureParameter.typeParameter(INTEGER.getTypeSignature()), TypeSignatureParameter.typeParameter(INTEGER.getTypeSignature()))); + Block keyBlock = createIntDictionaryBlock(positionCount); + Block valueBlock = createIntDictionaryBlock(positionCount); + int[] offsets = new int[positionCount + 1]; + int mapSize = keyBlock.getPositionCount() / positionCount; + for (int i = 0; i < offsets.length; i++) { + offsets[i] = mapSize * i; + } + return mapType.createBlockFromKeyValue(Optional.empty(), offsets, keyBlock, valueBlock); + } + + private static Block createIntDictionaryBlock(int positionCount) + { + Block dictionary = createIntBlock(positionCount); + int[] ids = new int[positionCount]; + for (int i = 0; i < ids.length; i++) { + ids[i] = i; + } + return new DictionaryBlock(dictionary, ids); + } + + private static Block createIntBlock(int positionCount) + { + BlockBuilder builder = INTEGER.createFixedSizeBlockBuilder(positionCount); + for (int i = 0; i < positionCount; i++) { + INTEGER.writeLong(builder, i); + } + return builder.build(); + } + private static Block createSliceArrayBlock(List values) { // last position is reserved for null @@ -151,6 +220,16 @@ public int[] getPositionsIds() return positionsIds; } + public boolean[] getSelectedPositionsMask() + { + return selectedPositionsMask; + } + + public int getSelectedPositionCount() + { + return selectedPositionCount; + } + public DictionaryBlock getDictionaryBlock() { return dictionaryBlock; @@ -175,6 +254,22 @@ public void testGetSizeInBytes() 
getSizeInBytes(data); } + @Test + public void testGetPositionsSizeInBytes() + { + BenchmarkData data = new BenchmarkData(); + data.setup(); + getPositionsSizeInBytes(data); + } + + @Test + public void testGetPositionsThenGetSizeInBytes() + { + BenchmarkData data = new BenchmarkData(); + data.setup(); + getPositionsThenGetSizeInBytes(data); + } + @Test public void testCopyPositions() { From 79ea564ef6e6d90e68ee888d37075411f27cc799 Mon Sep 17 00:00:00 2001 From: James Petty Date: Thu, 10 Feb 2022 18:14:51 -0500 Subject: [PATCH 4/4] Refactor and simplify all nested DictionaryBlock size methods --- .../io/trino/block/TestDictionaryBlock.java | 113 +++++++++++++++ .../io/trino/spi/block/DictionaryBlock.java | 129 +++++------------- 2 files changed, 144 insertions(+), 98 deletions(-) diff --git a/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java b/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java index 3be65538a01e..95519637e4ab 100644 --- a/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java +++ b/core/trino-main/src/test/java/io/trino/block/TestDictionaryBlock.java @@ -19,10 +19,15 @@ import io.trino.spi.block.BlockBuilder; import io.trino.spi.block.DictionaryBlock; import io.trino.spi.block.DictionaryId; +import io.trino.spi.block.IntArrayBlock; import io.trino.spi.block.VariableWidthBlock; import io.trino.spi.block.VariableWidthBlockBuilder; import org.testng.annotations.Test; +import java.util.Arrays; +import java.util.Optional; +import java.util.stream.IntStream; + import static io.airlift.slice.SizeOf.SIZE_OF_INT; import static io.airlift.testing.Assertions.assertInstanceOf; import static io.trino.block.BlockAssertions.createSlicesBlock; @@ -392,6 +397,114 @@ public void testEstimatedDataSizeForStats() } } + @Test + public void testNestedDictionarySizes() + { + // fixed width block + Block fixedWidthBlock = new IntArrayBlock(100, Optional.empty(), IntStream.range(0, 100).toArray()); + 
assertDictionarySizeMethods(fixedWidthBlock); + assertDictionarySizeMethods(new DictionaryBlock(fixedWidthBlock, IntStream.range(0, 50).toArray())); + assertDictionarySizeMethods( + new DictionaryBlock( + new DictionaryBlock(fixedWidthBlock, IntStream.range(0, 50).toArray()), + IntStream.range(0, 10).toArray())); + + // variable width block + Block variableWidthBlock = createSlicesBlock(createExpectedValues(100)); + assertDictionarySizeMethods(variableWidthBlock); + assertDictionarySizeMethods(new DictionaryBlock(variableWidthBlock, IntStream.range(0, 50).toArray())); + assertDictionarySizeMethods( + new DictionaryBlock( + new DictionaryBlock(variableWidthBlock, IntStream.range(0, 50).toArray()), + IntStream.range(0, 10).toArray())); + } + + private static void assertDictionarySizeMethods(Block block) + { + int positions = block.getPositionCount(); + + int[] allIds = IntStream.range(0, positions).toArray(); + if (block instanceof DictionaryBlock) { + assertEquals( + new DictionaryBlock(block, allIds).getSizeInBytes(), + block.getSizeInBytes(), + "nested dictionary size should not be counted"); + } + else { + assertEquals(new DictionaryBlock(block, allIds).getSizeInBytes(), block.getSizeInBytes() + (Integer.BYTES * (long) positions)); + } + + if (positions > 0) { + int firstHalfLength = positions / 2; + int secondHalfLength = positions - firstHalfLength; + int[] firstHalfIds = IntStream.range(0, firstHalfLength).toArray(); + int[] secondHalfIds = IntStream.range(firstHalfLength, positions).toArray(); + + boolean[] selectedPositions = new boolean[positions]; + selectedPositions[0] = true; + if (block instanceof DictionaryBlock) { + assertEquals( + new DictionaryBlock(block, allIds).getPositionsSizeInBytes(selectedPositions, 1), + block.getPositionsSizeInBytes(selectedPositions, 1), + "nested dictionary blocks must not include nested id overhead"); + assertEquals( + new DictionaryBlock(block, new int[]{0}).getSizeInBytes(), + 
block.getPositionsSizeInBytes(selectedPositions, 1), + "nested dictionary blocks must not include nested id overhead"); + + Arrays.fill(selectedPositions, true); + assertEquals( + new DictionaryBlock(block, allIds).getPositionsSizeInBytes(selectedPositions, positions), + block.getSizeInBytes(), + "nested dictionary blocks must not include nested id overhead"); + + assertEquals( + new DictionaryBlock(block, firstHalfIds).getSizeInBytes(), + block.getRegionSizeInBytes(0, firstHalfLength), + "nested dictionary blocks must not include nested id overhead"); + assertEquals( + new DictionaryBlock(block, secondHalfIds).getSizeInBytes(), + block.getRegionSizeInBytes(firstHalfLength, secondHalfLength), + "nested dictionary blocks must not include nested id overhead"); + assertEquals( + new DictionaryBlock(block, allIds).getRegionSizeInBytes(0, firstHalfLength), + block.getRegionSizeInBytes(0, firstHalfLength), + "nested dictionary blocks must not include nested id overhead"); + assertEquals( + new DictionaryBlock(block, allIds).getRegionSizeInBytes(firstHalfLength, secondHalfLength), + block.getRegionSizeInBytes(firstHalfLength, secondHalfLength), + "nested dictionary blocks must not include nested id overhead"); + } + else { + assertEquals( + new DictionaryBlock(block, allIds).getPositionsSizeInBytes(selectedPositions, 1), + block.getPositionsSizeInBytes(selectedPositions, 1) + Integer.BYTES); + + assertEquals( + new DictionaryBlock(block, new int[]{0}).getSizeInBytes(), + block.getPositionsSizeInBytes(selectedPositions, 1) + Integer.BYTES); + + Arrays.fill(selectedPositions, true); + assertEquals( + new DictionaryBlock(block, allIds).getPositionsSizeInBytes(selectedPositions, positions), + block.getSizeInBytes() + (Integer.BYTES * (long) positions)); + + assertEquals( + new DictionaryBlock(block, firstHalfIds).getSizeInBytes(), + block.getRegionSizeInBytes(0, firstHalfLength) + (Integer.BYTES * (long) firstHalfLength)); + assertEquals( + new DictionaryBlock(block, 
secondHalfIds).getSizeInBytes(), + block.getRegionSizeInBytes(firstHalfLength, secondHalfLength) + (Integer.BYTES * (long) secondHalfLength)); + assertEquals( + new DictionaryBlock(block, allIds).getRegionSizeInBytes(0, firstHalfLength), + block.getRegionSizeInBytes(0, firstHalfLength) + (Integer.BYTES * (long) firstHalfLength)); + assertEquals( + new DictionaryBlock(block, allIds).getRegionSizeInBytes(firstHalfLength, secondHalfLength), + block.getRegionSizeInBytes(firstHalfLength, secondHalfLength) + (Integer.BYTES * (long) secondHalfLength)); + } + } + } + private static DictionaryBlock createDictionaryBlockWithUnreferencedKeys(Slice[] expectedValues, int positionCount) { // adds references to 0 and all odd indexes diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java index 2acafd8cdc47..89dfb844c211 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java @@ -223,10 +223,11 @@ public OptionalInt fixedSizeInBytesPerPosition() // Each position is unique, so the per-position fixed size of the dictionary plus the dictionary id overhead // is our fixed size per position OptionalInt dictionarySizePerPosition = dictionary.fixedSizeInBytesPerPosition(); - if (dictionarySizePerPosition.isPresent()) { - // Add overhead for a per-position dictionary id entry - return OptionalInt.of(dictionarySizePerPosition.getAsInt() + Integer.BYTES); + // Nested dictionaries should not include the additional id array overhead in the result + if (dictionarySizePerPosition.isPresent() && !(dictionary instanceof DictionaryBlock)) { + dictionarySizePerPosition = OptionalInt.of(dictionarySizePerPosition.getAsInt() + Integer.BYTES); } + return dictionarySizePerPosition; } return OptionalInt.empty(); } @@ -244,83 +245,26 @@ private void calculateCompactSize() { int uniqueIds = 0; boolean[] used = 
new boolean[dictionary.getPositionCount()]; + // nested dictionaries are assumed not to have sequential ids + boolean isSequentialIds = !(dictionary instanceof DictionaryBlock); int previousPosition = -1; - boolean isSequentialIds = true; for (int i = 0; i < positionCount; i++) { - int position = getId(i); + int position = ids[idsOffset + i]; // Avoid branching uniqueIds += used[position] ? 0 : 1; used[position] = true; - - isSequentialIds = isSequentialIds && previousPosition < position; - previousPosition = position; - } - - long dictionaryBlockSize; - - if (dictionary instanceof DictionaryBlock) { - // dictionary is nested, compaction would unnest it and nested ids - // array shouldn't be accounted for - DictionaryBlock nestedDictionary = (DictionaryBlock) dictionary; - if (uniqueIds == dictionary.getPositionCount()) { - // dictionary is compact, all positions were used - dictionaryBlockSize = nestedDictionary.getCompactedDictionarySizeInBytes(); - } - else { - dictionaryBlockSize = nestedDictionary.getCompactedDictionaryPositionsSizeInBytes(used); - } - // nested dictionaries are assumed not to have sequential ids - isSequentialIds = false; - } - else { - if (uniqueIds == dictionary.getPositionCount()) { - // dictionary is compact, all positions were used - dictionaryBlockSize = dictionary.getSizeInBytes(); - } - else { - dictionaryBlockSize = dictionary.getPositionsSizeInBytes(used, uniqueIds); + if (isSequentialIds) { + // this branch is predictable and will switch paths at most once while looping + isSequentialIds = previousPosition < position; + previousPosition = position; } } - this.sizeInBytes = dictionaryBlockSize + (Integer.BYTES * (long) positionCount); + this.sizeInBytes = getSizeInBytesForSelectedPositions(used, uniqueIds, positionCount); this.uniqueIds = uniqueIds; this.isSequentialIds = isSequentialIds; } - /** - * Returns size of compacted dictionary. This is computed as if the dictionaries were unnested. 
- */ - private long getCompactedDictionarySizeInBytes() - { - if (sizeInBytes == -1) { - calculateCompactSize(); - } - - return sizeInBytes - (Integer.BYTES * (long) positionCount); - } - - /** - * Returns size of compacted dictionary for given positions. This is computed as if the dictionaries were unnested. - */ - private long getCompactedDictionaryPositionsSizeInBytes(boolean[] positions) - { - int usedIds = 0; - boolean[] used = new boolean[dictionary.getPositionCount()]; - for (int i = 0; i < positions.length; i++) { - int id = getId(i); - if (positions[i]) { - usedIds += used[id] ? 0 : 1; - used[id] = true; - } - } - - if (dictionary instanceof DictionaryBlock) { - return ((DictionaryBlock) dictionary).getCompactedDictionaryPositionsSizeInBytes(used); - } - - return dictionary.getPositionsSizeInBytes(used, usedIds); - } - @Override public long getLogicalSizeInBytes() { @@ -360,29 +304,16 @@ public long getRegionSizeInBytes(int positionOffset, int length) return fixedSizeInBytesPerPosition.getAsInt() * (long) length; } - int usedIds = 0; + int uniqueIds = 0; boolean[] used = new boolean[dictionary.getPositionCount()]; int startOffset = idsOffset + positionOffset; for (int i = 0; i < length; i++) { int id = ids[startOffset + i]; - usedIds += used[id] ? 0 : 1; + uniqueIds += used[id] ? 
0 : 1; used[id] = true; } - long dictionarySize; - if (usedIds == used.length) { - // discovered dictionary is compact - dictionarySize = dictionary.getSizeInBytes(); - if (sizeInBytes < 0) { - // save the information about compactness - this.uniqueIds = usedIds; - this.sizeInBytes = dictionarySize + (Integer.BYTES * (long) positionCount); - } - } - else { - dictionarySize = dictionary.getPositionsSizeInBytes(used, usedIds); - } - return dictionarySize + (Integer.BYTES * (long) length); + return getSizeInBytesForSelectedPositions(used, uniqueIds, length); } @Override @@ -401,30 +332,32 @@ public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCo return fixedSizeInBytesPerPosition.getAsInt() * (long) selectedPositionsCount; } - int usedIds = 0; + int uniqueIds = 0; boolean[] used = new boolean[dictionary.getPositionCount()]; for (int i = 0; i < positions.length; i++) { int id = ids[idsOffset + i]; if (positions[i]) { - usedIds += used[id] ? 0 : 1; + uniqueIds += used[id] ? 
0 : 1; used[id] = true; } } - long dictionarySize; - if (usedIds == used.length) { - // discovered dictionary is compact - dictionarySize = dictionary.getSizeInBytes(); - if (sizeInBytes < 0) { - // save the information about compactness - this.uniqueIds = usedIds; - this.sizeInBytes = dictionarySize + (Integer.BYTES * (long) positionCount); - } + return getSizeInBytesForSelectedPositions(used, uniqueIds, selectedPositionsCount); + } + + private long getSizeInBytesForSelectedPositions(boolean[] usedIds, int uniqueIds, int selectedPositions) + { + long dictionarySize = dictionary.getPositionsSizeInBytes(usedIds, uniqueIds); + if (dictionary instanceof DictionaryBlock) { + // Don't include the nested ids array overhead in the resulting size + dictionarySize -= (Integer.BYTES * (long) uniqueIds); } - else { - dictionarySize = dictionary.getPositionsSizeInBytes(used, usedIds); + if (uniqueIds == dictionary.getPositionCount() && this.sizeInBytes == -1) { + // All positions in the dictionary are referenced, store the uniqueId count and sizeInBytes + this.uniqueIds = uniqueIds; + this.sizeInBytes = dictionarySize + (Integer.BYTES * (long) positionCount); } - return dictionarySize + (Integer.BYTES * (long) selectedPositionsCount); + return dictionarySize + (Integer.BYTES * (long) selectedPositions); } @Override