diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/TopNBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/TopNBenchmark.java index c53d08b063ba9..f4b5397e55d39 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/TopNBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/TopNBenchmark.java @@ -13,11 +13,13 @@ import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.Operator; -import org.elasticsearch.compute.operator.TopNOperator; +import org.elasticsearch.compute.operator.topn.TopNEncoder; +import org.elasticsearch.compute.operator.topn.TopNOperator; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -77,8 +79,27 @@ private static Operator operator(String data, int topCount) { case TWO_LONGS, LONGS_AND_BYTES_REFS -> 2; default -> throw new IllegalArgumentException("unsupported data type [" + data + "]"); }; + List<ElementType> elementTypes = switch (data) { + case LONGS -> List.of(ElementType.LONG); + case INTS -> List.of(ElementType.INT); + case DOUBLES -> List.of(ElementType.DOUBLE); + case BOOLEANS -> List.of(ElementType.BOOLEAN); + case BYTES_REFS -> List.of(ElementType.BYTES_REF); + case TWO_LONGS -> List.of(ElementType.LONG, ElementType.LONG); + case LONGS_AND_BYTES_REFS -> List.of(ElementType.LONG, ElementType.BYTES_REF); + default -> throw new IllegalArgumentException("unsupported data type [" + data + "]"); + }; + List<TopNEncoder> encoders = switch (data) { + case LONGS, INTS, DOUBLES, BOOLEANS -> List.of(TopNEncoder.DEFAULT_SORTABLE);
+ case BYTES_REFS -> List.of(TopNEncoder.UTF8); + case TWO_LONGS -> List.of(TopNEncoder.DEFAULT_SORTABLE, TopNEncoder.DEFAULT_SORTABLE); + case LONGS_AND_BYTES_REFS -> List.of(TopNEncoder.DEFAULT_SORTABLE, TopNEncoder.UTF8); + default -> throw new IllegalArgumentException("unsupported data type [" + data + "]"); + }; return new TopNOperator( topCount, + elementTypes, + encoders, IntStream.range(0, count).mapToObj(c -> new TopNOperator.SortOrder(c, false, false)).toList(), 16 * 1024 ); diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java index 0d9fcad984cbb..9c527923fae02 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java @@ -33,7 +33,7 @@ import org.elasticsearch.compute.lucene.LuceneSourceOperator; import org.elasticsearch.compute.lucene.ValueSourceInfo; import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator; -import org.elasticsearch.compute.operator.TopNOperator; +import org.elasticsearch.compute.operator.topn.TopNOperator; import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.IndexFieldDataCache; diff --git a/x-pack/plugin/esql/compute/build.gradle b/x-pack/plugin/esql/compute/build.gradle index d6a27b4122edb..6058770f25d1b 100644 --- a/x-pack/plugin/esql/compute/build.gradle +++ b/x-pack/plugin/esql/compute/build.gradle @@ -396,4 +396,82 @@ tasks.named('stringTemplates').configure { it.inputFile = multivalueDedupeInputFile it.outputFile = "org/elasticsearch/compute/operator/MultivalueDedupeBytesRef.java" } + File keyExtractorInputFile = new 
File("${projectDir}/src/main/java/org/elasticsearch/compute/operator/topn/X-KeyExtractor.java.st") + template { + it.properties = bytesRefProperties + it.inputFile = keyExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/KeyExtractorForBytesRef.java" + } + template { + it.properties = booleanProperties + it.inputFile = keyExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/KeyExtractorForBoolean.java" + } + template { + it.properties = intProperties + it.inputFile = keyExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/KeyExtractorForInt.java" + } + template { + it.properties = longProperties + it.inputFile = keyExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/KeyExtractorForLong.java" + } + template { + it.properties = doubleProperties + it.inputFile = keyExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/KeyExtractorForDouble.java" + } + File valueExtractorInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/operator/topn/X-ValueExtractor.java.st") + template { + it.properties = bytesRefProperties + it.inputFile = valueExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ValueExtractorForBytesRef.java" + } + template { + it.properties = booleanProperties + it.inputFile = valueExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ValueExtractorForBoolean.java" + } + template { + it.properties = intProperties + it.inputFile = valueExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ValueExtractorForInt.java" + } + template { + it.properties = longProperties + it.inputFile = valueExtractorInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ValueExtractorForLong.java" + } + template { + it.properties = doubleProperties + it.inputFile = valueExtractorInputFile + it.outputFile = 
"org/elasticsearch/compute/operator/topn/ValueExtractorForDouble.java" + } + File resultBuilderInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/operator/topn/X-ResultBuilder.java.st") + template { + it.properties = bytesRefProperties + it.inputFile = resultBuilderInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ResultBuilderForBytesRef.java" + } + template { + it.properties = booleanProperties + it.inputFile = resultBuilderInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ResultBuilderForBoolean.java" + } + template { + it.properties = intProperties + it.inputFile = resultBuilderInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ResultBuilderForInt.java" + } + template { + it.properties = longProperties + it.inputFile = resultBuilderInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ResultBuilderForLong.java" + } + template { + it.properties = doubleProperties + it.inputFile = resultBuilderInputFile + it.outputFile = "org/elasticsearch/compute/operator/topn/ResultBuilderForDouble.java" + } } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForBoolean.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForBoolean.java new file mode 100644 index 0000000000000..0f7a4e109af75 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForBoolean.java @@ -0,0 +1,148 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BooleanVector; + +abstract class KeyExtractorForBoolean implements KeyExtractor { + static KeyExtractorForBoolean extractorFor(TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, BooleanBlock block) { + BooleanVector v = block.asVector(); + if (v != null) { + return new KeyExtractorForBoolean.ForVector(encoder, nul, nonNul, v); + } + if (ascending) { + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForBoolean.MinForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForBoolean.MinForUnordered(encoder, nul, nonNul, block); + } + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForBoolean.MaxForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForBoolean.MaxForUnordered(encoder, nul, nonNul, block); + } + + private final byte nul; + private final byte nonNul; + + KeyExtractorForBoolean(TopNEncoder encoder, byte nul, byte nonNul) { + assert encoder == TopNEncoder.DEFAULT_SORTABLE; + this.nul = nul; + this.nonNul = nonNul; + } + + protected final int nonNul(BytesRefBuilder key, boolean value) { + key.append(nonNul); + TopNEncoder.DEFAULT_SORTABLE.encodeBoolean(value, key); + return Byte.BYTES + 1; + } + + protected final int nul(BytesRefBuilder key) { + key.append(nul); + return 1; + } + + static class ForVector extends KeyExtractorForBoolean { + private final BooleanVector vector; + + ForVector(TopNEncoder encoder, byte nul, byte nonNul, BooleanVector vector) { + super(encoder, nul, nonNul); + this.vector = vector; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + return nonNul(key, vector.getBoolean(position)); + } + } + + static class MinForAscending extends KeyExtractorForBoolean { + private final BooleanBlock 
block; + + MinForAscending(TopNEncoder encoder, byte nul, byte nonNul, BooleanBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getBoolean(block.getFirstValueIndex(position))); + } + } + + static class MaxForAscending extends KeyExtractorForBoolean { + private final BooleanBlock block; + + MaxForAscending(TopNEncoder encoder, byte nul, byte nonNul, BooleanBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getBoolean(block.getFirstValueIndex(position) + block.getValueCount(position) - 1)); + } + } + + static class MinForUnordered extends KeyExtractorForBoolean { + private final BooleanBlock block; + + MinForUnordered(TopNEncoder encoder, byte nul, byte nonNul, BooleanBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { + if (block.getBoolean(i) == false) { + return nonNul(key, false); + } + } + return nonNul(key, true); + } + } + + static class MaxForUnordered extends KeyExtractorForBoolean { + private final BooleanBlock block; + + MaxForUnordered(TopNEncoder encoder, byte nul, byte nonNul, BooleanBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) 
{ + if (block.getBoolean(i)) { + return nonNul(key, true); + } + } + return nonNul(key, false); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForBytesRef.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForBytesRef.java new file mode 100644 index 0000000000000..d9d8d3878817e --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForBytesRef.java @@ -0,0 +1,162 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; + +abstract class KeyExtractorForBytesRef implements KeyExtractor { + static KeyExtractorForBytesRef extractorFor(TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, BytesRefBlock block) { + BytesRefVector v = block.asVector(); + if (v != null) { + return new KeyExtractorForBytesRef.ForVector(encoder, nul, nonNul, v); + } + if (ascending) { + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForBytesRef.MinForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForBytesRef.MinForUnordered(encoder, nul, nonNul, block); + } + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? 
new KeyExtractorForBytesRef.MaxForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForBytesRef.MaxForUnordered(encoder, nul, nonNul, block); + } + + private final TopNEncoder encoder; + protected final BytesRef scratch = new BytesRef(); + private final byte nul; + private final byte nonNul; + + KeyExtractorForBytesRef(TopNEncoder encoder, byte nul, byte nonNul) { + this.encoder = encoder; + this.nul = nul; + this.nonNul = nonNul; + } + + protected final int nonNul(BytesRefBuilder key, BytesRef value) { + key.append(nonNul); + return encoder.encodeBytesRef(value, key) + 1; + } + + protected final int nul(BytesRefBuilder key) { + key.append(nul); + return 1; + } + + static class ForVector extends KeyExtractorForBytesRef { + private final BytesRefVector vector; + + ForVector(TopNEncoder encoder, byte nul, byte nonNul, BytesRefVector vector) { + super(encoder, nul, nonNul); + this.vector = vector; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + return nonNul(key, vector.getBytesRef(position, scratch)); + } + } + + static class MinForAscending extends KeyExtractorForBytesRef { + private final BytesRefBlock block; + + MinForAscending(TopNEncoder encoder, byte nul, byte nonNul, BytesRefBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getBytesRef(block.getFirstValueIndex(position), scratch)); + } + } + + static class MaxForAscending extends KeyExtractorForBytesRef { + private final BytesRefBlock block; + + MaxForAscending(TopNEncoder encoder, byte nul, byte nonNul, BytesRefBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getBytesRef(block.getFirstValueIndex(position) + 
block.getValueCount(position) - 1, scratch)); + } + } + + static class MinForUnordered extends KeyExtractorForBytesRef { + private final BytesRefBlock block; + + private final BytesRef minScratch = new BytesRef(); + + MinForUnordered(TopNEncoder encoder, byte nul, byte nonNul, BytesRefBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + BytesRef min = block.getBytesRef(start, minScratch); + for (int i = start; i < end; i++) { + BytesRef v = block.getBytesRef(i, scratch); + if (v.compareTo(min) < 0) { + min.bytes = v.bytes; + min.offset = v.offset; + min.length = v.length; + } + } + return nonNul(key, min); + } + } + + static class MaxForUnordered extends KeyExtractorForBytesRef { + private final BytesRefBlock block; + + private final BytesRef maxScratch = new BytesRef(); + + MaxForUnordered(TopNEncoder encoder, byte nul, byte nonNul, BytesRefBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + BytesRef max = block.getBytesRef(start, maxScratch); + for (int i = start; i < end; i++) { + BytesRef v = block.getBytesRef(i, scratch); + if (v.compareTo(max) > 0) { + max.bytes = v.bytes; + max.offset = v.offset; + max.length = v.length; + } + } + return nonNul(key, max); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForDouble.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForDouble.java new file mode 100644 index 
0000000000000..8d8458d33ab47 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForDouble.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.DoubleVector; + +abstract class KeyExtractorForDouble implements KeyExtractor { + static KeyExtractorForDouble extractorFor(TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, DoubleBlock block) { + DoubleVector v = block.asVector(); + if (v != null) { + return new KeyExtractorForDouble.ForVector(encoder, nul, nonNul, v); + } + if (ascending) { + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForDouble.MinForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForDouble.MinForUnordered(encoder, nul, nonNul, block); + } + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? 
new KeyExtractorForDouble.MaxForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForDouble.MaxForUnordered(encoder, nul, nonNul, block); + } + + private final byte nul; + private final byte nonNul; + + KeyExtractorForDouble(TopNEncoder encoder, byte nul, byte nonNul) { + assert encoder == TopNEncoder.DEFAULT_SORTABLE; + this.nul = nul; + this.nonNul = nonNul; + } + + protected final int nonNul(BytesRefBuilder key, double value) { + key.append(nonNul); + TopNEncoder.DEFAULT_SORTABLE.encodeDouble(value, key); + return Double.BYTES + 1; + } + + protected final int nul(BytesRefBuilder key) { + key.append(nul); + return 1; + } + + static class ForVector extends KeyExtractorForDouble { + private final DoubleVector vector; + + ForVector(TopNEncoder encoder, byte nul, byte nonNul, DoubleVector vector) { + super(encoder, nul, nonNul); + this.vector = vector; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + return nonNul(key, vector.getDouble(position)); + } + } + + static class MinForAscending extends KeyExtractorForDouble { + private final DoubleBlock block; + + MinForAscending(TopNEncoder encoder, byte nul, byte nonNul, DoubleBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getDouble(block.getFirstValueIndex(position))); + } + } + + static class MaxForAscending extends KeyExtractorForDouble { + private final DoubleBlock block; + + MaxForAscending(TopNEncoder encoder, byte nul, byte nonNul, DoubleBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getDouble(block.getFirstValueIndex(position) + block.getValueCount(position) - 1)); + } + } + + static class MinForUnordered extends 
KeyExtractorForDouble { + private final DoubleBlock block; + + MinForUnordered(TopNEncoder encoder, byte nul, byte nonNul, DoubleBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + double min = block.getDouble(start); + for (int i = start + 1; i < end; i++) { + min = Math.min(min, block.getDouble(i)); + } + return nonNul(key, min); + } + } + + static class MaxForUnordered extends KeyExtractorForDouble { + private final DoubleBlock block; + + MaxForUnordered(TopNEncoder encoder, byte nul, byte nonNul, DoubleBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + double max = block.getDouble(start); + for (int i = start + 1; i < end; i++) { + max = Math.max(max, block.getDouble(i)); + } + return nonNul(key, max); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForInt.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForInt.java new file mode 100644 index 0000000000000..9c20f53689b0a --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForInt.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; + +abstract class KeyExtractorForInt implements KeyExtractor { + static KeyExtractorForInt extractorFor(TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, IntBlock block) { + IntVector v = block.asVector(); + if (v != null) { + return new KeyExtractorForInt.ForVector(encoder, nul, nonNul, v); + } + if (ascending) { + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForInt.MinForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForInt.MinForUnordered(encoder, nul, nonNul, block); + } + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForInt.MaxForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForInt.MaxForUnordered(encoder, nul, nonNul, block); + } + + private final byte nul; + private final byte nonNul; + + KeyExtractorForInt(TopNEncoder encoder, byte nul, byte nonNul) { + assert encoder == TopNEncoder.DEFAULT_SORTABLE; + this.nul = nul; + this.nonNul = nonNul; + } + + protected final int nonNul(BytesRefBuilder key, int value) { + key.append(nonNul); + TopNEncoder.DEFAULT_SORTABLE.encodeInt(value, key); + return Integer.BYTES + 1; + } + + protected final int nul(BytesRefBuilder key) { + key.append(nul); + return 1; + } + + static class ForVector extends KeyExtractorForInt { + private final IntVector vector; + + ForVector(TopNEncoder encoder, byte nul, byte nonNul, IntVector vector) { + super(encoder, nul, nonNul); + this.vector = vector; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + return nonNul(key, vector.getInt(position)); + } + } + + static class MinForAscending extends KeyExtractorForInt { + private final IntBlock block; + + MinForAscending(TopNEncoder encoder, byte nul, byte nonNul, IntBlock 
block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getInt(block.getFirstValueIndex(position))); + } + } + + static class MaxForAscending extends KeyExtractorForInt { + private final IntBlock block; + + MaxForAscending(TopNEncoder encoder, byte nul, byte nonNul, IntBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getInt(block.getFirstValueIndex(position) + block.getValueCount(position) - 1)); + } + } + + static class MinForUnordered extends KeyExtractorForInt { + private final IntBlock block; + + MinForUnordered(TopNEncoder encoder, byte nul, byte nonNul, IntBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + int min = block.getInt(start); + for (int i = start + 1; i < end; i++) { + min = Math.min(min, block.getInt(i)); + } + return nonNul(key, min); + } + } + + static class MaxForUnordered extends KeyExtractorForInt { + private final IntBlock block; + + MaxForUnordered(TopNEncoder encoder, byte nul, byte nonNul, IntBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + int max = block.getInt(start); + for (int i = start + 1; i < end; i++) { + max = Math.max(max, block.getInt(i)); + } + return nonNul(key, max); + } + } +} 
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForLong.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForLong.java new file mode 100644 index 0000000000000..5ad6c8d9602a8 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/KeyExtractorForLong.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; + +abstract class KeyExtractorForLong implements KeyExtractor { + static KeyExtractorForLong extractorFor(TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, LongBlock block) { + LongVector v = block.asVector(); + if (v != null) { + return new KeyExtractorForLong.ForVector(encoder, nul, nonNul, v); + } + if (ascending) { + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorForLong.MinForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForLong.MinForUnordered(encoder, nul, nonNul, block); + } + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? 
new KeyExtractorForLong.MaxForAscending(encoder, nul, nonNul, block) + : new KeyExtractorForLong.MaxForUnordered(encoder, nul, nonNul, block); + } + + private final byte nul; + private final byte nonNul; + + KeyExtractorForLong(TopNEncoder encoder, byte nul, byte nonNul) { + assert encoder == TopNEncoder.DEFAULT_SORTABLE; + this.nul = nul; + this.nonNul = nonNul; + } + + protected final int nonNul(BytesRefBuilder key, long value) { + key.append(nonNul); + TopNEncoder.DEFAULT_SORTABLE.encodeLong(value, key); + return Long.BYTES + 1; + } + + protected final int nul(BytesRefBuilder key) { + key.append(nul); + return 1; + } + + static class ForVector extends KeyExtractorForLong { + private final LongVector vector; + + ForVector(TopNEncoder encoder, byte nul, byte nonNul, LongVector vector) { + super(encoder, nul, nonNul); + this.vector = vector; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + return nonNul(key, vector.getLong(position)); + } + } + + static class MinForAscending extends KeyExtractorForLong { + private final LongBlock block; + + MinForAscending(TopNEncoder encoder, byte nul, byte nonNul, LongBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getLong(block.getFirstValueIndex(position))); + } + } + + static class MaxForAscending extends KeyExtractorForLong { + private final LongBlock block; + + MaxForAscending(TopNEncoder encoder, byte nul, byte nonNul, LongBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } + return nonNul(key, block.getLong(block.getFirstValueIndex(position) + block.getValueCount(position) - 1)); + } + } + + static class MinForUnordered extends KeyExtractorForLong { + private final 
LongBlock block; + + MinForUnordered(TopNEncoder encoder, byte nul, byte nonNul, LongBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + long min = block.getLong(start); + for (int i = start + 1; i < end; i++) { + min = Math.min(min, block.getLong(i)); + } + return nonNul(key, min); + } + } + + static class MaxForUnordered extends KeyExtractorForLong { + private final LongBlock block; + + MaxForUnordered(TopNEncoder encoder, byte nul, byte nonNul, LongBlock block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; + long max = block.getLong(start); + for (int i = start + 1; i < end; i++) { + max = Math.max(max, block.getLong(i)); + } + return nonNul(key, max); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForBoolean.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForBoolean.java new file mode 100644 index 0000000000000..50cef0417dd45 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForBoolean.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.BooleanBlock; + +class ResultBuilderForBoolean implements ResultBuilder { + private final BooleanBlock.Builder builder; + + private final boolean inKey; + + /** + * The value previously set by {@link #decodeKey}. + */ + private boolean key; + + ResultBuilderForBoolean(TopNEncoder encoder, boolean inKey, int initialSize) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + this.builder = BooleanBlock.newBlockBuilder(initialSize); + } + + @Override + public void decodeKey(BytesRef keys) { + assert inKey; + key = TopNEncoder.DEFAULT_SORTABLE.decodeBoolean(keys); + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + switch (count) { + case 0 -> { + builder.appendNull(); + } + case 1 -> builder.appendBoolean(inKey ? key : readValueFromValues(values)); + default -> { + builder.beginPositionEntry(); + for (int i = 0; i < count; i++) { + builder.appendBoolean(readValueFromValues(values)); + } + builder.endPositionEntry(); + } + } + } + + private boolean readValueFromValues(BytesRef values) { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeBoolean(values); + } + + @Override + public BooleanBlock build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderForBoolean[inKey=" + inKey + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForBytesRef.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForBytesRef.java new file mode 100644 index 0000000000000..55f324c931b67 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForBytesRef.java @@ -0,0 +1,70 @@ +/* + * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.BytesRefBlock; + +class ResultBuilderForBytesRef implements ResultBuilder { + private final BytesRefBlock.Builder builder; + + private final boolean inKey; + + private final TopNEncoder encoder; + + private final BytesRef scratch = new BytesRef(); + + /** + * The value previously set by {@link #decodeKey}. + */ + private BytesRef key; + + ResultBuilderForBytesRef(TopNEncoder encoder, boolean inKey, int initialSize) { + this.encoder = encoder; + this.inKey = inKey; + this.builder = BytesRefBlock.newBlockBuilder(initialSize); + } + + @Override + public void decodeKey(BytesRef keys) { + assert inKey; + key = encoder.toSortable().decodeBytesRef(keys, scratch); + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + switch (count) { + case 0 -> { + builder.appendNull(); + } + case 1 -> builder.appendBytesRef(inKey ? 
key : readValueFromValues(values)); + default -> { + builder.beginPositionEntry(); + for (int i = 0; i < count; i++) { + builder.appendBytesRef(readValueFromValues(values)); + } + builder.endPositionEntry(); + } + } + } + + private BytesRef readValueFromValues(BytesRef values) { + return encoder.toUnsortable().decodeBytesRef(values, scratch); + } + + @Override + public BytesRefBlock build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderForBytesRef[inKey=" + inKey + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForDouble.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForDouble.java new file mode 100644 index 0000000000000..ed4a9b45d90dc --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForDouble.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.DoubleBlock; + +class ResultBuilderForDouble implements ResultBuilder { + private final DoubleBlock.Builder builder; + + private final boolean inKey; + + /** + * The value previously set by {@link #decodeKey}. 
+ */ + private double key; + + ResultBuilderForDouble(TopNEncoder encoder, boolean inKey, int initialSize) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + this.builder = DoubleBlock.newBlockBuilder(initialSize); + } + + @Override + public void decodeKey(BytesRef keys) { + assert inKey; + key = TopNEncoder.DEFAULT_SORTABLE.decodeDouble(keys); + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + switch (count) { + case 0 -> { + builder.appendNull(); + } + case 1 -> builder.appendDouble(inKey ? key : readValueFromValues(values)); + default -> { + builder.beginPositionEntry(); + for (int i = 0; i < count; i++) { + builder.appendDouble(readValueFromValues(values)); + } + builder.endPositionEntry(); + } + } + } + + private double readValueFromValues(BytesRef values) { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeDouble(values); + } + + @Override + public DoubleBlock build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderForDouble[inKey=" + inKey + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForInt.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForInt.java new file mode 100644 index 0000000000000..2bcfc81107445 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForInt.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.IntBlock; + +class ResultBuilderForInt implements ResultBuilder { + private final IntBlock.Builder builder; + + private final boolean inKey; + + /** + * The value previously set by {@link #decodeKey}. + */ + private int key; + + ResultBuilderForInt(TopNEncoder encoder, boolean inKey, int initialSize) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + this.builder = IntBlock.newBlockBuilder(initialSize); + } + + @Override + public void decodeKey(BytesRef keys) { + assert inKey; + key = TopNEncoder.DEFAULT_SORTABLE.decodeInt(keys); + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + switch (count) { + case 0 -> { + builder.appendNull(); + } + case 1 -> builder.appendInt(inKey ? key : readValueFromValues(values)); + default -> { + builder.beginPositionEntry(); + for (int i = 0; i < count; i++) { + builder.appendInt(readValueFromValues(values)); + } + builder.endPositionEntry(); + } + } + } + + private int readValueFromValues(BytesRef values) { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeInt(values); + } + + @Override + public IntBlock build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderForInt[inKey=" + inKey + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForLong.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForLong.java new file mode 100644 index 0000000000000..3ada85bf9d5c9 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ResultBuilderForLong.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.LongBlock; + +class ResultBuilderForLong implements ResultBuilder { + private final LongBlock.Builder builder; + + private final boolean inKey; + + /** + * The value previously set by {@link #decodeKey}. + */ + private long key; + + ResultBuilderForLong(TopNEncoder encoder, boolean inKey, int initialSize) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + this.builder = LongBlock.newBlockBuilder(initialSize); + } + + @Override + public void decodeKey(BytesRef keys) { + assert inKey; + key = TopNEncoder.DEFAULT_SORTABLE.decodeLong(keys); + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + switch (count) { + case 0 -> { + builder.appendNull(); + } + case 1 -> builder.appendLong(inKey ? 
key : readValueFromValues(values)); + default -> { + builder.beginPositionEntry(); + for (int i = 0; i < count; i++) { + builder.appendLong(readValueFromValues(values)); + } + builder.endPositionEntry(); + } + } + } + + private long readValueFromValues(BytesRef values) { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeLong(values); + } + + @Override + public LongBlock build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderForLong[inKey=" + inKey + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForBoolean.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForBoolean.java new file mode 100644 index 0000000000000..0136c795746d0 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForBoolean.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BooleanVector; + +abstract class ValueExtractorForBoolean implements ValueExtractor { + static ValueExtractorForBoolean extractorFor(TopNEncoder encoder, boolean inKey, BooleanBlock block) { + BooleanVector vector = block.asVector(); + if (vector != null) { + return new ValueExtractorForBoolean.ForVector(encoder, inKey, vector); + } + return new ValueExtractorForBoolean.ForBlock(encoder, inKey, block); + } + + protected final boolean inKey; + + ValueExtractorForBoolean(TopNEncoder encoder, boolean inKey) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + } + + protected final void writeCount(BytesRefBuilder values, int count) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(count, values); + } + + protected final void actualWriteValue(BytesRefBuilder values, boolean value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeBoolean(value, values); + } + + static class ForVector extends ValueExtractorForBoolean { + private final BooleanVector vector; + + ForVector(TopNEncoder encoder, boolean inKey, BooleanVector vector) { + super(encoder, inKey); + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + writeCount(values, 1); + if (inKey) { + // will read results from the key + return; + } + actualWriteValue(values, vector.getBoolean(position)); + } + } + + static class ForBlock extends ValueExtractorForBoolean { + private final BooleanBlock block; + + ForBlock(TopNEncoder encoder, boolean inKey, BooleanBlock block) { + super(encoder, inKey); + this.block = block; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + int size = block.getValueCount(position); + writeCount(values, size); + if (size == 1 && inKey) { + // Will read results from the key + 
return; + } + int start = block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { + actualWriteValue(values, block.getBoolean(i)); + } + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForBytesRef.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForBytesRef.java new file mode 100644 index 0000000000000..97b2ce6da5e9b --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForBytesRef.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; + +abstract class ValueExtractorForBytesRef implements ValueExtractor { + static ValueExtractorForBytesRef extractorFor(TopNEncoder encoder, boolean inKey, BytesRefBlock block) { + BytesRefVector vector = block.asVector(); + if (vector != null) { + return new ValueExtractorForBytesRef.ForVector(encoder, inKey, vector); + } + return new ValueExtractorForBytesRef.ForBlock(encoder, inKey, block); + } + + private final TopNEncoder encoder; + + protected final BytesRef scratch = new BytesRef(); + + protected final boolean inKey; + + ValueExtractorForBytesRef(TopNEncoder encoder, boolean inKey) { + this.encoder = encoder; + this.inKey = inKey; + } + + protected final void writeCount(BytesRefBuilder values, int count) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(count, values); + } + + protected final void 
actualWriteValue(BytesRefBuilder values, BytesRef value) { + encoder.encodeBytesRef(value, values); + } + + static class ForVector extends ValueExtractorForBytesRef { + private final BytesRefVector vector; + + ForVector(TopNEncoder encoder, boolean inKey, BytesRefVector vector) { + super(encoder, inKey); + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + writeCount(values, 1); + if (inKey) { + // will read results from the key + return; + } + actualWriteValue(values, vector.getBytesRef(position, scratch)); + } + } + + static class ForBlock extends ValueExtractorForBytesRef { + private final BytesRefBlock block; + + ForBlock(TopNEncoder encoder, boolean inKey, BytesRefBlock block) { + super(encoder, inKey); + this.block = block; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + int size = block.getValueCount(position); + writeCount(values, size); + if (size == 1 && inKey) { + // Will read results from the key + return; + } + int start = block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { + actualWriteValue(values, block.getBytesRef(i, scratch)); + } + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForDouble.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForDouble.java new file mode 100644 index 0000000000000..0bceeea462283 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForDouble.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.DoubleVector; + +abstract class ValueExtractorForDouble implements ValueExtractor { + static ValueExtractorForDouble extractorFor(TopNEncoder encoder, boolean inKey, DoubleBlock block) { + DoubleVector vector = block.asVector(); + if (vector != null) { + return new ValueExtractorForDouble.ForVector(encoder, inKey, vector); + } + return new ValueExtractorForDouble.ForBlock(encoder, inKey, block); + } + + protected final boolean inKey; + + ValueExtractorForDouble(TopNEncoder encoder, boolean inKey) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + } + + protected final void writeCount(BytesRefBuilder values, int count) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(count, values); + } + + protected final void actualWriteValue(BytesRefBuilder values, double value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeDouble(value, values); + } + + static class ForVector extends ValueExtractorForDouble { + private final DoubleVector vector; + + ForVector(TopNEncoder encoder, boolean inKey, DoubleVector vector) { + super(encoder, inKey); + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + writeCount(values, 1); + if (inKey) { + // will read results from the key + return; + } + actualWriteValue(values, vector.getDouble(position)); + } + } + + static class ForBlock extends ValueExtractorForDouble { + private final DoubleBlock block; + + ForBlock(TopNEncoder encoder, boolean inKey, DoubleBlock block) { + super(encoder, inKey); + this.block = block; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + int size = block.getValueCount(position); + writeCount(values, size); + if (size == 1 && inKey) { + // Will read results from the key + return; + } + int start 
= block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { + actualWriteValue(values, block.getDouble(i)); + } + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForInt.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForInt.java new file mode 100644 index 0000000000000..28156ccb87cf7 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForInt.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; + +abstract class ValueExtractorForInt implements ValueExtractor { + static ValueExtractorForInt extractorFor(TopNEncoder encoder, boolean inKey, IntBlock block) { + IntVector vector = block.asVector(); + if (vector != null) { + return new ValueExtractorForInt.ForVector(encoder, inKey, vector); + } + return new ValueExtractorForInt.ForBlock(encoder, inKey, block); + } + + protected final boolean inKey; + + ValueExtractorForInt(TopNEncoder encoder, boolean inKey) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + } + + protected final void writeCount(BytesRefBuilder values, int count) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(count, values); + } + + protected final void actualWriteValue(BytesRefBuilder values, int value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeInt(value, values); + } + + static class ForVector extends ValueExtractorForInt { + private 
final IntVector vector; + + ForVector(TopNEncoder encoder, boolean inKey, IntVector vector) { + super(encoder, inKey); + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + writeCount(values, 1); + if (inKey) { + // will read results from the key + return; + } + actualWriteValue(values, vector.getInt(position)); + } + } + + static class ForBlock extends ValueExtractorForInt { + private final IntBlock block; + + ForBlock(TopNEncoder encoder, boolean inKey, IntBlock block) { + super(encoder, inKey); + this.block = block; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + int size = block.getValueCount(position); + writeCount(values, size); + if (size == 1 && inKey) { + // Will read results from the key + return; + } + int start = block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { + actualWriteValue(values, block.getInt(i)); + } + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForLong.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForLong.java new file mode 100644 index 0000000000000..aec9aaf11c919 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/operator/topn/ValueExtractorForLong.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; + +abstract class ValueExtractorForLong implements ValueExtractor { + static ValueExtractorForLong extractorFor(TopNEncoder encoder, boolean inKey, LongBlock block) { + LongVector vector = block.asVector(); + if (vector != null) { + return new ValueExtractorForLong.ForVector(encoder, inKey, vector); + } + return new ValueExtractorForLong.ForBlock(encoder, inKey, block); + } + + protected final boolean inKey; + + ValueExtractorForLong(TopNEncoder encoder, boolean inKey) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); + this.inKey = inKey; + } + + protected final void writeCount(BytesRefBuilder values, int count) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(count, values); + } + + protected final void actualWriteValue(BytesRefBuilder values, long value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeLong(value, values); + } + + static class ForVector extends ValueExtractorForLong { + private final LongVector vector; + + ForVector(TopNEncoder encoder, boolean inKey, LongVector vector) { + super(encoder, inKey); + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + writeCount(values, 1); + if (inKey) { + // will read results from the key + return; + } + actualWriteValue(values, vector.getLong(position)); + } + } + + static class ForBlock extends ValueExtractorForLong { + private final LongBlock block; + + ForBlock(TopNEncoder encoder, boolean inKey, LongBlock block) { + super(encoder, inKey); + this.block = block; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + int size = block.getValueCount(position); + writeCount(values, size); + if (size == 1 && inKey) { + // Will read results from the key + return; + } + int start = 
block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { + actualWriteValue(values, block.getLong(i)); + } + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java index 280f2467a566c..69aa6f5bb217a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/module-info.java +++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java @@ -23,4 +23,5 @@ exports org.elasticsearch.compute.operator; exports org.elasticsearch.compute.operator.exchange; exports org.elasticsearch.compute.aggregation.blockhash; + exports org.elasticsearch.compute.operator.topn; } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java index 7d14241801352..18063e5c96d3e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java @@ -47,6 +47,19 @@ public Block filter(int... positions) { return new DocBlock(asVector().filter(positions)); } + @Override + public int hashCode() { + return vector.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof DocBlock == false) { + return false; + } + return vector.equals(((DocBlock) obj).vector); + } + /** * A builder the for {@link DocBlock}. 
*/ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java index a17ab3d64a706..8d9d7af3474dd 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java @@ -9,6 +9,8 @@ import org.apache.lucene.util.IntroSorter; +import java.util.Objects; + /** * {@link Vector} where each entry references a lucene document. */ @@ -178,4 +180,18 @@ public ElementType elementType() { public boolean isConstant() { return shards.isConstant() && segments.isConstant() && docs.isConstant(); } + + @Override + public int hashCode() { + return Objects.hash(shards, segments, docs); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof DocVector == false) { + return false; + } + DocVector other = (DocVector) obj; + return shards.equals(other.shards) && segments.equals(other.segments) && docs.equals(other.docs); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FixedLengthTopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FixedLengthTopNEncoder.java deleted file mode 100644 index 05629e93572bb..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FixedLengthTopNEncoder.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.compute.operator; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; - -public class FixedLengthTopNEncoder implements TopNEncoder { - - @Override - public void encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { - bytesRefBuilder.append(value); - } - - @Override - public String toString() { - return "FixedLengthTopNEncoder"; - } -} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TopNEncoder.java deleted file mode 100644 index f8fd7c0c10e5a..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TopNEncoder.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.compute.operator; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.NumericUtils; - -/** - * Defines a default BytesRef encoding behavior for all block types, leaving text based types for concrete implementations. 
- */ -public interface TopNEncoder { - - default void encodeLong(long value, BytesRefBuilder bytesRefBuilder) { - bytesRefBuilder.grow(bytesRefBuilder.length() + Long.BYTES); - NumericUtils.longToSortableBytes(value, bytesRefBuilder.bytes(), bytesRefBuilder.length()); - bytesRefBuilder.setLength(bytesRefBuilder.length() + Long.BYTES); - } - - default void encodeInteger(int value, BytesRefBuilder bytesRefBuilder) { - bytesRefBuilder.grow(bytesRefBuilder.length() + Integer.BYTES); - NumericUtils.intToSortableBytes(value, bytesRefBuilder.bytes(), bytesRefBuilder.length()); - bytesRefBuilder.setLength(bytesRefBuilder.length() + Integer.BYTES); - } - - default void encodeDouble(double value, BytesRefBuilder bytesRefBuilder) { - bytesRefBuilder.grow(bytesRefBuilder.length() + Long.BYTES); - NumericUtils.longToSortableBytes(NumericUtils.doubleToSortableLong(value), bytesRefBuilder.bytes(), bytesRefBuilder.length()); - bytesRefBuilder.setLength(bytesRefBuilder.length() + Long.BYTES); - } - - default void encodeBoolean(boolean value, BytesRefBuilder bytesRefBuilder) { - var bytes = new byte[] { value ? (byte) 1 : (byte) 0 }; - bytesRefBuilder.append(bytes, 0, 1); - } - - void encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder); - -} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TopNOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TopNOperator.java deleted file mode 100644 index a7832f58727c1..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TopNOperator.java +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.compute.operator; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.PriorityQueue; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BooleanBlock; -import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.DocBlock; -import org.elasticsearch.compute.data.DocVector; -import org.elasticsearch.compute.data.DoubleBlock; -import org.elasticsearch.compute.data.ElementType; -import org.elasticsearch.compute.data.IntBlock; -import org.elasticsearch.compute.data.LongBlock; -import org.elasticsearch.compute.data.Page; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * An operator that sorts "rows" of values by encoding the values to sort on, as bytes (using BytesRef). Each data type is encoded - * in a specific way, defined by methods of a TopNEncoder. All the values used to sort a specific row (think of column/block 3 - * and column/block 6) are converted/encoded in a byte array and the concatenated bytes are all compared in bulk. - * For now, the only values that have a special "treatment" when it comes to encoding are the text-based ones (text, keyword, ip, version). - * For each "special" encoding there is should be new TopNEncoder implementation. See {@link UTF8TopNEncoder} for encoding regular - * "text" and "keyword" data types. See LocalExecutionPlanner for which data type uses which encoder. - * - * This Operator will not be able to sort binary values (encoded as BytesRef) because the bytes used as separator and "null"s can appear - * as valid bytes inside a binary value. 
- */ -public class TopNOperator implements Operator { - - private static final byte SEPARATOR = 0x0; // separator for values inside the BytesRef sorting key - private static final byte SMALL_NULL = 0x01; // "null" representation for "nulls first" - private static final byte BIG_NULL = 0x02; // "null" representation for "nulls last" - public static final TopNEncoder BYTESREF_FIXED_LENGTH_ENCODER = new FixedLengthTopNEncoder(); - public static final TopNEncoder BYTESREF_UTF8_ENCODER = new UTF8TopNEncoder(); - public static final TopNEncoder DEFAULT_ENCODER = new TopNEncoder() { - @Override - public void encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { - throw new IllegalStateException("Cannot find encoder for BytesRef value"); - } - - @Override - public String toString() { - return "DefaultEncoder"; - } - }; - - // enum to be extended in the future with other sorting modes (AVG average, for example) - private enum MvSortMode { - MIN, - MAX - } - - /** - * Internal row to be used in the PriorityQueue instead of the full blown Page. - * It mirrors somehow the Block build in the sense that it keeps around an array of offsets and a count of values (to account for - * multivalues) to reference each position in each block of the Page. - */ - static final class Row { - boolean[] booleans; - int[] ints; - long[] longs; - double[] doubles; - BytesRef[] byteRefs; - int[] docs; - boolean[] nullValues; - - int[] idToFirstValueIndex; // keeps the offset inside each of the arrays above where a specific block position starts from - ElementType[] idToType; - int[] numberOfValues; // keeps the count of values of each field in the specialized array - - BitSet blockIsUnordered; - BytesRefBuilder orderByCompositeKey = new BytesRefBuilder(); // BytesRef used to sort rows between each other - /** - * A true/false value (bit set/unset) for each byte in the BytesRef above corresponding to an asc/desc ordering. 
- * For ex, if a Long is represented as 8 bytes, each of these bytes will have the same value (set/unset) if the respective Long - * value is used for sorting ascending/descending. - */ - BitSet orderByCompositeKeyAscending; - - boolean isNull(int i) { - return nullValues[i]; - } - - boolean getBoolean(int i, int offset) { - return booleans[idToFirstValueIndex[i] + offset]; - } - - int getInt(int i, int offset) { - return ints[idToFirstValueIndex[i] + offset]; - } - - long getLong(int i, int offset) { - return longs[idToFirstValueIndex[i] + offset]; - } - - double getDouble(int i, int offset) { - return doubles[idToFirstValueIndex[i] + offset]; - } - - BytesRef getBytesRef(int i, int offset) { - return byteRefs[idToFirstValueIndex[i] + offset]; - } - - boolean blockIsUnordered(int i) { - return blockIsUnordered.get(i); - } - } - - static final class RowFactory { - - int size; - int nBooleans; - int nInts; - int nLongs; - int nDoubles; - int nByteRefs; - int nDocs; - - ElementType[] idToType; - - RowFactory(Page page) { - size = page.getBlockCount(); - idToType = new ElementType[size]; - for (int i = 0; i < size; i++) { - Block block = page.getBlock(i); - switch (block.elementType()) { - case LONG -> nLongs++; - case INT -> nInts++; - case DOUBLE -> nDoubles++; - case BYTES_REF -> nByteRefs++; - case BOOLEAN -> nBooleans++; - case DOC -> nDocs++; - case NULL -> { - } - case UNKNOWN -> { - assert false : "Must not occur here as TopN should never receive intermediate blocks"; - throw new UnsupportedOperationException("Block doesn't support retrieving elements"); - } - } - idToType[i] = block.elementType(); - } - } - - Row row(Page origin, int rowNum, Row spare, List sortOrders) { - Row result; - if (spare == null) { - result = new Row(); - result.nullValues = new boolean[size]; - result.booleans = new boolean[nBooleans]; - result.ints = new int[nInts]; - result.longs = new long[nLongs]; - result.doubles = new double[nDoubles]; - result.byteRefs = new 
BytesRef[nByteRefs]; - for (int i = 0; i < nByteRefs; i++) { - result.byteRefs[i] = new BytesRef(); - } - result.idToFirstValueIndex = new int[size]; - result.idToType = idToType; - result.docs = new int[nDocs * 3]; - result.numberOfValues = new int[size]; - result.orderByCompositeKeyAscending = new BitSet(); - } else { - result = spare; - // idToType has to be set because different pages could have different block types due to different mappings - result.idToType = idToType; - Arrays.fill(result.nullValues, false); - result.orderByCompositeKey = new BytesRefBuilder(); - result.orderByCompositeKeyAscending.clear(); - } - result.blockIsUnordered = new BitSet(size); - - int lastLongFirstValueIndex = 0; - int lastIntFirstValueIndex = 0; - int lastDoubleFirstValueIndex = 0; - int lastBytesRefFirstValueIndex = 0; - int lastBooleanFirstValueIndex = 0; - int lastDocFirstValueIndex = 0; - - for (int i = 0; i < size; i++) { - Block block = origin.getBlock(i); - if (block.mvOrdering() == Block.MvOrdering.UNORDERED) { - result.blockIsUnordered.set(i); - } - if (block.isNull(rowNum)) { - result.nullValues[i] = true; - } else { - int valuesCount = block.getValueCount(rowNum); - result.numberOfValues[i] = valuesCount; - switch (block.elementType()) { - case LONG -> { - int firstValueIndex = lastLongFirstValueIndex; - if (firstValueIndex + valuesCount > result.longs.length) { - result.longs = Arrays.copyOf(result.longs, firstValueIndex + valuesCount); - } - int start = block.getFirstValueIndex(rowNum); - int end = start + valuesCount; - for (int j = start, offset = 0; j < end; j++, offset++) { - result.longs[firstValueIndex + offset] = ((LongBlock) block).getLong(j); - } - result.idToFirstValueIndex[i] = firstValueIndex; - lastLongFirstValueIndex = firstValueIndex + valuesCount; - } - case INT -> { - int firstValueIndex = lastIntFirstValueIndex; - if (firstValueIndex + valuesCount > result.ints.length) { - result.ints = Arrays.copyOf(result.ints, firstValueIndex + valuesCount); - 
} - int start = block.getFirstValueIndex(rowNum); - int end = start + valuesCount; - for (int j = start, offset = 0; j < end; j++, offset++) { - result.ints[firstValueIndex + offset] = ((IntBlock) block).getInt(j); - } - result.idToFirstValueIndex[i] = firstValueIndex; - lastIntFirstValueIndex = firstValueIndex + valuesCount; - } - case DOUBLE -> { - int firstValueIndex = lastDoubleFirstValueIndex; - if (firstValueIndex + valuesCount > result.doubles.length) { - result.doubles = Arrays.copyOf(result.doubles, firstValueIndex + valuesCount); - } - int start = block.getFirstValueIndex(rowNum); - int end = start + valuesCount; - for (int j = start, offset = 0; j < end; j++, offset++) { - result.doubles[firstValueIndex + offset] = ((DoubleBlock) block).getDouble(j); - } - result.idToFirstValueIndex[i] = firstValueIndex; - lastDoubleFirstValueIndex = firstValueIndex + valuesCount; - } - case BYTES_REF -> { - int firstValueIndex = lastBytesRefFirstValueIndex; - if (firstValueIndex + valuesCount > result.byteRefs.length) { - int additionalSize = firstValueIndex + valuesCount - result.byteRefs.length; - result.byteRefs = Arrays.copyOf(result.byteRefs, firstValueIndex + valuesCount); - for (int j = 1; j <= additionalSize; j++) { - result.byteRefs[result.byteRefs.length - j] = new BytesRef(); - } - } - int start = block.getFirstValueIndex(rowNum); - int end = start + valuesCount; - for (int j = start, offset = 0; j < end; j++, offset++) { - BytesRef b = result.byteRefs[firstValueIndex + offset]; - b = ((BytesRefBlock) block).getBytesRef(j, b); - result.byteRefs[firstValueIndex + offset] = b; - } - result.idToFirstValueIndex[i] = firstValueIndex; - lastBytesRefFirstValueIndex = firstValueIndex + valuesCount; - } - case BOOLEAN -> { - int firstValueIndex = lastBooleanFirstValueIndex; - if (firstValueIndex + valuesCount > result.booleans.length) { - result.booleans = Arrays.copyOf(result.booleans, firstValueIndex + valuesCount); - } - int start = 
block.getFirstValueIndex(rowNum); - int end = start + valuesCount; - for (int j = start, offset = 0; j < end; j++, offset++) { - result.booleans[firstValueIndex + offset] = ((BooleanBlock) block).getBoolean(j); - } - result.idToFirstValueIndex[i] = firstValueIndex; - lastBooleanFirstValueIndex = firstValueIndex + valuesCount; - } - case DOC -> { - int firstValueIndex = lastDocFirstValueIndex; - if (firstValueIndex + 3 > result.docs.length) { - result.docs = Arrays.copyOf(result.docs, firstValueIndex + 3); - } - DocVector doc = ((DocBlock) block).asVector(); - result.docs[firstValueIndex] = doc.shards().getInt(rowNum); - result.docs[firstValueIndex + 1] = doc.segments().getInt(rowNum); - result.docs[firstValueIndex + 2] = doc.docs().getInt(rowNum); - - result.idToFirstValueIndex[i] = firstValueIndex; - lastDocFirstValueIndex = firstValueIndex + 3; - } - case NULL -> { - assert false : "Must not occur here as we check nulls above already"; - throw new UnsupportedOperationException("Block of nulls doesn't support comparison"); - } - default -> { - assert false : "Must not occur here as TopN should never receive intermediate blocks"; - throw new UnsupportedOperationException("Block doesn't support retrieving elements"); - } - - } - } - } - - int orderByCompositeKeyCurrentPosition = 0; - for (SortOrder so : sortOrders) { - byte nul, nonNul; - if (so.nullsFirst) { - nul = so.asc ? SMALL_NULL : BIG_NULL; - nonNul = so.asc ? BIG_NULL : SMALL_NULL; - } else { - nul = so.asc ? BIG_NULL : SMALL_NULL; - nonNul = so.asc ? SMALL_NULL : BIG_NULL; - } - - MvSortMode sortMode = so.asc ? MvSortMode.MIN : MvSortMode.MAX; - int mvOffset = result.blockIsUnordered(so.channel) - ? -1 - : (sortMode == MvSortMode.MIN ? 
0 : result.numberOfValues[so.channel] - 1); - int valueAsBytesSize; - - if (result.isNull(so.channel)) { - result.orderByCompositeKey.append(nul); - valueAsBytesSize = 0; - } else { - result.orderByCompositeKey.append(nonNul); - switch (result.idToType[so.channel]) { - case LONG -> { - long rowValue; - if (mvOffset >= 0) { - rowValue = result.getLong(so.channel, mvOffset); - } else { - rowValue = result.getLong(so.channel, 0); - for (int j = 1; j < result.numberOfValues[so.channel]; j++) { - long value = result.getLong(so.channel, j); - if (sortMode == MvSortMode.MIN) { - rowValue = Math.min(value, rowValue); - } else if (sortMode == MvSortMode.MAX) { - rowValue = Math.max(value, rowValue); - } - } - } - so.encoder.encodeLong(rowValue, result.orderByCompositeKey); - valueAsBytesSize = Long.BYTES; - } - case INT -> { - int rowValue; - if (mvOffset >= 0) { - rowValue = result.getInt(so.channel, mvOffset); - } else { - rowValue = result.getInt(so.channel, 0); - for (int j = 1; j < result.numberOfValues[so.channel]; j++) { - int value = result.getInt(so.channel, j); - if (sortMode == MvSortMode.MIN) { - rowValue = Math.min(value, rowValue); - } else if (sortMode == MvSortMode.MAX) { - rowValue = Math.max(value, rowValue); - } - } - } - so.encoder.encodeInteger(rowValue, result.orderByCompositeKey); - valueAsBytesSize = Integer.BYTES; - } - case DOUBLE -> { - double rowValue; - if (mvOffset >= 0) { - rowValue = result.getDouble(so.channel, mvOffset); - } else { - rowValue = result.getDouble(so.channel, 0); - for (int j = 1; j < result.numberOfValues[so.channel]; j++) { - double value = result.getDouble(so.channel, j); - if (sortMode == MvSortMode.MIN) { - rowValue = Math.min(value, rowValue); - } else if (sortMode == MvSortMode.MAX) { - rowValue = Math.max(value, rowValue); - } - } - } - so.encoder.encodeDouble(rowValue, result.orderByCompositeKey); - valueAsBytesSize = Long.BYTES; - } - case BYTES_REF -> { - BytesRef rowValue; - if (mvOffset >= 0) { - rowValue = 
result.getBytesRef(so.channel, mvOffset); - } else { - rowValue = result.getBytesRef(so.channel, 0); - for (int j = 1; j < result.numberOfValues[so.channel]; j++) { - BytesRef value = result.getBytesRef(so.channel, j); - int compare = value.compareTo(rowValue); - if (sortMode == MvSortMode.MIN && compare < 0 || sortMode == MvSortMode.MAX && compare > 0) { - rowValue = value; - } - } - } - so.encoder.encodeBytesRef(rowValue, result.orderByCompositeKey); - valueAsBytesSize = rowValue.length; - } - case BOOLEAN -> { - boolean rowValue; - if (mvOffset >= 0) { - rowValue = result.getBoolean(so.channel, mvOffset); - } else { - rowValue = result.getBoolean(so.channel, 0); - for (int j = 1; j < result.numberOfValues[so.channel] - && (sortMode == MvSortMode.MIN && rowValue || sortMode == MvSortMode.MAX && rowValue == false); j++) { - boolean value = result.getBoolean(so.channel, j); - if (sortMode == MvSortMode.MIN && value == false) { - rowValue = false; - } else if (sortMode == MvSortMode.MAX && value) { - rowValue = true; - } - } - } - so.encoder.encodeBoolean(rowValue, result.orderByCompositeKey); - valueAsBytesSize = 1; - } - default -> { - assert false : "Must not occur here as TopN should never receive intermediate blocks"; - throw new UnsupportedOperationException("Block doesn't support retrieving elements"); - } - } - } - result.orderByCompositeKeyAscending.set( - orderByCompositeKeyCurrentPosition, - valueAsBytesSize + orderByCompositeKeyCurrentPosition + 2, - so.asc - ); - orderByCompositeKeyCurrentPosition += valueAsBytesSize + 2; - result.orderByCompositeKey.append(SEPARATOR); - } - - return result; - } - } - - public record SortOrder(int channel, boolean asc, boolean nullsFirst, TopNEncoder encoder) { - - public SortOrder(int channel, boolean asc, boolean nullsFirst) { - this(channel, asc, nullsFirst, DEFAULT_ENCODER); - } - - @Override - public String toString() { - return "SortOrder[channel=" - + this.channel - + ", asc=" - + this.asc - + ", nullsFirst=" - + 
this.nullsFirst - + ", encoder=" - + this.encoder - + "]"; - } - } - - public record TopNOperatorFactory(int topCount, List sortOrders, int maxPageSize) implements OperatorFactory { - - @Override - public Operator get(DriverContext driverContext) { - return new TopNOperator(topCount, sortOrders, maxPageSize); - } - - @Override - public String describe() { - return "TopNOperator[count = " + topCount + ", sortOrders = " + sortOrders + "]"; - } - } - - private final PriorityQueue inputQueue; - - private final int maxPageSize; - private RowFactory rowFactory; - - private final List sortOrders; - - // these will be inferred at runtime: one input page might not contain all the information needed - // eg. it could be missing some fields in the mapping, so it could have NULL blocks as placeholders - private ElementType[] outputTypes; - - private Iterator output; - - public TopNOperator(int topCount, List sortOrders, int maxPageSize) { - this.maxPageSize = maxPageSize; - this.sortOrders = sortOrders; - this.inputQueue = new PriorityQueue<>(topCount) { - @Override - protected boolean lessThan(Row r1, Row r2) { - return compareRows(r1, r2) < 0; - } - - @Override - public String toString() { - if (sortOrders.size() == 1) { - SortOrder order = sortOrders.get(0); - return "count = " + size() + "/" + topCount + ", sortOrder = " + order; - } else { - return "count = " + size() + "/" + topCount + ", sortOrders = " + sortOrders; - } - } - }; - } - - static int compareRows(Row r1, Row r2) { - // This is simliar to r1.orderByCompositeKey.compareTo(r2.orderByCompositeKey) but stopping somewhere in the middle so that - // we check the byte that mismatched - BytesRef br1 = r1.orderByCompositeKey.get(); - BytesRef br2 = r2.orderByCompositeKey.get(); - int mismatchedByteIndex = Arrays.mismatch( - br1.bytes, - br1.offset, - br1.offset + br1.length, - br2.bytes, - br2.offset, - br2.offset + br2.length - ); - if (mismatchedByteIndex < 0) { - // the two rows are equal - return 0; - } - int 
length = Math.min(br1.length, br2.length); - // one value is the prefix of the other - if (mismatchedByteIndex == length) { - // the value with the greater length is considered greater than the other - if (length == br1.length) {// first row is less than the second row - return r2.orderByCompositeKeyAscending.get(length) ? 1 : -1; - } else {// second row is less than the first row - return r1.orderByCompositeKeyAscending.get(length) ? -1 : 1; - } - } else { - // compare the byte that mismatched accounting for that respective byte asc/desc ordering - int c = Byte.compareUnsigned( - r1.orderByCompositeKey.bytes()[br1.offset + mismatchedByteIndex], - r2.orderByCompositeKey.bytes()[br2.offset + mismatchedByteIndex] - ); - return r1.orderByCompositeKeyAscending.get(mismatchedByteIndex) ? -c : c; - } - } - - @Override - public boolean needsInput() { - return output == null; - } - - @Override - public void addInput(Page page) { - // rebuild for every page, since blocks can originate from different indices, with different mapping - rowFactory = new RowFactory(page); - if (outputTypes == null) { - outputTypes = Arrays.copyOf(rowFactory.idToType, rowFactory.idToType.length); - } else { - for (int i = 0; i < rowFactory.idToType.length; i++) { - if (outputTypes[i] == ElementType.NULL) { // the type could just be missing in the previous mappings - outputTypes[i] = rowFactory.idToType[i]; - } - } - } - - Row removed = null; - for (int i = 0; i < page.getPositionCount(); i++) { - Row x = rowFactory.row(page, i, removed, sortOrders); - removed = inputQueue.insertWithOverflow(x); - } - } - - @Override - public void finish() { - if (output == null) { - output = toPages(); - } - } - - private Iterator toPages() { - if (inputQueue.size() == 0) { - return Collections.emptyIterator(); - } - List list = new ArrayList<>(inputQueue.size()); - while (inputQueue.size() > 0) { - list.add(inputQueue.pop()); - } - Collections.reverse(list); - - List result = new ArrayList<>(); - Block.Builder[] 
builders = null; - int p = 0; - int size = 0; - for (int i = 0; i < list.size(); i++) { - if (builders == null) { - size = Math.min(maxPageSize, list.size() - i); - builders = new Block.Builder[rowFactory.size]; - for (int b = 0; b < builders.length; b++) { - builders[b] = outputTypes[b].newBlockBuilder(size); - } - p = 0; - } - - Row row = list.get(i); - for (int b = 0; b < builders.length; b++) { - if (row.isNull(b)) { - builders[b].appendNull(); - continue; - } - switch (outputTypes[b]) { - case BOOLEAN -> { - if (row.numberOfValues[b] > 1) { - ((BooleanBlock.Builder) builders[b]).beginPositionEntry(); - for (int j = 0; j < row.numberOfValues[b]; j++) { - ((BooleanBlock.Builder) builders[b]).appendBoolean(row.getBoolean(b, j)); - } - ((BooleanBlock.Builder) builders[b]).endPositionEntry(); - } else { - ((BooleanBlock.Builder) builders[b]).appendBoolean(row.getBoolean(b, 0)); - } - } - case INT -> { - if (row.numberOfValues[b] > 1) { - ((IntBlock.Builder) builders[b]).beginPositionEntry(); - for (int j = 0; j < row.numberOfValues[b]; j++) { - ((IntBlock.Builder) builders[b]).appendInt(row.getInt(b, j)); - } - ((IntBlock.Builder) builders[b]).endPositionEntry(); - } else { - ((IntBlock.Builder) builders[b]).appendInt(row.getInt(b, 0)); - } - } - case LONG -> { - if (row.numberOfValues[b] > 1) { - ((LongBlock.Builder) builders[b]).beginPositionEntry(); - for (int j = 0; j < row.numberOfValues[b]; j++) { - ((LongBlock.Builder) builders[b]).appendLong(row.getLong(b, j)); - } - ((LongBlock.Builder) builders[b]).endPositionEntry(); - } else { - ((LongBlock.Builder) builders[b]).appendLong(row.getLong(b, 0)); - } - } - case DOUBLE -> { - if (row.numberOfValues[b] > 1) { - ((DoubleBlock.Builder) builders[b]).beginPositionEntry(); - for (int j = 0; j < row.numberOfValues[b]; j++) { - ((DoubleBlock.Builder) builders[b]).appendDouble(row.getDouble(b, j)); - } - ((DoubleBlock.Builder) builders[b]).endPositionEntry(); - } else { - ((DoubleBlock.Builder) 
builders[b]).appendDouble(row.getDouble(b, 0)); - } - } - case BYTES_REF -> { - if (row.numberOfValues[b] > 1) { - ((BytesRefBlock.Builder) builders[b]).beginPositionEntry(); - for (int j = 0; j < row.numberOfValues[b]; j++) { - ((BytesRefBlock.Builder) builders[b]).appendBytesRef(row.getBytesRef(b, j)); - } - ((BytesRefBlock.Builder) builders[b]).endPositionEntry(); - } else { - ((BytesRefBlock.Builder) builders[b]).appendBytesRef(row.getBytesRef(b, 0)); - } - } - case DOC -> { - int dp = row.idToFirstValueIndex[b]; - int shard = row.docs[dp++]; - int segment = row.docs[dp++]; - int doc = row.docs[dp]; - ((DocBlock.Builder) builders[b]).appendShard(shard).appendSegment(segment).appendDoc(doc); - } - case NULL -> builders[b].appendNull(); - default -> throw new IllegalStateException("unsupported type [" + rowFactory.idToType[b] + "]"); - } - } - - p++; - if (p == size) { - result.add(new Page(Arrays.stream(builders).map(Block.Builder::build).toArray(Block[]::new))); - builders = null; - } - } - assert builders == null; - return result.iterator(); - } - - @Override - public boolean isFinished() { - return output != null && output.hasNext() == false; - } - - @Override - public Page getOutput() { - if (output != null && output.hasNext()) { - return output.next(); - } - return null; - } - - @Override - public void close() { - - } - - @Override - public String toString() { - return "TopNOperator[" + inputQueue + "]"; - } -} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/UTF8TopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/UTF8TopNEncoder.java deleted file mode 100644 index 9cab7c9e000aa..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/UTF8TopNEncoder.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.compute.operator; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; - -public class UTF8TopNEncoder implements TopNEncoder { - - private static final int CONTINUATION_BYTE = 0b1000_0000; - - @Override - public void encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { - // add one bit to every byte so that there are no "0" bytes in the provided bytes. The only "0" bytes are - // those defined as separators - int end = value.offset + value.length; - for (int i = value.offset; i < end; i++) { - byte b = value.bytes[i]; - if ((b & CONTINUATION_BYTE) == 0) { - b++; - } - bytesRefBuilder.append(b); - } - } - - @Override - public String toString() { - return "UTF8TopNEncoder"; - } -} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultSortableTopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultSortableTopNEncoder.java new file mode 100644 index 0000000000000..3b3ba69407065 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultSortableTopNEncoder.java @@ -0,0 +1,38 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; + +class DefaultSortableTopNEncoder extends SortableTopNEncoder { + @Override + public int encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { + throw new IllegalStateException("Cannot find encoder for BytesRef value"); + } + + @Override + public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) { + throw new IllegalStateException("Cannot find encoder for BytesRef value"); + } + + @Override + public String toString() { + return "DefaultUnsortable"; + } + + @Override + public TopNEncoder toSortable() { + return this; + } + + @Override + public TopNEncoder toUnsortable() { + return TopNEncoder.DEFAULT_UNSORTABLE; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoder.java new file mode 100644 index 0000000000000..668353b86519a --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoder.java @@ -0,0 +1,181 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; + +/** + * A {@link TopNEncoder} that doesn't encode values so they are sortable but is + * capable of encoding any values. 
+ */ +final class DefaultUnsortableTopNEncoder implements TopNEncoder { + public static final VarHandle LONG = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder()); + public static final VarHandle INT = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder()); + public static final VarHandle DOUBLE = MethodHandles.byteArrayViewVarHandle(double[].class, ByteOrder.nativeOrder()); + + @Override + public void encodeLong(long value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.grow(bytesRefBuilder.length() + Long.BYTES); + LONG.set(bytesRefBuilder.bytes(), bytesRefBuilder.length(), value); + bytesRefBuilder.setLength(bytesRefBuilder.length() + Long.BYTES); + } + + @Override + public long decodeLong(BytesRef bytes) { + if (bytes.length < Long.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + long v = (long) LONG.get(bytes.bytes, bytes.offset); + bytes.offset += Long.BYTES; + bytes.length -= Long.BYTES; + return v; + } + + /** + * Writes an int in a variable-length format. Writes between one and + * five bytes. Smaller values take fewer bytes. Negative numbers + * will always use all 5 bytes. + */ + public void encodeVInt(int value, BytesRefBuilder bytesRefBuilder) { + while ((value & ~0x7F) != 0) { + bytesRefBuilder.append(((byte) ((value & 0x7f) | 0x80))); + value >>>= 7; + } + bytesRefBuilder.append((byte) value); + } + + /** + * Reads an int stored in variable-length format. Reads between one and + * five bytes. Smaller values take fewer bytes. Negative numbers + * will always use all 5 bytes. + */ + public int decodeVInt(BytesRef bytes) { + /* + * The loop for this is unrolled because we unrolled the loop in StreamInput. + * I presume it's a decent choice here because it was a good choice there. 
+ */ + byte b = bytes.bytes[bytes.offset]; + if (b >= 0) { + bytes.offset += 1; + bytes.length -= 1; + return b; + } + int i = b & 0x7F; + b = bytes.bytes[bytes.offset + 1]; + i |= (b & 0x7F) << 7; + if (b >= 0) { + bytes.offset += 2; + bytes.length -= 2; + return i; + } + b = bytes.bytes[bytes.offset + 2]; + i |= (b & 0x7F) << 14; + if (b >= 0) { + bytes.offset += 3; + bytes.length -= 3; + return i; + } + b = bytes.bytes[bytes.offset + 3]; + i |= (b & 0x7F) << 21; + if (b >= 0) { + bytes.offset += 4; + bytes.length -= 4; + return i; + } + b = bytes.bytes[bytes.offset + 4]; + i |= (b & 0x0F) << 28; + if ((b & 0xF0) != 0) { + throw new IllegalStateException("Invalid last byte for a vint [" + Integer.toHexString(b) + "]"); + } + bytes.offset += 5; + bytes.length -= 5; + return i; + } + + @Override + public void encodeInt(int value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.grow(bytesRefBuilder.length() + Integer.BYTES); + INT.set(bytesRefBuilder.bytes(), bytesRefBuilder.length(), value); + bytesRefBuilder.setLength(bytesRefBuilder.length() + Integer.BYTES); + } + + @Override + public int decodeInt(BytesRef bytes) { + if (bytes.length < Integer.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + int v = (int) INT.get(bytes.bytes, bytes.offset); + bytes.offset += Integer.BYTES; + bytes.length -= Integer.BYTES; + return v; + } + + @Override + public void encodeDouble(double value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.grow(bytesRefBuilder.length() + Double.BYTES); + DOUBLE.set(bytesRefBuilder.bytes(), bytesRefBuilder.length(), value); + bytesRefBuilder.setLength(bytesRefBuilder.length() + Long.BYTES); + } + + @Override + public double decodeDouble(BytesRef bytes) { + if (bytes.length < Double.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + double v = (double) DOUBLE.get(bytes.bytes, bytes.offset); + bytes.offset += Double.BYTES; + bytes.length -= Double.BYTES; + return v; + } + + @Override + public 
void encodeBoolean(boolean value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.append(value ? (byte) 1 : (byte) 0); + } + + @Override + public boolean decodeBoolean(BytesRef bytes) { + if (bytes.length < Byte.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + boolean v = bytes.bytes[bytes.offset] == 1; + bytes.offset += Byte.BYTES; + bytes.length -= Byte.BYTES; + return v; + } + + @Override + public int encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { + throw new UnsupportedOperationException(); + } + + @Override + public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) { + throw new UnsupportedOperationException(); + } + + @Override + public TopNEncoder toSortable() { + return TopNEncoder.DEFAULT_SORTABLE; + } + + @Override + public TopNEncoder toUnsortable() { + return this; + } + + @Override + public String toString() { + return "DefaultUnsortable"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/FixedLengthTopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/FixedLengthTopNEncoder.java new file mode 100644 index 0000000000000..fbca1080c871c --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/FixedLengthTopNEncoder.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; + +class FixedLengthTopNEncoder extends SortableTopNEncoder { + private final int length; + + FixedLengthTopNEncoder(int length) { + this.length = length; + } + + @Override + public int encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { + if (value.length != length) { + throw new IllegalArgumentException("expected exactly [" + length + "] bytes but got [" + value.length + "]"); + } + bytesRefBuilder.append(value); + return length; + } + + @Override + public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) { + if (bytes.length < length) { + throw new IllegalArgumentException("expected [" + length + "] bytes but only [" + bytes.length + "] remain"); + } + scratch.bytes = bytes.bytes; + scratch.offset = bytes.offset; + scratch.length = length; + bytes.offset += length; + bytes.length -= length; + return scratch; + } + + @Override + public String toString() { + return "FixedLengthTopNEncoder[" + length + "]"; + } + + @Override + public TopNEncoder toSortable() { + return this; + } + + @Override + public TopNEncoder toUnsortable() { + return this; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/KeyExtractor.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/KeyExtractor.java new file mode 100644 index 0000000000000..19daa1aba8d03 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/KeyExtractor.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; + +/** + * Extracts keys into a {@link BytesRefBuilder}. + */ +interface KeyExtractor { + int writeKey(BytesRefBuilder key, int position); + + static KeyExtractor extractorFor(ElementType elementType, TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, Block block) { + if (false == (elementType == block.elementType() || ElementType.NULL == block.elementType())) { + throw new IllegalArgumentException("Expected [" + elementType + "] but was [" + block.elementType() + "]"); + } + return switch (block.elementType()) { + case BOOLEAN -> KeyExtractorForBoolean.extractorFor(encoder, ascending, nul, nonNul, (BooleanBlock) block); + case BYTES_REF -> KeyExtractorForBytesRef.extractorFor(encoder, ascending, nul, nonNul, (BytesRefBlock) block); + case INT -> KeyExtractorForInt.extractorFor(encoder, ascending, nul, nonNul, (IntBlock) block); + case LONG -> KeyExtractorForLong.extractorFor(encoder, ascending, nul, nonNul, (LongBlock) block); + case DOUBLE -> KeyExtractorForDouble.extractorFor(encoder, ascending, nul, nonNul, (DoubleBlock) block); + case NULL -> new KeyExtractorForNull(nul); + default -> { + assert false : "No key extractor for [" + block.elementType() + "]"; + throw new UnsupportedOperationException("No key extractor for [" + block.elementType() + "]"); + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/KeyExtractorForNull.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/KeyExtractorForNull.java new 
file mode 100644 index 0000000000000..cea218f6036de --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/KeyExtractorForNull.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; + +class KeyExtractorForNull implements KeyExtractor { + private final byte nul; + + KeyExtractorForNull(byte nul) { + this.nul = nul; + } + + @Override + public int writeKey(BytesRefBuilder values, int position) { + values.append(nul); + return 1; + } + + @Override + public String toString() { + return "KeyExtractorForNull(" + Integer.toHexString(nul & 0xff) + ")"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java new file mode 100644 index 0000000000000..b8a41a3ee343d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.ElementType; + +/** + * Builds {@link Block}s from keys and values encoded into {@link BytesRef}s. 
+ */ +interface ResultBuilder { + /** + * Called for each sort key before {@link #decodeValue} to consume the sort key and + * store the value of the key for {@link #decodeValue} can use it to reconstruct + * the value. This will only be called if the value is part of the key. + */ + void decodeKey(BytesRef keys); + + /** + * Called once per row to decode the value and write to the internal {@link Block.Builder}. + * If the value is part of the key then {@link #decodeKey} will be called first and + * implementations can store keys in that method and reuse them in this method. Most + * implementations don't write single valued fields that appear in the key and instead + * use the value form {@link #decodeKey}. + */ + void decodeValue(BytesRef values); + + /** + * Build the result block. + */ + Block build(); + + static ResultBuilder resultBuilderFor(ElementType elementType, TopNEncoder encoder, boolean inKey, int positions) { + return switch (elementType) { + case BOOLEAN -> new ResultBuilderForBoolean(encoder, inKey, positions); + case BYTES_REF -> new ResultBuilderForBytesRef(encoder, inKey, positions); + case INT -> new ResultBuilderForInt(encoder, inKey, positions); + case LONG -> new ResultBuilderForLong(encoder, inKey, positions); + case DOUBLE -> new ResultBuilderForDouble(encoder, inKey, positions); + case NULL -> new ResultBuilderForNull(); + case DOC -> new ResultBuilderForDoc(positions); + default -> { + assert false : "Result builder for [" + elementType + "]"; + throw new UnsupportedOperationException("Result builder for [" + elementType + "]"); + } + }; + } + +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForDoc.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForDoc.java new file mode 100644 index 0000000000000..a825b7d160551 --- /dev/null +++ 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForDoc.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.DocVector; +import org.elasticsearch.compute.data.IntArrayVector; + +class ResultBuilderForDoc implements ResultBuilder { + private final int[] shards; + private final int[] segments; + private final int[] docs; + private int position; + + ResultBuilderForDoc(int positions) { + this.shards = new int[positions]; + this.segments = new int[positions]; + this.docs = new int[positions]; + } + + @Override + public void decodeKey(BytesRef keys) { + throw new AssertionError("_doc can't be a key"); + } + + @Override + public void decodeValue(BytesRef values) { + shards[position] = TopNEncoder.DEFAULT_UNSORTABLE.decodeInt(values); + segments[position] = TopNEncoder.DEFAULT_UNSORTABLE.decodeInt(values); + docs[position] = TopNEncoder.DEFAULT_UNSORTABLE.decodeInt(values); + position++; + } + + @Override + public Block build() { + return new DocVector( + new IntArrayVector(shards, position), + new IntArrayVector(segments, position), + new IntArrayVector(docs, position), + null + ).asBlock(); + } + + @Override + public String toString() { + return "ResultBuilderForDoc"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForNull.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForNull.java new file mode 100644 index 0000000000000..05b9ba2a07658 --- /dev/null +++ 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForNull.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; + +public class ResultBuilderForNull implements ResultBuilder { + private int positions; + + @Override + public void decodeKey(BytesRef keys) { + throw new AssertionError("somehow got a value for a null key"); + } + + @Override + public void decodeValue(BytesRef values) { + int size = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + if (size != 0) { + throw new IllegalArgumentException("null columns should always have 0 entries"); + } + positions++; + } + + @Override + public Block build() { + return Block.constantNullBlock(positions); + } + + @Override + public String toString() { + return "ResultBuilderForNull"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/SortableTopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/SortableTopNEncoder.java new file mode 100644 index 0000000000000..21b8b9d3d2e36 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/SortableTopNEncoder.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.NumericUtils; + +/** + * A {@link TopNEncoder} that encodes values to byte arrays that may be sorted directly. + */ +public abstract class SortableTopNEncoder implements TopNEncoder { + @Override + public final void encodeLong(long value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.grow(bytesRefBuilder.length() + Long.BYTES); + NumericUtils.longToSortableBytes(value, bytesRefBuilder.bytes(), bytesRefBuilder.length()); + bytesRefBuilder.setLength(bytesRefBuilder.length() + Long.BYTES); + } + + @Override + public final long decodeLong(BytesRef bytes) { + if (bytes.length < Long.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + long v = NumericUtils.sortableBytesToLong(bytes.bytes, bytes.offset); + bytes.offset += Long.BYTES; + bytes.length -= Long.BYTES; + return v; + } + + @Override + public final void encodeInt(int value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.grow(bytesRefBuilder.length() + Integer.BYTES); + NumericUtils.intToSortableBytes(value, bytesRefBuilder.bytes(), bytesRefBuilder.length()); + bytesRefBuilder.setLength(bytesRefBuilder.length() + Integer.BYTES); + } + + @Override + public final int decodeInt(BytesRef bytes) { + if (bytes.length < Integer.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + int v = NumericUtils.sortableBytesToInt(bytes.bytes, bytes.offset); + bytes.offset += Integer.BYTES; + bytes.length -= Integer.BYTES; + return v; + } + + @Override + public final void encodeDouble(double value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.grow(bytesRefBuilder.length() + Long.BYTES); + NumericUtils.longToSortableBytes(NumericUtils.doubleToSortableLong(value), bytesRefBuilder.bytes(), bytesRefBuilder.length()); + bytesRefBuilder.setLength(bytesRefBuilder.length() + Long.BYTES); + } + + 
@Override + public final double decodeDouble(BytesRef bytes) { + if (bytes.length < Double.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + double v = NumericUtils.sortableLongToDouble(NumericUtils.sortableBytesToLong(bytes.bytes, bytes.offset)); + bytes.offset += Double.BYTES; + bytes.length -= Double.BYTES; + return v; + } + + @Override + public final void encodeBoolean(boolean value, BytesRefBuilder bytesRefBuilder) { + bytesRefBuilder.append(value ? (byte) 1 : (byte) 0); + } + + @Override + public final boolean decodeBoolean(BytesRef bytes) { + if (bytes.length < Byte.BYTES) { + throw new IllegalArgumentException("not enough bytes"); + } + boolean v = bytes.bytes[bytes.offset] == 1; + bytes.offset += Byte.BYTES; + bytes.length -= Byte.BYTES; + return v; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNEncoder.java new file mode 100644 index 0000000000000..220a31a8fdac2 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNEncoder.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; + +/** + * Encodes values for {@link TopNOperator}. Some encoders encode values so sorting + * the bytes will sort the values. This is called "sortable" and you can always + * go from any {@link TopNEncoder} to a "sortable" version of it with {@link #toSortable()}. 
+ * If you don't need the bytes to be sortable you can get an "unsortable" encoder + * with {@link #toUnsortable()}. + */ +public interface TopNEncoder { + /** + * An encoder that encodes values such that sorting the bytes sorts the values. + */ + DefaultSortableTopNEncoder DEFAULT_SORTABLE = new DefaultSortableTopNEncoder(); + /** + * An encoder that encodes values as compactly as possible without making the + * encoded bytes sortable. + */ + DefaultUnsortableTopNEncoder DEFAULT_UNSORTABLE = new DefaultUnsortableTopNEncoder(); + /** + * An encoder for IP addresses. + */ + FixedLengthTopNEncoder IP = new FixedLengthTopNEncoder(InetAddressPoint.BYTES); + /** + * An encoder for UTF-8 text. + */ + UTF8TopNEncoder UTF8 = new UTF8TopNEncoder(); + /** + * An encoder for semver versions. + */ + VersionTopNEncoder VERSION = new VersionTopNEncoder(); + + void encodeLong(long value, BytesRefBuilder bytesRefBuilder); + + long decodeLong(BytesRef bytes); + + void encodeInt(int value, BytesRefBuilder bytesRefBuilder); + + int decodeInt(BytesRef bytes); + + void encodeDouble(double value, BytesRefBuilder bytesRefBuilder); + + double decodeDouble(BytesRef bytes); + + void encodeBoolean(boolean value, BytesRefBuilder bytesRefBuilder); + + boolean decodeBoolean(BytesRef bytes); + + int encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder); + + BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch); + + /** + * Get a version of this encoder that encodes values such that sorting + * the encoded bytes sorts by the values. + */ + TopNEncoder toSortable(); + + /** + * Get a version of this encoder that encodes values as fast as possible + * without making the encoded bytes sortable. 
+ */ + TopNEncoder toUnsortable(); +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java new file mode 100644 index 0000000000000..acc4e90de6339 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java @@ -0,0 +1,440 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.Operator; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * An operator that sorts "rows" of values by encoding the values to sort on, as bytes (using BytesRef). Each data type is encoded + * in a specific way, defined by methods of a TopNEncoder. All the values used to sort a specific row (think of column/block 3 + * and column/block 6) are converted/encoded in a byte array and the concatenated bytes are all compared in bulk. + * For now, the only values that have a special "treatment" when it comes to encoding are the text-based ones (text, keyword, ip, version). 
+ * For each "special" encoding there is should be new TopNEncoder implementation. See {@link TopNEncoder#UTF8} for + * encoding regular "text" and "keyword" data types. See LocalExecutionPlanner for which data type uses which encoder. + * + * This Operator will not be able to sort binary values (encoded as BytesRef) because the bytes used as separator and "null"s can appear + * as valid bytes inside a binary value. + */ +public class TopNOperator implements Operator, Accountable { + private static final byte SMALL_NULL = 0x01; // "null" representation for "nulls first" + private static final byte BIG_NULL = 0x02; // "null" representation for "nulls last" + + /** + * Internal row to be used in the PriorityQueue instead of the full blown Page. + * It mirrors somehow the Block build in the sense that it keeps around an array of offsets and a count of values (to account for + * multivalues) to reference each position in each block of the Page. + */ + static final class Row implements Accountable { + private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(Row.class) + 2 * (RamUsageEstimator + .shallowSizeOfInstance(BytesRefBuilder.class) + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)) + RamUsageEstimator + .shallowSizeOfInstance(BitSet.class); + + final BytesRefBuilder keys = new BytesRefBuilder(); // BytesRef used to sort rows between each other + /** + * A true/false value (bit set/unset) for each byte in the BytesRef above corresponding to an asc/desc ordering. + * For ex, if a Long is represented as 8 bytes, each of these bytes will have the same value (set/unset) if the respective Long + * value is used for sorting ascending/descending. 
+ */ + final BitSet orderByCompositeKeyAscending = new BitSet(); + + final BytesRefBuilder values = new BytesRefBuilder(); + + @Override + public long ramBytesUsed() { + return SHALLOW_SIZE + RamUsageEstimator.sizeOf(keys.bytes()) + orderByCompositeKeyAscending.size() / Byte.SIZE + + RamUsageEstimator.sizeOf(values.bytes()); + } + } + + record KeyFactory(KeyExtractor extractor, boolean ascending) {} + + static final class RowFactory { + private final ValueExtractor[] valueExtractors; + private final KeyFactory[] keyFactories; + + RowFactory(List elementTypes, List encoders, List sortOrders, Page page) { + valueExtractors = new ValueExtractor[page.getBlockCount()]; + for (int b = 0; b < valueExtractors.length; b++) { + valueExtractors[b] = ValueExtractor.extractorFor( + elementTypes.get(b), + encoders.get(b).toUnsortable(), + channelInKey(sortOrders, b), + page.getBlock(b) + ); + } + keyFactories = new KeyFactory[sortOrders.size()]; + for (int k = 0; k < keyFactories.length; k++) { + SortOrder so = sortOrders.get(k); + KeyExtractor extractor = KeyExtractor.extractorFor( + elementTypes.get(so.channel), + encoders.get(so.channel).toSortable(), + so.asc, + so.nul(), + so.nonNul(), + page.getBlock(so.channel) + ); + keyFactories[k] = new KeyFactory(extractor, so.asc); + } + } + + Row row(int position, Row spare) { + Row result; + if (spare == null) { + result = new Row(); + } else { + result = spare; + result.keys.clear(); + result.orderByCompositeKeyAscending.clear(); + result.values.clear(); + } + + writeKey(position, result); + writeValues(position, result.values); + + return result; + } + + private void writeKey(int position, Row row) { + int orderByCompositeKeyCurrentPosition = 0; + for (KeyFactory factory : keyFactories) { + int valueAsBytesSize = factory.extractor.writeKey(row.keys, position); + row.orderByCompositeKeyAscending.set( + orderByCompositeKeyCurrentPosition, + valueAsBytesSize + orderByCompositeKeyCurrentPosition, + factory.ascending + ); + 
orderByCompositeKeyCurrentPosition += valueAsBytesSize; + } + } + + private void writeValues(int position, BytesRefBuilder values) { + for (ValueExtractor e : valueExtractors) { + e.writeValue(values, position); + } + } + } + + public record SortOrder(int channel, boolean asc, boolean nullsFirst) { + + private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(SortOrder.class); + + @Override + public String toString() { + return "SortOrder[channel=" + this.channel + ", asc=" + this.asc + ", nullsFirst=" + this.nullsFirst + "]"; + } + + byte nul() { + if (nullsFirst) { + return asc ? SMALL_NULL : BIG_NULL; + } else { + return asc ? BIG_NULL : SMALL_NULL; + } + } + + byte nonNul() { + if (nullsFirst) { + return asc ? BIG_NULL : SMALL_NULL; + } else { + return asc ? SMALL_NULL : BIG_NULL; + } + } + } + + public record TopNOperatorFactory( + int topCount, + List elementTypes, + List encoders, + List sortOrders, + int maxPageSize + ) implements OperatorFactory { + public TopNOperatorFactory + + { + for (ElementType e : elementTypes) { + if (e == null) { + throw new IllegalArgumentException("ElementType not known"); + } + } + } + + @Override + public TopNOperator get(DriverContext driverContext) { + return new TopNOperator(topCount, elementTypes, encoders, sortOrders, maxPageSize); + } + + @Override + public String describe() { + return "TopNOperator[count=" + + topCount + + ", elementTypes=" + + elementTypes + + ", encoders=" + + encoders + + ", sortOrders=" + + sortOrders + + "]"; + } + } + + private final Queue inputQueue; + + private final int maxPageSize; + + private final List elementTypes; + private final List encoders; + private final List sortOrders; + + private Iterator output; + + public TopNOperator( + int topCount, + List elementTypes, + List encoders, + List sortOrders, + int maxPageSize + ) { + this.maxPageSize = maxPageSize; + this.elementTypes = elementTypes; + this.encoders = encoders; + this.sortOrders = sortOrders; + 
this.inputQueue = new Queue(topCount); + } + + static int compareRows(Row r1, Row r2) { + // This is similar to r1.key.compareTo(r2.key) but stopping somewhere in the middle so that + // we check the byte that mismatched + BytesRef br1 = r1.keys.get(); + BytesRef br2 = r2.keys.get(); + int mismatchedByteIndex = Arrays.mismatch( + br1.bytes, + br1.offset, + br1.offset + br1.length, + br2.bytes, + br2.offset, + br2.offset + br2.length + ); + if (mismatchedByteIndex < 0) { + // the two rows are equal + return 0; + } + int length = Math.min(br1.length, br2.length); + // one value is the prefix of the other + if (mismatchedByteIndex == length) { + // the value with the greater length is considered greater than the other + if (length == br1.length) {// first row is less than the second row + return r2.orderByCompositeKeyAscending.get(length) ? 1 : -1; + } else {// second row is less than the first row + return r1.orderByCompositeKeyAscending.get(length) ? -1 : 1; + } + } else { + // compare the byte that mismatched accounting for that respective byte asc/desc ordering + int c = Byte.compareUnsigned( + r1.keys.bytes()[br1.offset + mismatchedByteIndex], + r2.keys.bytes()[br2.offset + mismatchedByteIndex] + ); + return r1.orderByCompositeKeyAscending.get(mismatchedByteIndex) ? 
-c : c; + } + } + + @Override + public boolean needsInput() { + return output == null; + } + + @Override + public void addInput(Page page) { + RowFactory rowFactory = new RowFactory(elementTypes, encoders, sortOrders, page); + + Row removed = null; + for (int i = 0; i < page.getPositionCount(); i++) { + Row x = rowFactory.row(i, removed); + removed = inputQueue.insertWithOverflow(x); + } + } + + @Override + public void finish() { + if (output == null) { + output = toPages(); + } + } + + private Iterator toPages() { + if (inputQueue.size() == 0) { + return Collections.emptyIterator(); + } + List list = new ArrayList<>(inputQueue.size()); + while (inputQueue.size() > 0) { + list.add(inputQueue.pop()); + } + Collections.reverse(list); + + List result = new ArrayList<>(); + ResultBuilder[] builders = null; + int p = 0; + int size = 0; + for (int i = 0; i < list.size(); i++) { + if (builders == null) { + size = Math.min(maxPageSize, list.size() - i); + builders = new ResultBuilder[elementTypes.size()]; + for (int b = 0; b < builders.length; b++) { + builders[b] = ResultBuilder.resultBuilderFor( + elementTypes.get(b), + encoders.get(b).toUnsortable(), + channelInKey(sortOrders, b), + size + ); + } + p = 0; + } + + Row row = list.get(i); + BytesRef keys = row.keys.get(); + for (SortOrder so : sortOrders) { + if (keys.bytes[keys.offset] == so.nul()) { + keys.offset++; + keys.length--; + continue; + } + keys.offset++; + keys.length--; + builders[so.channel].decodeKey(keys); + } + if (keys.length != 0) { + throw new IllegalArgumentException("didn't read all keys"); + } + + BytesRef values = row.values.get(); + for (ResultBuilder builder : builders) { + builder.decodeValue(values); + } + if (values.length != 0) { + throw new IllegalArgumentException("didn't read all values"); + } + + p++; + if (p == size) { + result.add(new Page(Arrays.stream(builders).map(ResultBuilder::build).toArray(Block[]::new))); + builders = null; + } + } + assert builders == null; + return 
result.iterator(); + } + + private static boolean channelInKey(List sortOrders, int channel) { + for (SortOrder so : sortOrders) { + if (so.channel == channel) { + return true; + } + } + return false; + } + + @Override + public boolean isFinished() { + return output != null && output.hasNext() == false; + } + + @Override + public Page getOutput() { + if (output != null && output.hasNext()) { + return output.next(); + } + return null; + } + + @Override + public void close() { + + } + + private static long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(TopNOperator.class) + RamUsageEstimator + .shallowSizeOfInstance(List.class) * 3; + + @Override + public long ramBytesUsed() { + // NOTE: this is ignoring the output iterator for now. Pages are not Accountable. Yet. + long arrHeader = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; + long ref = RamUsageEstimator.NUM_BYTES_OBJECT_REF; + long size = SHALLOW_SIZE; + // These lists may slightly under-count, but it's not likely to be by much. + size += RamUsageEstimator.alignObjectSize(arrHeader + ref * elementTypes.size()); + size += RamUsageEstimator.alignObjectSize(arrHeader + ref * encoders.size()); + size += RamUsageEstimator.alignObjectSize(arrHeader + ref * sortOrders.size()); + size += sortOrders.size() * SortOrder.SHALLOW_SIZE; + size += inputQueue.ramBytesUsed(); + return size; + } + + @Override + public Status status() { + return new TopNOperatorStatus(inputQueue.size(), ramBytesUsed()); + } + + @Override + public String toString() { + return "TopNOperator[count=" + + inputQueue + + ", elementTypes=" + + elementTypes + + ", encoders=" + + encoders + + ", sortOrders=" + + sortOrders + + "]"; + } + + private static class Queue extends PriorityQueue implements Accountable { + private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(Queue.class); + private final int maxSize; + + Queue(int maxSize) { + super(maxSize); + this.maxSize = maxSize; + } + + @Override + protected boolean 
lessThan(Row r1, Row r2) { + return compareRows(r1, r2) < 0; + } + + @Override + public String toString() { + return size() + "/" + maxSize; + } + + @Override + public long ramBytesUsed() { + long total = SHALLOW_SIZE; + total += RamUsageEstimator.alignObjectSize( + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF * (maxSize + 1) + ); + for (Row r : this) { + total += r == null ? 0 : r.ramBytesUsed(); + } + return total; + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperatorStatus.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperatorStatus.java new file mode 100644 index 0000000000000..1261332ea1423 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperatorStatus.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +public class TopNOperatorStatus implements Operator.Status { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Operator.Status.class, + "topn", + TopNOperatorStatus::new + ); + private final int occupiedRows; + private final long ramBytesUsed; + + public TopNOperatorStatus(int occupiedRows, long ramBytesUsed) { + this.occupiedRows = occupiedRows; + this.ramBytesUsed = ramBytesUsed; + } + + TopNOperatorStatus(StreamInput in) throws IOException { + this.occupiedRows = in.readVInt(); + this.ramBytesUsed = in.readVLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(occupiedRows); + out.writeVLong(ramBytesUsed); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public int occupiedRows() { + return occupiedRows; + } + + public long ramBytesUsed() { + return ramBytesUsed; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("occupied_rows", occupiedRows); + builder.field("ram_bytes_used", ramBytesUsed); + builder.field("ram_used", ByteSizeValue.ofBytes(ramBytesUsed)); + return builder.endObject(); + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + TopNOperatorStatus that = (TopNOperatorStatus) o; + return occupiedRows == that.occupiedRows && ramBytesUsed == that.ramBytesUsed; + } + + @Override + public int hashCode() { + 
return Objects.hash(occupiedRows, ramBytesUsed); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/UTF8TopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/UTF8TopNEncoder.java new file mode 100644 index 0000000000000..3692e3009dd45 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/UTF8TopNEncoder.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; + +import java.util.Arrays; + +/** + * Encodes utf-8 strings as {@code nul} terminated strings. + *

+ * Utf-8 can contain {@code nul} aka {@code 0x00} so we wouldn't be able + to use that as a terminator. But we fix this by adding {@code 1} to all + values less than the continuation byte. This removes some of the + self-synchronizing nature of utf-8, but we don't need that here. When + we decode we undo our munging so all consumers just get normal utf-8. +

+ */ +final class UTF8TopNEncoder extends SortableTopNEncoder { + + private static final int CONTINUATION_BYTE = 0b1000_0000; + static final byte TERMINATOR = 0x00; + + @Override + public int encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { + // add one bit to every byte so that there are no "0" bytes in the provided bytes. The only "0" bytes are + // those defined as separators + int end = value.offset + value.length; + for (int i = value.offset; i < end; i++) { + byte b = value.bytes[i]; + if ((b & CONTINUATION_BYTE) == 0) { + b++; + } + bytesRefBuilder.append(b); + } + bytesRefBuilder.append(TERMINATOR); + return value.length + 1; + } + + @Override + public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) { + scratch.bytes = bytes.bytes; + scratch.offset = bytes.offset; + scratch.length = 0; + int i = bytes.offset; + decode: while (true) { + int leadByte = bytes.bytes[i] & 0xff; + int numBytes = utf8CodeLength[leadByte]; + switch (numBytes) { + case 0: + break decode; + case 1: + bytes.bytes[i]--; + i++; + break; + case 2: + i += 2; + break; + case 3: + i += 3; + break; + case 4: + i += 4; + break; + default: + throw new IllegalArgumentException("Invalid UTF8 header byte: 0x" + Integer.toHexString(leadByte)); + } + } + scratch.length = i - bytes.offset; + bytes.offset = i + 1; + bytes.length -= scratch.length + 1; + return scratch; + } + + @Override + public TopNEncoder toSortable() { + return this; + } + + @Override + public TopNEncoder toUnsortable() { + return this; + } + + @Override + public String toString() { + return "UTF8TopNEncoder"; + } + + // This section very inspired by Lucene's UnicodeUtil + static final int[] utf8CodeLength; + + static { + int v = Integer.MIN_VALUE; + + utf8CodeLength = Arrays.stream( + new int[][] { + // The next line differs from UnicodeUtil - the first entry is 0 because that's our terminator + { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + // The next line differs from UnicodeUtil - the first entry is 1 because it's valid in our encoding. + { 1, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v }, + { v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v }, + { v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v }, + { v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v }, + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, + { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, + { 4, 4, 4, 4, 4, 4, 4, 4 /* , 5, 5, 5, 5, 6, 6, 0, 0 */ } } + ).flatMapToInt(Arrays::stream).toArray(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java new file mode 100644 index 0000000000000..2369078643ae4 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.DocBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; + +/** + * Extracts values into a {@link BytesRefBuilder}. + */ +interface ValueExtractor { + void writeValue(BytesRefBuilder values, int position); + + static ValueExtractor extractorFor(ElementType elementType, TopNEncoder encoder, boolean inKey, Block block) { + if (false == (elementType == block.elementType() || ElementType.NULL == block.elementType())) { + throw new IllegalArgumentException("Expected [" + elementType + "] but was [" + block.elementType() + "]"); + } + return switch (block.elementType()) { + case BOOLEAN -> ValueExtractorForBoolean.extractorFor(encoder, inKey, (BooleanBlock) block); + case BYTES_REF -> ValueExtractorForBytesRef.extractorFor(encoder, inKey, (BytesRefBlock) block); + case INT -> ValueExtractorForInt.extractorFor(encoder, inKey, (IntBlock) block); + case LONG -> ValueExtractorForLong.extractorFor(encoder, inKey, (LongBlock) block); + case DOUBLE -> ValueExtractorForDouble.extractorFor(encoder, inKey, (DoubleBlock) block); + case NULL -> new ValueExtractorForNull(); + case DOC -> new ValueExtractorForDoc(encoder, ((DocBlock) block).asVector()); + default -> { + assert false : "No value extractor for [" + block.elementType() + "]"; + throw new UnsupportedOperationException("No value extractor for [" + block.elementType() + "]"); + } + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForDoc.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForDoc.java new file mode 100644 index 0000000000000..733b9cd4ab708 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForDoc.java @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.DocVector; + +class ValueExtractorForDoc implements ValueExtractor { + private final DocVector vector; + + ValueExtractorForDoc(TopNEncoder encoder, DocVector vector) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE; + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeInt(vector.shards().getInt(position), values); + TopNEncoder.DEFAULT_UNSORTABLE.encodeInt(vector.segments().getInt(position), values); + TopNEncoder.DEFAULT_UNSORTABLE.encodeInt(vector.docs().getInt(position), values); + } + + @Override + public String toString() { + return "ValueExtractorForDoc"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForNull.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForNull.java new file mode 100644 index 0000000000000..967ede1b3f46d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForNull.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRefBuilder; + +class ValueExtractorForNull implements ValueExtractor { + @Override + public void writeValue(BytesRefBuilder values, int position) { + /* + * Write 0 values which can be read by *any* result builder and will always + * make a null value. + */ + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(0, values); + } + + @Override + public String toString() { + return "ValueExtractorForNull"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/VersionTopNEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/VersionTopNEncoder.java new file mode 100644 index 0000000000000..b6ce97586c449 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/VersionTopNEncoder.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; + +class VersionTopNEncoder extends SortableTopNEncoder { + @Override + public int encodeBytesRef(BytesRef value, BytesRefBuilder bytesRefBuilder) { + // TODO versions can contain nul so we need to delegate to the utf-8 encoder for the utf-8 parts of a version + for (int i = value.offset; i < value.offset + value.length; i++) { + if (value.bytes[i] == UTF8TopNEncoder.TERMINATOR) { + throw new IllegalArgumentException("Can't sort versions containing nul"); + } + } + bytesRefBuilder.append(value); + bytesRefBuilder.append(UTF8TopNEncoder.TERMINATOR); + return value.length + 1; + } + + @Override + public BytesRef decodeBytesRef(BytesRef bytes, BytesRef scratch) { + int i = bytes.offset; + while (bytes.bytes[i] != UTF8TopNEncoder.TERMINATOR) { + i++; + } + scratch.bytes = bytes.bytes; + scratch.offset = bytes.offset; + scratch.length = i - bytes.offset; + bytes.offset += scratch.length + 1; + bytes.length -= scratch.length + 1; + return scratch; + } + + @Override + public String toString() { + return "VersionTopNEncoder"; + } + + @Override + public TopNEncoder toSortable() { + return this; + } + + @Override + public TopNEncoder toUnsortable() { + return this; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-KeyExtractor.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-KeyExtractor.java.st new file mode 100644 index 0000000000000..28f452ccac1a7 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-KeyExtractor.java.st @@ -0,0 +1,224 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +$if(BytesRef)$ +import org.apache.lucene.util.BytesRef; +$endif$ +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.$Type$Block; +import org.elasticsearch.compute.data.$Type$Vector; + +abstract class KeyExtractorFor$Type$ implements KeyExtractor { + static KeyExtractorFor$Type$ extractorFor(TopNEncoder encoder, boolean ascending, byte nul, byte nonNul, $Type$Block block) { + $Type$Vector v = block.asVector(); + if (v != null) { + return new KeyExtractorFor$Type$.ForVector(encoder, nul, nonNul, v); + } + if (ascending) { + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorFor$Type$.MinForAscending(encoder, nul, nonNul, block) + : new KeyExtractorFor$Type$.MinForUnordered(encoder, nul, nonNul, block); + } + return block.mvOrdering() == Block.MvOrdering.ASCENDING + ? new KeyExtractorFor$Type$.MaxForAscending(encoder, nul, nonNul, block) + : new KeyExtractorFor$Type$.MaxForUnordered(encoder, nul, nonNul, block); + } + +$if(BytesRef)$ + private final TopNEncoder encoder; + protected final BytesRef scratch = new BytesRef(); +$endif$ + private final byte nul; + private final byte nonNul; + + KeyExtractorFor$Type$(TopNEncoder encoder, byte nul, byte nonNul) { +$if(BytesRef)$ + this.encoder = encoder; +$else$ + assert encoder == TopNEncoder.DEFAULT_SORTABLE; +$endif$ + this.nul = nul; + this.nonNul = nonNul; + } + + protected final int nonNul(BytesRefBuilder key, $type$ value) { + key.append(nonNul); +$if(BytesRef)$ + return encoder.encodeBytesRef(value, key) + 1; +$elseif(boolean)$ + TopNEncoder.DEFAULT_SORTABLE.encodeBoolean(value, key); + return Byte.BYTES + 1; +$else$ + TopNEncoder.DEFAULT_SORTABLE.encode$Type$(value, key); + return $BYTES$ + 1; +$endif$ + } + + protected final int nul(BytesRefBuilder key) { + key.append(nul); + return 1; + } + + static class ForVector extends KeyExtractorFor$Type$ { + 
private final $Type$Vector vector; + + ForVector(TopNEncoder encoder, byte nul, byte nonNul, $Type$Vector vector) { + super(encoder, nul, nonNul); + this.vector = vector; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { +$if(BytesRef)$ + return nonNul(key, vector.get$Type$(position, scratch)); +$else$ + return nonNul(key, vector.get$Type$(position)); +$endif$ + } + } + + static class MinForAscending extends KeyExtractorFor$Type$ { + private final $Type$Block block; + + MinForAscending(TopNEncoder encoder, byte nul, byte nonNul, $Type$Block block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } +$if(BytesRef)$ + return nonNul(key, block.get$Type$(block.getFirstValueIndex(position), scratch)); +$else$ + return nonNul(key, block.get$Type$(block.getFirstValueIndex(position))); +$endif$ + } + } + + static class MaxForAscending extends KeyExtractorFor$Type$ { + private final $Type$Block block; + + MaxForAscending(TopNEncoder encoder, byte nul, byte nonNul, $Type$Block block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + if (block.isNull(position)) { + return nul(key); + } +$if(BytesRef)$ + return nonNul(key, block.get$Type$(block.getFirstValueIndex(position) + block.getValueCount(position) - 1, scratch)); +$else$ + return nonNul(key, block.get$Type$(block.getFirstValueIndex(position) + block.getValueCount(position) - 1)); +$endif$ + } + } + + static class MinForUnordered extends KeyExtractorFor$Type$ { + private final $Type$Block block; + +$if(BytesRef)$ + private final BytesRef minScratch = new BytesRef(); +$endif$ + + MinForUnordered(TopNEncoder encoder, byte nul, byte nonNul, $Type$Block block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int 
position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; +$if(BytesRef)$ + BytesRef min = block.getBytesRef(start, minScratch); + for (int i = start; i < end; i++) { + BytesRef v = block.getBytesRef(i, scratch); + if (v.compareTo(min) < 0) { + min.bytes = v.bytes; + min.offset = v.offset; + min.length = v.length; + } + } + return nonNul(key, min); +$elseif(boolean)$ + for (int i = start; i < end; i++) { + if (block.getBoolean(i) == false) { + return nonNul(key, false); + } + } + return nonNul(key, true); +$else$ + $type$ min = block.get$Type$(start); + for (int i = start + 1; i < end; i++) { + min = Math.min(min, block.get$Type$(i)); + } + return nonNul(key, min); +$endif$ + } + } + + static class MaxForUnordered extends KeyExtractorFor$Type$ { + private final $Type$Block block; + +$if(BytesRef)$ + private final BytesRef maxScratch = new BytesRef(); +$endif$ + + MaxForUnordered(TopNEncoder encoder, byte nul, byte nonNul, $Type$Block block) { + super(encoder, nul, nonNul); + this.block = block; + } + + @Override + public int writeKey(BytesRefBuilder key, int position) { + int size = block.getValueCount(position); + if (size == 0) { + return nul(key); + } + int start = block.getFirstValueIndex(position); + int end = start + size; +$if(BytesRef)$ + BytesRef max = block.getBytesRef(start, maxScratch); + for (int i = start; i < end; i++) { + BytesRef v = block.getBytesRef(i, scratch); + if (v.compareTo(max) > 0) { + max.bytes = v.bytes; + max.offset = v.offset; + max.length = v.length; + } + } + return nonNul(key, max); +$elseif(boolean)$ + for (int i = start; i < end; i++) { + if (block.getBoolean(i)) { + return nonNul(key, true); + } + } + return nonNul(key, false); +$else$ + $type$ max = block.get$Type$(start); + for (int i = start + 1; i < end; i++) { + max = Math.max(max, block.get$Type$(i)); + } + return nonNul(key, max); +$endif$ + } + } +} diff 
--git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-ResultBuilder.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-ResultBuilder.java.st new file mode 100644 index 0000000000000..5f9a35bd0ebd3 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-ResultBuilder.java.st @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.$Type$Block; + +class ResultBuilderFor$Type$ implements ResultBuilder { + private final $Type$Block.Builder builder; + + private final boolean inKey; + +$if(BytesRef)$ + private final TopNEncoder encoder; + + private final BytesRef scratch = new BytesRef(); +$endif$ + + /** + * The value previously set by {@link #decodeKey}. + */ + private $type$ key; + + ResultBuilderFor$Type$(TopNEncoder encoder, boolean inKey, int initialSize) { +$if(BytesRef)$ + this.encoder = encoder; +$else$ + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); +$endif$ + this.inKey = inKey; + this.builder = $Type$Block.newBlockBuilder(initialSize); + } + + @Override + public void decodeKey(BytesRef keys) { + assert inKey; +$if(BytesRef)$ + key = encoder.toSortable().decodeBytesRef(keys, scratch); +$else$ + key = TopNEncoder.DEFAULT_SORTABLE.decode$Type$(keys); +$endif$ + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + switch (count) { + case 0 -> { + builder.appendNull(); + } + case 1 -> builder.append$Type$(inKey ? 
key : readValueFromValues(values)); + default -> { + builder.beginPositionEntry(); + for (int i = 0; i < count; i++) { + builder.append$Type$(readValueFromValues(values)); + } + builder.endPositionEntry(); + } + } + } + + private $type$ readValueFromValues(BytesRef values) { +$if(BytesRef)$ + return encoder.toUnsortable().decodeBytesRef(values, scratch); +$else$ + return TopNEncoder.DEFAULT_UNSORTABLE.decode$Type$(values); +$endif$ + } + + @Override + public $Type$Block build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderFor$Type$[inKey=" + inKey + "]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-ValueExtractor.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-ValueExtractor.java.st new file mode 100644 index 0000000000000..3a8792387b142 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/X-ValueExtractor.java.st @@ -0,0 +1,105 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +$if(BytesRef)$ +import org.apache.lucene.util.BytesRef; +$endif$ +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.$Type$Block; +import org.elasticsearch.compute.data.$Type$Vector; + +abstract class ValueExtractorFor$Type$ implements ValueExtractor { + static ValueExtractorFor$Type$ extractorFor(TopNEncoder encoder, boolean inKey, $Type$Block block) { + $Type$Vector vector = block.asVector(); + if (vector != null) { + return new ValueExtractorFor$Type$.ForVector(encoder, inKey, vector); + } + return new ValueExtractorFor$Type$.ForBlock(encoder, inKey, block); + } + +$if(BytesRef)$ + private final TopNEncoder encoder; + + protected final BytesRef scratch = new BytesRef(); +$endif$ + + protected final boolean inKey; + + ValueExtractorFor$Type$(TopNEncoder encoder, boolean inKey) { +$if(BytesRef)$ + this.encoder = encoder; +$else$ + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE : encoder.toString(); +$endif$ + this.inKey = inKey; + } + + protected final void writeCount(BytesRefBuilder values, int count) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(count, values); + } + + protected final void actualWriteValue(BytesRefBuilder values, $type$ value) { +$if(BytesRef)$ + encoder.encodeBytesRef(value, values); +$else$ + TopNEncoder.DEFAULT_UNSORTABLE.encode$Type$(value, values); +$endif$ + } + + static class ForVector extends ValueExtractorFor$Type$ { + private final $Type$Vector vector; + + ForVector(TopNEncoder encoder, boolean inKey, $Type$Vector vector) { + super(encoder, inKey); + this.vector = vector; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + writeCount(values, 1); + if (inKey) { + // will read results from the key + return; + } +$if(BytesRef)$ + actualWriteValue(values, vector.get$Type$(position, scratch)); +$else$ + actualWriteValue(values, vector.get$Type$(position)); +$endif$ + } + } + + static class ForBlock extends 
ValueExtractorFor$Type$ { + private final $Type$Block block; + + ForBlock(TopNEncoder encoder, boolean inKey, $Type$Block block) { + super(encoder, inKey); + this.block = block; + } + + @Override + public void writeValue(BytesRefBuilder values, int position) { + int size = block.getValueCount(position); + writeCount(values, size); + if (size == 1 && inKey) { + // Will read results from the key + return; + } + int start = block.getFirstValueIndex(position); + int end = start + size; + for (int i = start; i < end; i++) { +$if(BytesRef)$ + actualWriteValue(values, block.getBytesRef(i, scratch)); +$else$ + actualWriteValue(values, block.get$Type$(i)); +$endif$ + } + } + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoderTests.java new file mode 100644 index 0000000000000..9f46399546a09 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoderTests.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class DefaultUnsortableTopNEncoderTests extends ESTestCase { + public void testVIntSmall() { + testVInt(between(0, 127), 1); + } + + public void testVIntMed() { + testVInt(between(128, 16383), 2); + } + + public void testVIntBig() { + testVInt(between(16384, 2097151), 3); + } + + public void testVIntBigger() { + testVInt(between(2097152, 268435455), 4); + } + + public void testVIntBiggest() { + testVInt(between(268435456, Integer.MAX_VALUE), 5); + } + + public void testVIntNegative() { + testVInt(between(Integer.MIN_VALUE, -1), 5); + } + + private void testVInt(int v, int expectedBytes) { + BytesRefBuilder builder = new BytesRefBuilder(); + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(v, builder); + assertThat(builder.length(), equalTo(expectedBytes)); + BytesRef bytes = builder.toBytesRef(); + assertThat(TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(bytes), equalTo(v)); + assertThat(bytes.length, equalTo(0)); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java new file mode 100644 index 0000000000000..5fbb44f1fac0b --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockTestUtils; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.data.DocVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.test.ESTestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +public class ExtractorTests extends ESTestCase { + @ParametersFactory + public static Iterable parameters() { + List cases = new ArrayList<>(); + for (ElementType e : ElementType.values()) { + switch (e) { + case UNKNOWN -> { + } + case BYTES_REF -> { + cases.add(valueTestCase("single alpha", e, TopNEncoder.UTF8, () -> randomAlphaOfLength(5))); + cases.add(valueTestCase("many alpha", e, TopNEncoder.UTF8, () -> randomList(2, 10, () -> randomAlphaOfLength(5)))); + cases.add(valueTestCase("single utf8", e, TopNEncoder.UTF8, () -> randomRealisticUnicodeOfLength(10))); + cases.add( + valueTestCase("many utf8", e, TopNEncoder.UTF8, () -> randomList(2, 10, () -> randomRealisticUnicodeOfLength(10))) + ); + cases.add(valueTestCase("single version", e, TopNEncoder.VERSION, () -> TopNEncoderTests.randomVersion().toBytesRef())); + cases.add( + valueTestCase( + "many version", + e, + TopNEncoder.VERSION, + () -> randomList(2, 10, () -> TopNEncoderTests.randomVersion().toBytesRef()) + ) + ); + cases.add( + valueTestCase( + "single IP", + e, + TopNEncoder.IP, + () -> new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))) + ) + ); + cases.add( + 
valueTestCase( + "many IP", + e, + TopNEncoder.IP, + () -> randomList(2, 10, () -> new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean())))) + ) + ); + } + case DOC -> cases.add( + new Object[] { + new TestCase( + "doc", + e, + TopNEncoder.DEFAULT_UNSORTABLE, + () -> new DocVector( + IntBlock.newConstantBlockWith(randomInt(), 1).asVector(), + IntBlock.newConstantBlockWith(randomInt(), 1).asVector(), + IntBlock.newConstantBlockWith(randomInt(), 1).asVector(), + randomBoolean() ? null : randomBoolean() + ).asBlock() + ) } + ); + case NULL -> cases.add(valueTestCase("null", e, TopNEncoder.DEFAULT_UNSORTABLE, () -> null)); + default -> { + cases.add(valueTestCase("single " + e, e, TopNEncoder.DEFAULT_UNSORTABLE, () -> BlockTestUtils.randomValue(e))); + cases.add( + valueTestCase( + "many " + e, + e, + TopNEncoder.DEFAULT_UNSORTABLE, + () -> randomList(2, 10, () -> BlockTestUtils.randomValue(e)) + ) + ); + } + } + } + return cases; + } + + static Object[] valueTestCase(String name, ElementType type, TopNEncoder encoder, Supplier value) { + return new Object[] { new TestCase(name, type, encoder, () -> BlockUtils.fromListRow(Arrays.asList(value.get()))[0]) }; + } + + static class TestCase { + private final String name; + private final ElementType type; + private final TopNEncoder encoder; + private final Supplier value; + + TestCase(String name, ElementType type, TopNEncoder encoder, Supplier value) { + this.name = name; + this.type = type; + this.encoder = encoder; + this.value = value; + } + + @Override + public String toString() { + return name; + } + } + + private final TestCase testCase; + + public ExtractorTests(TestCase testCase) { + this.testCase = testCase; + } + + public void testNotInKey() { + Block value = testCase.value.get(); + + BytesRefBuilder valuesBuilder = new BytesRefBuilder(); + ValueExtractor.extractorFor(testCase.type, testCase.encoder.toUnsortable(), false, value).writeValue(valuesBuilder, 0); + assertThat(valuesBuilder.length(), 
greaterThan(0)); + + ResultBuilder result = ResultBuilder.resultBuilderFor(testCase.type, testCase.encoder.toUnsortable(), false, 1); + BytesRef values = valuesBuilder.get(); + result.decodeValue(values); + assertThat(values.length, equalTo(0)); + + assertThat(result.build(), equalTo(value)); + } + + public void testInKey() { + assumeFalse("can't sort on _doc", testCase.type == ElementType.DOC); + Block value = testCase.value.get(); + + BytesRefBuilder keysBuilder = new BytesRefBuilder(); + KeyExtractor.extractorFor(testCase.type, testCase.encoder.toSortable(), randomBoolean(), randomByte(), randomByte(), value) + .writeKey(keysBuilder, 0); + assertThat(keysBuilder.length(), greaterThan(0)); + + BytesRefBuilder valuesBuilder = new BytesRefBuilder(); + ValueExtractor.extractorFor(testCase.type, testCase.encoder.toUnsortable(), true, value).writeValue(valuesBuilder, 0); + assertThat(valuesBuilder.length(), greaterThan(0)); + + ResultBuilder result = ResultBuilder.resultBuilderFor(testCase.type, testCase.encoder.toUnsortable(), true, 1); + BytesRef keys = keysBuilder.get(); + if (testCase.type == ElementType.NULL) { + assertThat(keys.length, equalTo(1)); + } else { + // Skip the non-null byte + keys.offset++; + keys.length--; + result.decodeKey(keys); + assertThat(keys.length, equalTo(0)); + } + BytesRef values = valuesBuilder.get(); + result.decodeValue(values); + assertThat(values.length, equalTo(0)); + + assertThat(result.build(), equalTo(value)); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNEncoderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNEncoderTests.java new file mode 100644 index 0000000000000..99fd535626cb0 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNEncoderTests.java @@ -0,0 +1,130 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.versionfield.Version; + +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class TopNEncoderTests extends ESTestCase { + @ParametersFactory + public static Iterable parameters() { + return List.of( + new Object[] { TopNEncoder.DEFAULT_SORTABLE }, + new Object[] { TopNEncoder.UTF8 }, + new Object[] { TopNEncoder.VERSION }, + new Object[] { TopNEncoder.IP }, + new Object[] { TopNEncoder.DEFAULT_UNSORTABLE } + ); + } + + private final TopNEncoder encoder; + + public TopNEncoderTests(TopNEncoder encoder) { + this.encoder = encoder; + } + + public void testLong() { + BytesRefBuilder builder = new BytesRefBuilder(); + long v = randomLong(); + encoder.encodeLong(v, builder); + BytesRef encoded = builder.toBytesRef(); + assertThat(encoder.decodeLong(encoded), equalTo(v)); + assertThat(encoded.length, equalTo(0)); + } + + public void testInt() { + BytesRefBuilder builder = new BytesRefBuilder(); + int v = randomInt(); + encoder.encodeInt(v, builder); + BytesRef encoded = builder.toBytesRef(); + assertThat(encoder.decodeInt(encoded), equalTo(v)); + assertThat(encoded.length, equalTo(0)); + } + + public void testDouble() { + BytesRefBuilder builder = new BytesRefBuilder(); + double v = randomDouble(); + encoder.encodeDouble(v, builder); + BytesRef encoded = builder.toBytesRef(); + assertThat(encoder.decodeDouble(encoded), equalTo(v)); + assertThat(encoded.length, equalTo(0)); + } + + public void testBoolean() { + 
BytesRefBuilder builder = new BytesRefBuilder(); + boolean v = randomBoolean(); + encoder.encodeBoolean(v, builder); + BytesRef encoded = builder.toBytesRef(); + assertThat(encoder.decodeBoolean(encoded), equalTo(v)); + assertThat(encoded.length, equalTo(0)); + } + + public void testAlpha() { + assumeTrue("unsupported", encoder == TopNEncoder.UTF8); + roundTripBytesRef(new BytesRef(randomAlphaOfLength(6))); + } + + public void testUtf8() { + assumeTrue("unsupported", encoder == TopNEncoder.UTF8); + roundTripBytesRef(new BytesRef(randomRealisticUnicodeOfLength(6))); + } + + /** + * Round trip the highest unicode character to encode without a continuation. + */ + public void testDel() { + assumeTrue("unsupported", encoder == TopNEncoder.UTF8); + roundTripBytesRef(new BytesRef("\u007F")); + } + + /** + * Round trip the lowest unicode character to encode using a continuation byte. + */ + public void testPaddingCharacter() { + assumeTrue("unsupported", encoder == TopNEncoder.UTF8); + roundTripBytesRef(new BytesRef("\u0080")); + } + + public void testVersion() { + assumeTrue("unsupported", encoder == TopNEncoder.VERSION); + roundTripBytesRef(randomVersion().toBytesRef()); + } + + public void testIp() { + assumeTrue("unsupported", encoder == TopNEncoder.IP); + roundTripBytesRef(new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean())))); + } + + private void roundTripBytesRef(BytesRef v) { + BytesRefBuilder builder = new BytesRefBuilder(); + int reportedSize = encoder.encodeBytesRef(v, builder); + BytesRef encoded = builder.toBytesRef(); + assertThat(encoded.length, equalTo(reportedSize)); + assertThat(encoder.decodeBytesRef(encoded, new BytesRef()), equalTo(v)); + assertThat(encoded.length, equalTo(0)); + } + + static Version randomVersion() { + // TODO degenerate versions and stuff + return switch (between(0, 2)) { + case 0 -> new Version(Integer.toString(between(0, 100))); + case 1 -> new Version(between(0, 100) + "." 
+ between(0, 100)); + case 2 -> new Version(between(0, 100) + "." + between(0, 100) + "." + between(0, 100)); + default -> throw new IllegalArgumentException(); + }; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorStatusTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorStatusTests.java new file mode 100644 index 0000000000000..f52274b68bdf6 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorStatusTests.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class TopNOperatorStatusTests extends AbstractWireSerializingTestCase { + public void testToXContent() { + assertThat(Strings.toString(new TopNOperatorStatus(10, 2000)), equalTo(""" + {"occupied_rows":10,"ram_bytes_used":2000,"ram_used":"1.9kb"}""")); + } + + @Override + protected Writeable.Reader instanceReader() { + return TopNOperatorStatus::new; + } + + @Override + protected TopNOperatorStatus createTestInstance() { + return new TopNOperatorStatus(randomNonNegativeInt(), randomNonNegativeLong()); + } + + @Override + protected TopNOperatorStatus mutateInstance(TopNOperatorStatus instance) { + int occupiedRows = instance.occupiedRows(); + long ramBytesUsed = instance.ramBytesUsed(); + switch (between(0, 1)) { + case 0: + occupiedRows = randomValueOtherThan(occupiedRows, () -> randomNonNegativeInt()); + break; + case 1: + ramBytesUsed = 
randomValueOtherThan(ramBytesUsed, () -> randomNonNegativeLong()); + break; + default: + throw new IllegalArgumentException(); + } + return new TopNOperatorStatus(occupiedRows, ramBytesUsed); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/TopNOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java similarity index 69% rename from x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/TopNOperatorTests.java rename to x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java index 4dffd07e23acc..1d068d3d13586 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/TopNOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java @@ -5,9 +5,10 @@ * 2.0. */ -package org.elasticsearch.compute.operator; +package org.elasticsearch.compute.operator.topn; import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.tests.util.RamUsageTester; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.unit.ByteSizeValue; @@ -15,12 +16,22 @@ import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.IntArrayVector; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.data.TestBlockBuilder; +import org.elasticsearch.compute.operator.CannedSourceOperator; +import org.elasticsearch.compute.operator.Driver; +import org.elasticsearch.compute.operator.DriverContext; 
+import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.compute.operator.OperatorTestCase; +import org.elasticsearch.compute.operator.PageConsumerOperator; +import org.elasticsearch.compute.operator.SequenceLongBlockSourceOperator; +import org.elasticsearch.compute.operator.SourceOperator; +import org.elasticsearch.compute.operator.TupleBlockSourceOperator; import org.elasticsearch.core.Tuple; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.versionfield.Version; @@ -31,11 +42,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -53,12 +61,13 @@ import static org.elasticsearch.compute.data.ElementType.DOUBLE; import static org.elasticsearch.compute.data.ElementType.INT; import static org.elasticsearch.compute.data.ElementType.LONG; -import static org.elasticsearch.compute.operator.TopNOperator.BYTESREF_FIXED_LENGTH_ENCODER; -import static org.elasticsearch.compute.operator.TopNOperator.BYTESREF_UTF8_ENCODER; -import static org.elasticsearch.compute.operator.TopNOperator.DEFAULT_ENCODER; +import static org.elasticsearch.compute.operator.topn.TopNEncoder.DEFAULT_SORTABLE; +import static org.elasticsearch.compute.operator.topn.TopNEncoder.DEFAULT_UNSORTABLE; +import static org.elasticsearch.compute.operator.topn.TopNEncoder.UTF8; import static org.elasticsearch.core.Tuple.tuple; import static org.elasticsearch.test.ListMatcher.matchesList; import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasSize; @@ -66,7 +75,6 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; public class 
TopNOperatorTests extends OperatorTestCase { - private final int pageSize = randomPageSize(); // versions taken from org.elasticsearch.xpack.versionfield.VersionTests private static final List VERSIONS = List.of( @@ -109,18 +117,26 @@ public class TopNOperatorTests extends OperatorTestCase { ); @Override - protected Operator.OperatorFactory simple(BigArrays bigArrays) { - return new TopNOperator.TopNOperatorFactory(4, List.of(new TopNOperator.SortOrder(0, true, false)), pageSize); + protected TopNOperator.TopNOperatorFactory simple(BigArrays bigArrays) { + return new TopNOperator.TopNOperatorFactory( + 4, + List.of(LONG), + List.of(DEFAULT_UNSORTABLE), + List.of(new TopNOperator.SortOrder(0, true, false)), + pageSize + ); } @Override protected String expectedDescriptionOfSimple() { - return "TopNOperator[count = 4, sortOrders = [SortOrder[channel=0, asc=true, nullsFirst=false, " + "encoder=DefaultEncoder]]]"; + return "TopNOperator[count=4, elementTypes=[LONG], encoders=[DefaultUnsortable], " + + "sortOrders=[SortOrder[channel=0, asc=true, nullsFirst=false]]]"; } @Override protected String expectedToStringOfSimple() { - return "TopNOperator[count = 0/4, sortOrder = SortOrder[channel=0, asc=true, nullsFirst=false, " + "encoder=DefaultEncoder]]"; + return "TopNOperator[count=0/4, elementTypes=[LONG], encoders=[DefaultUnsortable], " + + "sortOrders=[SortOrder[channel=0, asc=true, nullsFirst=false]]]"; } @Override @@ -157,332 +173,241 @@ protected ByteSizeValue smallEnoughToCircuitBreak() { return ByteSizeValue.ZERO; } + public void testRamBytesUsed() { + int topCount = 10_000; + // We under-count by a few bytes because of the lists. In that end that's fine, but we need to account for it here. 
+ long underCount = 100; + TopNOperator op = new TopNOperator.TopNOperatorFactory( + topCount, + List.of(LONG), + List.of(DEFAULT_UNSORTABLE), + List.of(new TopNOperator.SortOrder(0, true, false)), + pageSize + ).get(new DriverContext()); + long actualEmpty = RamUsageTester.ramUsed(op) - RamUsageTester.ramUsed(LONG) - RamUsageTester.ramUsed(DEFAULT_UNSORTABLE); + assertThat(op.ramBytesUsed(), both(greaterThan(actualEmpty - underCount)).and(lessThan(actualEmpty))); + // But when we fill it then we're quite close + for (Page p : CannedSourceOperator.collectPages(simpleInput(topCount))) { + op.addInput(p); + } + long actualFull = RamUsageTester.ramUsed(op) - RamUsageTester.ramUsed(BYTES_REF) - RamUsageTester.ramUsed(DEFAULT_UNSORTABLE); + assertThat(op.ramBytesUsed(), both(greaterThan(actualFull - underCount)).and(lessThan(actualFull))); + } + public void testRandomTopN() { for (boolean asc : List.of(true, false)) { int limit = randomIntBetween(1, 20); List inputValues = randomList(0, 5000, ESTestCase::randomLong); Comparator comparator = asc ? 
naturalOrder() : reverseOrder(); List expectedValues = inputValues.stream().sorted(comparator).limit(limit).toList(); - List outputValues = topN(inputValues, limit, asc, false); + List outputValues = topNLong(inputValues, limit, asc, false); assertThat(outputValues, equalTo(expectedValues)); } } public void testBasicTopN() { List values = Arrays.asList(2L, 1L, 4L, null, 5L, 10L, null, 20L, 4L, 100L); - assertThat(topN(values, 1, true, false), equalTo(Arrays.asList(1L))); - assertThat(topN(values, 1, false, false), equalTo(Arrays.asList(100L))); - assertThat(topN(values, 2, true, false), equalTo(Arrays.asList(1L, 2L))); - assertThat(topN(values, 2, false, false), equalTo(Arrays.asList(100L, 20L))); - assertThat(topN(values, 3, true, false), equalTo(Arrays.asList(1L, 2L, 4L))); - assertThat(topN(values, 3, false, false), equalTo(Arrays.asList(100L, 20L, 10L))); - assertThat(topN(values, 4, true, false), equalTo(Arrays.asList(1L, 2L, 4L, 4L))); - assertThat(topN(values, 4, false, false), equalTo(Arrays.asList(100L, 20L, 10L, 5L))); - assertThat(topN(values, 100, true, false), equalTo(Arrays.asList(1L, 2L, 4L, 4L, 5L, 10L, 20L, 100L, null, null))); - assertThat(topN(values, 100, false, false), equalTo(Arrays.asList(100L, 20L, 10L, 5L, 4L, 4L, 2L, 1L, null, null))); - assertThat(topN(values, 1, true, true), equalTo(Arrays.asList(new Long[] { null }))); - assertThat(topN(values, 1, false, true), equalTo(Arrays.asList(new Long[] { null }))); - assertThat(topN(values, 2, true, true), equalTo(Arrays.asList(null, null))); - assertThat(topN(values, 2, false, true), equalTo(Arrays.asList(null, null))); - assertThat(topN(values, 3, true, true), equalTo(Arrays.asList(null, null, 1L))); - assertThat(topN(values, 3, false, true), equalTo(Arrays.asList(null, null, 100L))); - assertThat(topN(values, 4, true, true), equalTo(Arrays.asList(null, null, 1L, 2L))); - assertThat(topN(values, 4, false, true), equalTo(Arrays.asList(null, null, 100L, 20L))); - assertThat(topN(values, 100, 
true, true), equalTo(Arrays.asList(null, null, 1L, 2L, 4L, 4L, 5L, 10L, 20L, 100L))); - assertThat(topN(values, 100, false, true), equalTo(Arrays.asList(null, null, 100L, 20L, 10L, 5L, 4L, 4L, 2L, 1L))); + assertThat(topNLong(values, 1, true, false), equalTo(Arrays.asList(1L))); + assertThat(topNLong(values, 1, false, false), equalTo(Arrays.asList(100L))); + assertThat(topNLong(values, 2, true, false), equalTo(Arrays.asList(1L, 2L))); + assertThat(topNLong(values, 2, false, false), equalTo(Arrays.asList(100L, 20L))); + assertThat(topNLong(values, 3, true, false), equalTo(Arrays.asList(1L, 2L, 4L))); + assertThat(topNLong(values, 3, false, false), equalTo(Arrays.asList(100L, 20L, 10L))); + assertThat(topNLong(values, 4, true, false), equalTo(Arrays.asList(1L, 2L, 4L, 4L))); + assertThat(topNLong(values, 4, false, false), equalTo(Arrays.asList(100L, 20L, 10L, 5L))); + assertThat(topNLong(values, 100, true, false), equalTo(Arrays.asList(1L, 2L, 4L, 4L, 5L, 10L, 20L, 100L, null, null))); + assertThat(topNLong(values, 100, false, false), equalTo(Arrays.asList(100L, 20L, 10L, 5L, 4L, 4L, 2L, 1L, null, null))); + assertThat(topNLong(values, 1, true, true), equalTo(Arrays.asList(new Long[] { null }))); + assertThat(topNLong(values, 1, false, true), equalTo(Arrays.asList(new Long[] { null }))); + assertThat(topNLong(values, 2, true, true), equalTo(Arrays.asList(null, null))); + assertThat(topNLong(values, 2, false, true), equalTo(Arrays.asList(null, null))); + assertThat(topNLong(values, 3, true, true), equalTo(Arrays.asList(null, null, 1L))); + assertThat(topNLong(values, 3, false, true), equalTo(Arrays.asList(null, null, 100L))); + assertThat(topNLong(values, 4, true, true), equalTo(Arrays.asList(null, null, 1L, 2L))); + assertThat(topNLong(values, 4, false, true), equalTo(Arrays.asList(null, null, 100L, 20L))); + assertThat(topNLong(values, 100, true, true), equalTo(Arrays.asList(null, null, 1L, 2L, 4L, 4L, 5L, 10L, 20L, 100L))); + assertThat(topNLong(values, 100, false, 
true), equalTo(Arrays.asList(null, null, 100L, 20L, 10L, 5L, 4L, 4L, 2L, 1L))); } - public void testCompareInts() { - Block[] bs = new Block[] { - IntBlock.newBlockBuilder(2).appendInt(Integer.MIN_VALUE).appendInt(randomIntBetween(-1000, -1)).build(), - IntBlock.newBlockBuilder(2).appendInt(randomIntBetween(-1000, -1)).appendInt(0).build(), - IntBlock.newBlockBuilder(2).appendInt(0).appendInt(randomIntBetween(1, 1000)).build(), - IntBlock.newBlockBuilder(2).appendInt(randomIntBetween(1, 1000)).appendInt(Integer.MAX_VALUE).build(), - IntBlock.newBlockBuilder(2).appendInt(Integer.MAX_VALUE).appendInt(0).build() }; - - Page page = new Page(bs); - TopNOperator.RowFactory rowFactory = new TopNOperator.RowFactory(page); - - Block nullBlock = Block.constantNullBlock(1); - Block[] nullBs = new Block[] { nullBlock, nullBlock, nullBlock, nullBlock, nullBlock }; - Page nullPage = new Page(nullBs); - TopNOperator.RowFactory nullRowFactory = new TopNOperator.RowFactory(page); - - for (int i = 0; i < bs.length; i++) { - Tuple rows = nonBytesRefRows( - randomBoolean(), - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - null, - i - ); - assertEquals(0, TopNOperator.compareRows(rows.v1(), rows.v1())); - - rows = nonBytesRefRows( - randomBoolean(), - true, - so -> rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - i - ); - assertEquals(-1, TopNOperator.compareRows(rows.v1(), rows.v2())); - - rows = nonBytesRefRows( - randomBoolean(), - false, - so -> rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - i - ); - assertEquals(1, TopNOperator.compareRows(rows.v1(), rows.v2())); - - rows = nonBytesRefRows( - randomBoolean(), - true, - so -> rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - i - ); - assertEquals(1, TopNOperator.compareRows(rows.v2(), rows.v1())); - - rows = nonBytesRefRows( - randomBoolean(), - false, - so -> rowFactory.row(page, 0, null, so), - so 
-> nullRowFactory.row(nullPage, 0, null, so), - i - ); - assertEquals(-1, TopNOperator.compareRows(rows.v2(), rows.v1())); - } - for (int i = 0; i < bs.length - 1; i++) { - Tuple rows = nonBytesRefRows( - true, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - i - ); - assertThat(TopNOperator.compareRows(rows.v1(), rows.v2()), greaterThan(0)); - rows = nonBytesRefRows( - true, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - i - ); - assertThat(TopNOperator.compareRows(rows.v2(), rows.v1()), lessThan(0)); - rows = nonBytesRefRows( - false, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - i - ); - assertThat(TopNOperator.compareRows(rows.v1(), rows.v2()), lessThan(0)); - rows = nonBytesRefRows( - false, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - i - ); - assertThat(TopNOperator.compareRows(rows.v2(), rows.v1()), greaterThan(0)); - } + private List topNLong(List inputValues, int limit, boolean ascendingOrder, boolean nullsFirst) { + return topNTwoColumns( + inputValues.stream().map(v -> tuple(v, 0L)).toList(), + limit, + List.of(LONG, LONG), + List.of(DEFAULT_UNSORTABLE, DEFAULT_UNSORTABLE), + List.of(new TopNOperator.SortOrder(0, ascendingOrder, nullsFirst)) + ).stream().map(Tuple::v1).toList(); } - private Tuple nonBytesRefRows( - boolean asc, - boolean nullsFirst, - Function, TopNOperator.Row> row1, - Function, TopNOperator.Row> row2, - int position - ) { - return rows(asc, nullsFirst, row1, row2, position, DEFAULT_ENCODER); + public void testCompareInts() { + testCompare( + new Page( + new Block[] { + IntBlock.newBlockBuilder(2).appendInt(Integer.MIN_VALUE).appendInt(randomIntBetween(-1000, -1)).build(), + IntBlock.newBlockBuilder(2).appendInt(randomIntBetween(-1000, -1)).appendInt(0).build(), + 
IntBlock.newBlockBuilder(2).appendInt(0).appendInt(randomIntBetween(1, 1000)).build(), + IntBlock.newBlockBuilder(2).appendInt(randomIntBetween(1, 1000)).appendInt(Integer.MAX_VALUE).build(), + IntBlock.newBlockBuilder(2).appendInt(0).appendInt(Integer.MAX_VALUE).build() } + ), + INT, + DEFAULT_SORTABLE + ); } - private Tuple bytesRefRows( - boolean asc, - boolean nullsFirst, - Function, TopNOperator.Row> row1, - Function, TopNOperator.Row> row2, - int position - ) { - return rows(asc, nullsFirst, row1, row2, position, BYTESREF_UTF8_ENCODER); + public void testCompareLongs() { + testCompare( + new Page( + new Block[] { + LongBlock.newBlockBuilder(2).appendLong(Long.MIN_VALUE).appendLong(randomLongBetween(-1000, -1)).build(), + LongBlock.newBlockBuilder(2).appendLong(randomLongBetween(-1000, -1)).appendLong(0).build(), + LongBlock.newBlockBuilder(2).appendLong(0).appendLong(randomLongBetween(1, 1000)).build(), + LongBlock.newBlockBuilder(2).appendLong(randomLongBetween(1, 1000)).appendLong(Long.MAX_VALUE).build(), + LongBlock.newBlockBuilder(2).appendLong(0).appendLong(Long.MAX_VALUE).build() } + ), + LONG, + DEFAULT_SORTABLE + ); } - private Tuple rows( - boolean asc, - boolean nullsFirst, - Function, TopNOperator.Row> row1, - Function, TopNOperator.Row> row2, - int position, - TopNEncoder encoder - ) { - List so = List.of(new TopNOperator.SortOrder(position, asc, nullsFirst, encoder)); - return new Tuple<>(row1 == null ? null : row1.apply(so), row2 == null ? 
null : row2.apply(so)); + public void testCompareDoubles() { + testCompare( + new Page( + new Block[] { + DoubleBlock.newBlockBuilder(2) + .appendDouble(-Double.MAX_VALUE) + .appendDouble(randomDoubleBetween(-1000, -1, true)) + .build(), + DoubleBlock.newBlockBuilder(2).appendDouble(randomDoubleBetween(-1000, -1, true)).appendDouble(0.0).build(), + DoubleBlock.newBlockBuilder(2).appendDouble(0).appendDouble(randomDoubleBetween(1, 1000, true)).build(), + DoubleBlock.newBlockBuilder(2).appendDouble(randomLongBetween(1, 1000)).appendDouble(Double.MAX_VALUE).build(), + DoubleBlock.newBlockBuilder(2).appendDouble(0.0).appendDouble(Double.MAX_VALUE).build() } + ), + DOUBLE, + DEFAULT_SORTABLE + ); } - public void testCompareBytesRef() { - Block[] bs = new Block[] { - BytesRefBlock.newBlockBuilder(2).appendBytesRef(new BytesRef("bye")).appendBytesRef(new BytesRef("hello")).build() }; - Page page = new Page(bs); - TopNOperator.RowFactory rowFactory = new TopNOperator.RowFactory(page); - - Tuple rows = bytesRefRows( - false, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - null, - 0 + public void testCompareUtf8() { + testCompare( + new Page( + new Block[] { + BytesRefBlock.newBlockBuilder(2).appendBytesRef(new BytesRef("bye")).appendBytesRef(new BytesRef("hello")).build() } + ), + BYTES_REF, + UTF8 ); - assertEquals(0, TopNOperator.compareRows(rows.v1(), rows.v1())); - rows = bytesRefRows(false, randomBoolean(), so -> rowFactory.row(page, 1, null, so), null, 0); - assertEquals(0, TopNOperator.compareRows(rows.v1(), rows.v1())); - - rows = bytesRefRows(true, randomBoolean(), so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertThat(TopNOperator.compareRows(rows.v1(), rows.v2()), greaterThan(0)); - rows = bytesRefRows(true, randomBoolean(), so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertThat(TopNOperator.compareRows(rows.v2(), rows.v1()), lessThan(0)); - rows = 
bytesRefRows(false, randomBoolean(), so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertThat(TopNOperator.compareRows(rows.v1(), rows.v2()), lessThan(0)); - rows = bytesRefRows(false, rarely(), so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertThat(TopNOperator.compareRows(rows.v2(), rows.v1()), greaterThan(0)); } public void testCompareBooleans() { - Block[] bs = new Block[] { - BooleanBlock.newBlockBuilder(2).appendBoolean(false).appendBoolean(true).build(), - BooleanBlock.newBlockBuilder(2).appendBoolean(true).appendBoolean(false).build() }; - - Page page = new Page(bs); - TopNOperator.RowFactory rowFactory = new TopNOperator.RowFactory(page); - - Block nullBlock = Block.constantNullBlock(2); - Block[] nullBs = new Block[] { nullBlock, nullBlock }; - Page nullPage = new Page(nullBs); - TopNOperator.RowFactory nullRowFactory = new TopNOperator.RowFactory(page); - - Tuple rows = nonBytesRefRows( - randomBoolean(), - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - 0 - ); - assertEquals(0, TopNOperator.compareRows(rows.v1(), rows.v1())); - assertEquals(0, TopNOperator.compareRows(rows.v2(), rows.v2())); - - rows = nonBytesRefRows( - randomBoolean(), - true, - so -> rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - 0 - ); - assertEquals(-1, TopNOperator.compareRows(rows.v1(), rows.v2())); - rows = nonBytesRefRows( - randomBoolean(), - false, - so -> rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - 0 - ); - assertEquals(1, TopNOperator.compareRows(rows.v1(), rows.v2())); - rows = nonBytesRefRows( - randomBoolean(), - true, - so -> rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - 0 - ); - assertEquals(1, TopNOperator.compareRows(rows.v2(), rows.v1())); - rows = nonBytesRefRows( - randomBoolean(), - false, - so -> 
rowFactory.row(page, 0, null, so), - so -> nullRowFactory.row(nullPage, 0, null, so), - 0 + testCompare( + new Page(new Block[] { BooleanBlock.newBlockBuilder(2).appendBoolean(false).appendBoolean(true).build() }), + BOOLEAN, + DEFAULT_SORTABLE ); - assertEquals(-1, TopNOperator.compareRows(rows.v2(), rows.v1())); - - for (int i = 0; i < bs.length - 1; i++) { - rows = nonBytesRefRows( - true, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - 0 - ); - assertEquals(1, TopNOperator.compareRows(rows.v1(), rows.v2())); - rows = nonBytesRefRows( - true, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - 0 - ); - assertEquals(-1, TopNOperator.compareRows(rows.v2(), rows.v1())); - rows = nonBytesRefRows( - false, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - 0 - ); - assertEquals(-1, TopNOperator.compareRows(rows.v1(), rows.v2())); - rows = nonBytesRefRows( - false, - randomBoolean(), - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - 0 - ); - assertEquals(1, TopNOperator.compareRows(rows.v2(), rows.v1())); - } } - public void testCompareWithNulls() { - Block i1 = IntBlock.newBlockBuilder(2).appendInt(100).appendNull().build(); + private void testCompare(Page page, ElementType elementType, TopNEncoder encoder) { + Block nullBlock = Block.constantNullBlock(1); + Page nullPage = new Page(new Block[] { nullBlock, nullBlock, nullBlock, nullBlock, nullBlock }); - Page page = new Page(i1); - TopNOperator.RowFactory rowFactory = new TopNOperator.RowFactory(page); + for (int b = 0; b < page.getBlockCount(); b++) { + // Non-null identity + for (int p = 0; p < page.getPositionCount(); p++) { + TopNOperator.Row row = row(elementType, encoder, b, randomBoolean(), randomBoolean(), page, p); + assertEquals(0, TopNOperator.compareRows(row, row)); + } - Tuple rows = 
nonBytesRefRows( - randomBoolean(), - true, - so -> rowFactory.row(page, 0, null, so), - so -> rowFactory.row(page, 1, null, so), - 0 - ); - assertEquals(-1, TopNOperator.compareRows(rows.v1(), rows.v2())); - rows = nonBytesRefRows(randomBoolean(), true, so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertEquals(1, TopNOperator.compareRows(rows.v2(), rows.v1())); - rows = nonBytesRefRows(randomBoolean(), false, so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertEquals(1, TopNOperator.compareRows(rows.v1(), rows.v2())); - rows = nonBytesRefRows(randomBoolean(), false, so -> rowFactory.row(page, 0, null, so), so -> rowFactory.row(page, 1, null, so), 0); - assertEquals(-1, TopNOperator.compareRows(rows.v2(), rows.v1())); + // Null identity + for (int p = 0; p < page.getPositionCount(); p++) { + TopNOperator.Row row = row(elementType, encoder, b, randomBoolean(), randomBoolean(), nullPage, p); + assertEquals(0, TopNOperator.compareRows(row, row)); + } + + // nulls first + for (int p = 0; p < page.getPositionCount(); p++) { + boolean asc = randomBoolean(); + TopNOperator.Row nonNullRow = row(elementType, encoder, b, asc, true, page, p); + TopNOperator.Row nullRow = row(elementType, encoder, b, asc, true, nullPage, p); + assertEquals(-1, TopNOperator.compareRows(nonNullRow, nullRow)); + assertEquals(1, TopNOperator.compareRows(nullRow, nonNullRow)); + } + + // nulls last + for (int p = 0; p < page.getPositionCount(); p++) { + boolean asc = randomBoolean(); + TopNOperator.Row nonNullRow = row(elementType, encoder, b, asc, false, page, p); + TopNOperator.Row nullRow = row(elementType, encoder, b, asc, false, nullPage, p); + assertEquals(1, TopNOperator.compareRows(nonNullRow, nullRow)); + assertEquals(-1, TopNOperator.compareRows(nullRow, nonNullRow)); + } + + // ascending + { + boolean nullsFirst = randomBoolean(); + TopNOperator.Row r1 = row(elementType, encoder, b, true, nullsFirst, 
page, 0); + TopNOperator.Row r2 = row(elementType, encoder, b, true, nullsFirst, page, 1); + assertThat(TopNOperator.compareRows(r1, r2), greaterThan(0)); + assertThat(TopNOperator.compareRows(r2, r1), lessThan(0)); + } + // descending + { + boolean nullsFirst = randomBoolean(); + TopNOperator.Row r1 = row(elementType, encoder, b, false, nullsFirst, page, 0); + TopNOperator.Row r2 = row(elementType, encoder, b, false, nullsFirst, page, 1); + assertThat(TopNOperator.compareRows(r1, r2), lessThan(0)); + assertThat(TopNOperator.compareRows(r2, r1), greaterThan(0)); + } + } } - private List topN(List inputValues, int limit, boolean ascendingOrder, boolean nullsFirst) { - return topNTwoColumns( - inputValues.stream().map(v -> tuple(v, 0L)).toList(), - limit, - List.of(new TopNOperator.SortOrder(0, ascendingOrder, nullsFirst)) - ).stream().map(Tuple::v1).toList(); + private TopNOperator.Row row( + ElementType elementType, + TopNEncoder encoder, + int channel, + boolean asc, + boolean nullsFirst, + Page page, + int position + ) { + TopNOperator.RowFactory rf = new TopNOperator.RowFactory( + IntStream.range(0, page.getBlockCount()).mapToObj(i -> elementType).toList(), + IntStream.range(0, page.getBlockCount()).mapToObj(i -> encoder).toList(), + List.of(new TopNOperator.SortOrder(channel, asc, nullsFirst)), + page + ); + return rf.row(position, null); } public void testTopNTwoColumns() { List> values = Arrays.asList(tuple(1L, 1L), tuple(1L, 2L), tuple(null, null), tuple(null, 1L), tuple(1L, null)); assertThat( - topNTwoColumns(values, 5, List.of(new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(1, true, false))), + topNTwoColumns( + values, + 5, + List.of(LONG, LONG), + List.of(TopNEncoder.DEFAULT_SORTABLE, TopNEncoder.DEFAULT_SORTABLE), + List.of(new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(1, true, false)) + ), equalTo(List.of(tuple(1L, 1L), tuple(1L, 2L), tuple(1L, null), tuple(null, 1L), tuple(null, null))) ); assertThat( - 
topNTwoColumns(values, 5, List.of(new TopNOperator.SortOrder(0, true, true), new TopNOperator.SortOrder(1, true, false))), + topNTwoColumns( + values, + 5, + List.of(LONG, LONG), + List.of(TopNEncoder.DEFAULT_SORTABLE, TopNEncoder.DEFAULT_SORTABLE), + List.of(new TopNOperator.SortOrder(0, true, true), new TopNOperator.SortOrder(1, true, false)) + ), equalTo(List.of(tuple(null, 1L), tuple(null, null), tuple(1L, 1L), tuple(1L, 2L), tuple(1L, null))) ); assertThat( - topNTwoColumns(values, 5, List.of(new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(1, true, true))), + topNTwoColumns( + values, + 5, + List.of(LONG, LONG), + List.of(TopNEncoder.DEFAULT_SORTABLE, TopNEncoder.DEFAULT_SORTABLE), + List.of(new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(1, true, true)) + ), equalTo(List.of(tuple(1L, null), tuple(1L, 1L), tuple(1L, 2L), tuple(null, null), tuple(null, 1L))) ); } @@ -499,10 +424,19 @@ public void testCollectAllValues() { expectedTop.add(topKeys); blocks.add(keys); + List elementTypes = new ArrayList<>(); + List encoders = new ArrayList<>(); + + // Add the keys + elementTypes.add(INT); + encoders.add(DEFAULT_SORTABLE); + for (ElementType e : ElementType.values()) { if (e == ElementType.UNKNOWN) { continue; } + elementTypes.add(e); + encoders.add(e == BYTES_REF ? 
UTF8 : DEFAULT_UNSORTABLE); List eTop = new ArrayList<>(); Block.Builder builder = e.newBlockBuilder(size); for (int i = 0; i < size; i++) { @@ -526,7 +460,9 @@ public void testCollectAllValues() { List.of( new TopNOperator( topCount, - List.of(new TopNOperator.SortOrder(0, false, false, BYTESREF_UTF8_ENCODER)), + elementTypes, + encoders, + List.of(new TopNOperator.SortOrder(0, false, false)), randomPageSize() ) ), @@ -554,11 +490,20 @@ public void testCollectAllValues_RandomMultiValues() { expectedTop.add(topKeys); blocks.add(keys); + List elementTypes = new ArrayList<>(blocksCount); + List encoders = new ArrayList<>(blocksCount); + + // Add the keys + elementTypes.add(INT); + encoders.add(DEFAULT_UNSORTABLE); + for (int type = 0; type < blocksCount; type++) { ElementType e = randomFrom(ElementType.values()); if (e == ElementType.UNKNOWN) { continue; } + elementTypes.add(e); + encoders.add(e == BYTES_REF ? UTF8 : DEFAULT_SORTABLE); List eTop = new ArrayList<>(); Block.Builder builder = e.newBlockBuilder(rows); for (int i = 0; i < rows; i++) { @@ -600,7 +545,9 @@ public void testCollectAllValues_RandomMultiValues() { List.of( new TopNOperator( topCount, - List.of(new TopNOperator.SortOrder(0, false, false, BYTESREF_UTF8_ENCODER)), + elementTypes, + encoders, + List.of(new TopNOperator.SortOrder(0, false, false)), randomPageSize() ) ), @@ -618,6 +565,8 @@ public void testCollectAllValues_RandomMultiValues() { private List> topNTwoColumns( List> inputValues, int limit, + List elementTypes, + List encoder, List sortOrders ) { DriverContext driverContext = new DriverContext(); @@ -626,7 +575,7 @@ private List> topNTwoColumns( Driver driver = new Driver( driverContext, new TupleBlockSourceOperator(inputValues, randomIntBetween(1, 1000)), - List.of(new TopNOperator(limit, sortOrders, randomPageSize())), + List.of(new TopNOperator(limit, elementTypes, encoder, sortOrders, randomPageSize())), new PageConsumerOperator(page -> { LongBlock block1 = page.getBlock(0); 
LongBlock block2 = page.getBlock(1); @@ -645,21 +594,25 @@ private List> topNTwoColumns( } public void testTopNManyDescriptionAndToString() { + int fixedLength = between(1, 100); TopNOperator.TopNOperatorFactory factory = new TopNOperator.TopNOperatorFactory( 10, - List.of( - new TopNOperator.SortOrder(1, false, false, BYTESREF_UTF8_ENCODER), - new TopNOperator.SortOrder(3, false, true, BYTESREF_FIXED_LENGTH_ENCODER) - ), + List.of(BYTES_REF, BYTES_REF), + List.of(UTF8, new FixedLengthTopNEncoder(fixedLength)), + List.of(new TopNOperator.SortOrder(1, false, false), new TopNOperator.SortOrder(3, false, true)), randomPageSize() ); - String sorts = List.of( - "SortOrder[channel=1, asc=false, nullsFirst=false, encoder=UTF8TopNEncoder]", - "SortOrder[channel=3, asc=false, nullsFirst=true, encoder=FixedLengthTopNEncoder]" - ).stream().collect(Collectors.joining(", ")); - assertThat(factory.describe(), equalTo("TopNOperator[count = 10, sortOrders = [" + sorts + "]]")); + String sorts = List.of("SortOrder[channel=1, asc=false, nullsFirst=false]", "SortOrder[channel=3, asc=false, nullsFirst=true]") + .stream() + .collect(Collectors.joining(", ")); + String tail = ", elementTypes=[BYTES_REF, BYTES_REF], encoders=[UTF8TopNEncoder, FixedLengthTopNEncoder[" + + fixedLength + + "]], sortOrders=[" + + sorts + + "]]"; + assertThat(factory.describe(), equalTo("TopNOperator[count=10" + tail)); try (Operator operator = factory.get(new DriverContext())) { - assertThat(operator.toString(), equalTo("TopNOperator[count = 0/10, sortOrders = [" + sorts + "]]")); + assertThat(operator.toString(), equalTo("TopNOperator[count=0/10" + tail)); } } @@ -705,6 +658,7 @@ public void testTopNWithSortingOnSameField_DESC_then_ASC_int() { INT_MV, List.of(100, List.of(-1, 63, 2), List.of(63, 61, 62), 50, List.of(22, 21, 22)), INT, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, false, false), new TopNOperator.SortOrder(0, true, false) ); @@ -728,6 +682,7 @@ public void 
testTopNWithSortingOnSameField_DESC_then_ASC_long() { LONG_MV, expectedValues, LONG, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, false, false), new TopNOperator.SortOrder(0, true, false) ); @@ -751,6 +706,7 @@ public void testTopNWithSortingOnSameField_DESC_then_ASC_double() { DOUBLE_MV, expectedValues, DOUBLE, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, false, false), new TopNOperator.SortOrder(0, true, false) ); @@ -761,6 +717,7 @@ public void testTopNWithSortingOnSameField_DESC_then_ASC_boolean() { BOOL_MV, List.of(List.of(true, false), List.of(true, false), true, List.of(true, true, true), List.of(false, false, false), false), BOOLEAN, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, false, false), new TopNOperator.SortOrder(0, true, false) ); @@ -777,8 +734,9 @@ public void testTopNWithSortingOnSameField_DESC_then_ASC_BytesRef() { new BytesRef("100") ), BYTES_REF, - new TopNOperator.SortOrder(0, false, false, BYTESREF_UTF8_ENCODER), - new TopNOperator.SortOrder(0, true, false, BYTESREF_UTF8_ENCODER) + UTF8, + new TopNOperator.SortOrder(0, false, false), + new TopNOperator.SortOrder(0, true, false) ); } @@ -787,6 +745,7 @@ public void testTopNWithSortingOnSameField_ASC_then_DESC_int() { INT_MV, List.of(List.of(-1, 63, 2), List.of(22, 21, 22), 50, List.of(63, 61, 62), 100), INT, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(0, false, false) ); @@ -810,6 +769,7 @@ public void testTopNWithSortingOnSameField_ASC_then_DESC_long() { LONG_MV, expectedValues, LONG, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(0, false, false) ); @@ -833,6 +793,7 @@ public void testTopNWithSortingOnSameField_ASC_then_DESC_double() { DOUBLE_MV, expectedValues, DOUBLE, + DEFAULT_SORTABLE, new TopNOperator.SortOrder(0, true, false), new TopNOperator.SortOrder(0, false, false) ); @@ -849,8 +810,9 @@ public void testTopNWithSortingOnSameField_ASC_then_DESC_BytesRef() { List.of(new 
BytesRef("63"), new BytesRef("61"), new BytesRef("62")) ), BYTES_REF, - new TopNOperator.SortOrder(0, true, false, BYTESREF_UTF8_ENCODER), - new TopNOperator.SortOrder(0, false, false, BYTESREF_UTF8_ENCODER) + UTF8, + new TopNOperator.SortOrder(0, true, false), + new TopNOperator.SortOrder(0, false, false) ); } @@ -858,6 +820,7 @@ private void assertSortingOnMV( List> values, List expectedValues, ElementType blockType, + TopNEncoder encoder, TopNOperator.SortOrder... sortOrders ) { Block block = TestBlockBuilder.blockFromValues(values, blockType); @@ -870,7 +833,7 @@ private void assertSortingOnMV( Driver driver = new Driver( new DriverContext(), new CannedSourceOperator(List.of(page).iterator()), - List.of(new TopNOperator(topCount, List.of(sortOrders), randomPageSize())), + List.of(new TopNOperator(topCount, List.of(blockType), List.of(encoder), List.of(sortOrders), randomPageSize())), new PageConsumerOperator(p -> readInto(actualValues, p)), () -> {} ) @@ -889,7 +852,8 @@ public void testRandomMultiValuesTopN() { Set uniqueOrders = new LinkedHashSet<>(sortingByColumns); List>> expectedValues = new ArrayList<>(rows); List blocks = new ArrayList<>(blocksCount); - Map columnBytesRefEncoder = new HashMap<>(blocksCount); + List elementTypes = new ArrayList<>(blocksCount); + List encoders = new ArrayList<>(blocksCount); for (int i = 0; i < rows; i++) { expectedValues.add(new ArrayList<>(blocksCount)); @@ -900,6 +864,7 @@ public void testRandomMultiValuesTopN() { t -> t == ElementType.UNKNOWN || t == ElementType.DOC, () -> randomFrom(ElementType.values()) ); + elementTypes.add(e); Block.Builder builder = e.newBlockBuilder(rows); List previousValue = null; Function randomValueSupplier = (blockType) -> randomValue(blockType); @@ -908,17 +873,19 @@ public void testRandomMultiValuesTopN() { if (randomBoolean()) { // deal with IP fields (BytesRef block) like ES does and properly encode the ip addresses randomValueSupplier = (blockType) -> new 
BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); + // use the right BytesRef encoder (don't touch the bytes) + encoders.add(TopNEncoder.IP); } else { // create a valid Version randomValueSupplier = (blockType) -> randomVersion().toBytesRef(); + // use the right BytesRef encoder (don't touch the bytes) + encoders.add(TopNEncoder.VERSION); } - // use the right BytesRef encoder (don't touch the bytes) - columnBytesRefEncoder.put(type, BYTESREF_FIXED_LENGTH_ENCODER); } else { - columnBytesRefEncoder.put(type, BYTESREF_UTF8_ENCODER); + encoders.add(UTF8); } } else { - columnBytesRefEncoder.put(type, DEFAULT_ENCODER); + encoders.add(DEFAULT_SORTABLE); } for (int i = 0; i < rows; i++) { @@ -935,7 +902,8 @@ public void testRandomMultiValuesTopN() { values.add(value); } } else {// null or single-valued value - values.add(randomValueSupplier.apply(e)); + Object value = randomValueSupplier.apply(e); + values.add(value); } if (usually() && randomBoolean()) { @@ -963,12 +931,12 @@ public void testRandomMultiValuesTopN() { // same "nulls" handling) while (uniqueOrders.size() < sortingByColumns) { int column = randomIntBetween(0, blocksCount - 1); - uniqueOrders.add(new TopNOperator.SortOrder(column, randomBoolean(), randomBoolean(), columnBytesRefEncoder.get(column))); + uniqueOrders.add(new TopNOperator.SortOrder(column, randomBoolean(), randomBoolean())); } List>> actualValues = new ArrayList<>(); List results = this.drive( - new TopNOperator(topCount, uniqueOrders.stream().toList(), rows), + new TopNOperator(topCount, elementTypes, encoders, uniqueOrders.stream().toList(), rows), List.of(new Page(blocks.toArray(Block[]::new))).iterator() ); for (Page p : results) { @@ -982,11 +950,6 @@ public void testRandomMultiValuesTopN() { List> actualReducedValues = extractAndReduceSortedValues(actualValues, uniqueOrders); List> expectedReducedValues = extractAndReduceSortedValues(topNExpectedValues, uniqueOrders); - assertThat(actualReducedValues.size(), 
equalTo(topNExpectedValues.size())); - assertThat(expectedReducedValues.size(), equalTo(topNExpectedValues.size())); - for (int i = 0; i < topNExpectedValues.size(); i++) { - assertThat(topNExpectedValues.get(i).size(), equalTo(actualValues.get(i).size())); - } assertMap(actualReducedValues, matchesList(expectedReducedValues)); } @@ -999,15 +962,20 @@ public void testIPSortingSingleValue() throws UnknownHostException { append(builder, new BytesRef(InetAddressPoint.encode(InetAddress.getByName(ip)))); } - Set orders = new HashSet<>(1); - orders.add(new TopNOperator.SortOrder(0, asc, randomBoolean(), BYTESREF_FIXED_LENGTH_ENCODER)); - List> actual = new ArrayList<>(); try ( Driver driver = new Driver( new DriverContext(), new CannedSourceOperator(List.of(new Page(builder.build())).iterator()), - List.of(new TopNOperator(ips.size(), orders.stream().toList(), randomPageSize())), + List.of( + new TopNOperator( + ips.size(), + List.of(BYTES_REF), + List.of(TopNEncoder.IP), + List.of(new TopNOperator.SortOrder(0, asc, randomBoolean())), + randomPageSize() + ) + ), new PageConsumerOperator(p -> readInto(actual, p)), () -> {} ) @@ -1117,15 +1085,20 @@ private void assertIPSortingOnMultiValues( } } - Set orders = new HashSet<>(1); - orders.add(new TopNOperator.SortOrder(0, asc, nullsFirst, BYTESREF_FIXED_LENGTH_ENCODER)); - List> actual = new ArrayList<>(); try ( Driver driver = new Driver( new DriverContext(), new CannedSourceOperator(List.of(new Page(builder.build())).iterator()), - List.of(new TopNOperator(ips.size(), orders.stream().toList(), randomPageSize())), + List.of( + new TopNOperator( + ips.size(), + List.of(BYTES_REF), + List.of(TopNEncoder.IP), + List.of(new TopNOperator.SortOrder(0, asc, nullsFirst)), + randomPageSize() + ) + ), new PageConsumerOperator(p -> readInto(actual, p)), () -> {} ) @@ -1192,16 +1165,24 @@ public void testZeroByte() { List blocks = new ArrayList<>(2); blocks.add(builderText.build()); blocks.add(builderInt.build()); - Set orders = new 
HashSet<>(2); - orders.add(new TopNOperator.SortOrder(0, true, randomBoolean(), BYTESREF_UTF8_ENCODER)); - orders.add(new TopNOperator.SortOrder(1, randomBoolean(), randomBoolean(), DEFAULT_ENCODER)); List> actual = new ArrayList<>(); try ( Driver driver = new Driver( new DriverContext(), new CannedSourceOperator(List.of(new Page(blocks.toArray(Block[]::new))).iterator()), - List.of(new TopNOperator(2, orders.stream().toList(), randomPageSize())), + List.of( + new TopNOperator( + 2, + List.of(BYTES_REF, INT), + List.of(TopNEncoder.UTF8, DEFAULT_UNSORTABLE), + List.of( + new TopNOperator.SortOrder(0, true, randomBoolean()), + new TopNOperator.SortOrder(1, randomBoolean(), randomBoolean()) + ), + randomPageSize() + ) + ), new PageConsumerOperator(p -> readInto(actual, p)), () -> {} ) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNRowTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNRowTests.java new file mode 100644 index 0000000000000..be65cda3cce70 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNRowTests.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.tests.util.RamUsageTester; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class TopNRowTests extends ESTestCase { + public void testRamBytesUsedEmpty() { + TopNOperator.Row row = new TopNOperator.Row(); + // We double count the shared empty array for empty rows. This overcounting is *fine*, but throws off the test. 
+ assertThat(row.ramBytesUsed(), equalTo(RamUsageTester.ramUsed(row) + RamUsageTester.ramUsed(new byte[0]))); + } + + public void testRamBytesUsedSmall() { + TopNOperator.Row row = new TopNOperator.Row(); + row.keys.append(randomByte()); + row.values.append(randomByte()); + assertThat(row.ramBytesUsed(), equalTo(RamUsageTester.ramUsed(row))); + } + + public void testRamBytesUsedBig() { + TopNOperator.Row row = new TopNOperator.Row(); + for (int i = 0; i < 10000; i++) { + row.keys.append(randomByte()); + row.values.append(randomByte()); + } + assertThat(row.ramBytesUsed(), equalTo(RamUsageTester.ramUsed(row))); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 74a6b561fb9b4..eb7f42f32a08f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -33,13 +33,13 @@ import org.elasticsearch.compute.operator.SourceOperator; import org.elasticsearch.compute.operator.SourceOperator.SourceOperatorFactory; import org.elasticsearch.compute.operator.StringExtractOperator; -import org.elasticsearch.compute.operator.TopNEncoder; -import org.elasticsearch.compute.operator.TopNOperator; -import org.elasticsearch.compute.operator.TopNOperator.TopNOperatorFactory; import org.elasticsearch.compute.operator.exchange.ExchangeSinkHandler; import org.elasticsearch.compute.operator.exchange.ExchangeSinkOperator.ExchangeSinkOperatorFactory; import org.elasticsearch.compute.operator.exchange.ExchangeSourceHandler; import org.elasticsearch.compute.operator.exchange.ExchangeSourceOperator.ExchangeSourceOperatorFactory; +import org.elasticsearch.compute.operator.topn.TopNEncoder; +import org.elasticsearch.compute.operator.topn.TopNOperator; +import 
org.elasticsearch.compute.operator.topn.TopNOperator.TopNOperatorFactory; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.tasks.CancellableTask; @@ -82,6 +82,7 @@ import org.elasticsearch.xpack.ql.util.Holder; import java.util.ArrayList; +import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; @@ -256,6 +257,9 @@ public static ElementType toElementType(DataType dataType) { if (dataType == DataTypes.BOOLEAN) { return ElementType.BOOLEAN; } + if (dataType == EsQueryExec.DOC_DATA_TYPE) { + return ElementType.DOC; + } throw EsqlIllegalArgumentException.illegalDataType(dataType); } @@ -327,6 +331,29 @@ private PhysicalOperation planExchangeSource(ExchangeSourceExec exchangeSource, private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerContext context) { PhysicalOperation source = plan(topNExec.child(), context); + ElementType[] elementTypes = new ElementType[source.layout.numberOfChannels()]; + TopNEncoder[] encoders = new TopNEncoder[source.layout.numberOfChannels()]; + if (source.layout.numberOfChannels() != topNExec.child().output().size()) { + for (var l : source.layout.internalLayout().entrySet()) { + System.err.println("l: " + l.getKey()); + } + for (var o : topNExec.child().output()) { + System.err.println("o: " + o.id() + " " + o.name()); + } + System.err.println("ASDFDSAF"); + } + for (Attribute a : topNExec.child().output()) { + int channel = source.layout.getChannel(a.id()); + elementTypes[channel] = toElementType(a.dataType()); + encoders[channel] = switch (a.dataType().typeName()) { + case "ip" -> TopNEncoder.IP; + case "text", "keyword" -> TopNEncoder.UTF8; + case "version" -> TopNEncoder.VERSION; + case "boolean", "null", "byte", "short", "integer", "long", "double", "float", "half_float", "datetime", "date_period", + "time_duration", "object", "nested", "scaled_float", "unsigned_long", "_doc" -> 
TopNEncoder.DEFAULT_SORTABLE; + default -> throw new EsqlIllegalArgumentException("No TopN sorting encoder for type " + a.dataType().typeName()); + }; + } List orders = topNExec.order().stream().map(order -> { int sortByChannel; if (order.child() instanceof Attribute a) { @@ -335,28 +362,10 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte throw new EsqlIllegalArgumentException("order by expression must be an attribute"); } - TopNEncoder encoder = switch (a.dataType().typeName()) { - case "ip": { - yield TopNOperator.BYTESREF_FIXED_LENGTH_ENCODER; - } - case "text", "keyword": { - yield TopNOperator.BYTESREF_UTF8_ENCODER; - } - case "version": { - yield TopNOperator.BYTESREF_FIXED_LENGTH_ENCODER; - } - case "boolean", "null", "byte", "short", "integer", "long", "double", "float", "half_float", "datetime", "date_period", - "time_duration", "object", "nested", "scaled_float", "unsigned_long": { - yield TopNOperator.DEFAULT_ENCODER; - } - default: - throw new EsqlIllegalArgumentException("No TopN sorting encoder for type " + a.dataType().typeName()); - }; return new TopNOperator.SortOrder( sortByChannel, order.direction().equals(Order.OrderDirection.ASC), - order.nullsPosition().equals(Order.NullsPosition.FIRST), - encoder + order.nullsPosition().equals(Order.NullsPosition.FIRST) ); }).toList(); @@ -376,7 +385,16 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte * That'll be more accurate. And we don't have a path for estimating * incoming rows. And we don't need one because we can estimate. 
*/ - return source.with(new TopNOperatorFactory(limit, orders, context.pageSize(2000 + topNExec.estimatedRowSize())), source.layout); + return source.with( + new TopNOperatorFactory( + limit, + Arrays.asList(elementTypes), + Arrays.asList(encoders), + orders, + context.pageSize(2000 + topNExec.estimatedRowSize()) + ), + source.layout + ); } private PhysicalOperation planEval(EvalExec eval, LocalExecutionPlannerContext context) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index c088cae6f20c9..3ff682a905f95 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -13,8 +13,12 @@ import org.elasticsearch.compute.aggregation.GroupingAggregator; import org.elasticsearch.compute.aggregation.blockhash.BlockHash; import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.DocBlock; +import org.elasticsearch.compute.data.DocVector; import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntArrayVector; import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.HashAggregationOperator; @@ -33,6 +37,7 @@ import java.util.List; import java.util.Random; import java.util.function.Supplier; +import java.util.stream.IntStream; import static com.carrotsearch.randomizedtesting.generators.RandomNumbers.randomIntBetween; import static java.util.stream.Collectors.joining; @@ -96,16 +101,15 @@ public Page getOutput() { finish(); } - Block[] fakeSourceAttributesBlocks = new Block[1]; - // a block 
that contains the position of each document as int - // will be used to "filter" and extract the block's values later on. Basically, a replacement for _doc, _shard and _segment ids - IntBlock.Builder docIndexBlockBuilder = IntBlock.newBlockBuilder(testData.getPositionCount()); - for (int i = 0; i < testData.getPositionCount(); i++) { - docIndexBlockBuilder.appendInt(i); - } - fakeSourceAttributesBlocks[0] = docIndexBlockBuilder.build(); // instead of _doc - Page newPageWithSourceAttributes = new Page(fakeSourceAttributesBlocks); - return newPageWithSourceAttributes; + return new Page( + new Block[] { + new DocVector( + IntBlock.newConstantBlockWith(0, testData.getPositionCount()).asVector(), + IntBlock.newConstantBlockWith(0, testData.getPositionCount()).asVector(), + new IntArrayVector(IntStream.range(0, testData.getPositionCount()).toArray(), testData.getPositionCount()), + true + ).asBlock() } + ); } @Override @@ -286,13 +290,12 @@ private Block extractBlockForColumn(Page page, String columnName) { if (columnIndex < 0) { throw new EsqlIllegalArgumentException("Cannot find column named [{}] in {}", columnName, columnNames); } - // this is the first block added by TestSourceOperator - IntBlock docIndexBlock = page.getBlock(0); - // use its filtered position to extract the data needed for "columnName" block + DocBlock docBlock = page.getBlock(0); + IntVector docIndices = docBlock.asVector().docs(); Block loadedBlock = testData.getBlock(columnIndex); - int[] filteredPositions = new int[docIndexBlock.getPositionCount()]; - for (int c = 0; c < docIndexBlock.getPositionCount(); c++) { - filteredPositions[c] = (Integer) docIndexBlock.getInt(c); + int[] filteredPositions = new int[docIndices.getPositionCount()]; + for (int c = 0; c < docIndices.getPositionCount(); c++) { + filteredPositions[c] = docIndices.getInt(c); } return loadedBlock.filter(filteredPositions); }