diff --git a/server/src/main/java/org/opensearch/common/util/CollectionUtils.java b/server/src/main/java/org/opensearch/common/util/CollectionUtils.java index d8a86f4878f58..6452d7061fdfa 100644 --- a/server/src/main/java/org/opensearch/common/util/CollectionUtils.java +++ b/server/src/main/java/org/opensearch/common/util/CollectionUtils.java @@ -32,12 +32,10 @@ package org.opensearch.common.util; -import com.carrotsearch.hppc.ObjectArrayList; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefArray; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.IntroSorter; import org.opensearch.common.Strings; import org.opensearch.common.collect.Iterators; @@ -50,7 +48,9 @@ import java.util.Comparator; import java.util.HashMap; import java.util.IdentityHashMap; +import java.util.Iterator; import java.util.List; +import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -95,59 +95,28 @@ public static <T> List<T> rotate(final List<T> list, int distance) { return new RotatedList<>(list, d); } - public static void sortAndDedup(final ObjectArrayList<byte[]> array) { - int len = array.size(); - if (len > 1) { - sort(array); - int uniqueCount = 1; - for (int i = 1; i < len; ++i) { - if (!Arrays.equals(array.get(i), array.get(i - 1))) { - array.set(uniqueCount++, array.get(i)); - } - } - array.elementsCount = uniqueCount; + /** + * in place de-duplicates items in a list + */ + public static <T> void sortAndDedup(final List<T> array, Comparator<T> comparator) { + // base case: one item + if (array.size() <= 1) { + return; } - } - - public static void sort(final ObjectArrayList<byte[]> array) { - new IntroSorter() { - - byte[] pivot; - - @Override - protected void swap(int i, int j) { - final byte[] tmp = array.get(i); - array.set(i, array.get(j)); - array.set(j, tmp); - } - - @Override - protected int compare(int i, int j) { - return compare(array.get(i), array.get(j)); - } - - @Override - protected void setPivot(int i) { - pivot = array.get(i); - } - - @Override - protected int comparePivot(int j) { - return compare(pivot, array.get(j)); + array.sort(comparator); + ListIterator<T> deduped = array.listIterator(); + T cmp = deduped.next(); // return the first item and advance + Iterator<T> oldArray = array.iterator(); + oldArray.next(); // advance to the old to the second item (advanced to third below) + + do { + T old = oldArray.next(); // get the next item and advance iter + if (comparator.compare(cmp, old) != 0 && (cmp = deduped.next()) != old) { + deduped.set(old); } - - private int compare(byte[] left, byte[] right) { - for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) { - int a = left[i] & 0xFF; - int b = right[j] & 0xFF; - if (a != b) { - return a - b; - } - } - return left.length - right.length; - } - - }.sort(0, array.size()); + } while (oldArray.hasNext()); + // in place update + array.subList(deduped.nextIndex(), array.size()).clear(); } public static int[] toArray(Collection<Integer> ints) { diff --git a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java index 6965f3bf664ed..9c60641e61258 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java @@ -32,7 +32,6 @@ package org.opensearch.index.mapper; -import com.carrotsearch.hppc.ObjectArrayList; import org.apache.lucene.document.StoredField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -52,6 +51,7 @@ import java.io.IOException; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Collections; @@ -240,30 +240,28 @@ protected String contentType() { */ public static class CustomBinaryDocValuesField extends CustomDocValuesField { - private final ObjectArrayList<byte[]> bytesList; - - private int totalSize = 0; + private final ArrayList<byte[]> bytesList; public CustomBinaryDocValuesField(String name, byte[] bytes) { super(name); - bytesList = new ObjectArrayList<>(); + bytesList = new ArrayList<>(); add(bytes); } public void add(byte[] bytes) { bytesList.add(bytes); - totalSize += bytes.length; } @Override public BytesRef binaryValue() { try { - CollectionUtils.sortAndDedup(bytesList); - int size = bytesList.size(); - BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5); - out.writeVInt(size); // write total number of values - for (int i = 0; i < size; i++) { - final byte[] value = bytesList.get(i); + // sort and dedup in place + CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned); + int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum); + int length = bytesList.size(); + BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5); + out.writeVInt(length); // write total number of values + for (byte[] value : bytesList) { int valueLength = value.length; out.writeVInt(valueLength); out.writeBytes(value, 0, valueLength); diff --git a/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java index 40b2706d314ce..c237bdeb5c5cf 100644 --- a/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java +++ b/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java @@ -41,9 +41,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.SortedSet; @@ -85,6 +87,32 @@ public void testRotate() { } } + private <T> void assertDeduped(List<T> array, Comparator<T> cmp, int expectedLength) { + // test the dedup w/ ArrayLists and LinkedLists + List<List<T>> types = List.of(new ArrayList<T>(array), new LinkedList<>(array)); + for (List<T> clone : types) { + // dedup the list + CollectionUtils.sortAndDedup(clone, cmp); + // verify unique elements + for (int i = 0; i < clone.size() - 1; ++i) { + assertNotEquals(cmp.compare(clone.get(i), clone.get(i + 1)), 0); + } + assertEquals(expectedLength, clone.size()); + } + } + + public void testSortAndDedup() { + // test no elements in a string array + assertDeduped(List.<String>of(), Comparator.naturalOrder(), 0); + // test no elements in an integer array + assertDeduped(List.<Integer>of(), Comparator.naturalOrder(), 0); + // test unsorted array + assertDeduped(List.of(-1, 0, 2, 1, -1, 19, -1), Comparator.naturalOrder(), 5); + // test sorted array + assertDeduped(List.of(-1, 0, 1, 2, 19, 19), Comparator.naturalOrder(), 5); + // test sorted + } + public void testSortAndDedupByteRefArray() { SortedSet<BytesRef> set = new TreeSet<>(); final int numValues = scaledRandomIntBetween(0, 10000);