From e7cf0270a4abfef01374c120cafba145a71694ed Mon Sep 17 00:00:00 2001 From: Sudipto Guha Date: Tue, 4 Jan 2022 08:41:47 -0800 Subject: [PATCH 1/4] upgrading pointstore --- .../java/com/amazon/randomcutforest/RCF3.java | 4 +- .../randomcutforest/RandomCutForest.java | 4 +- .../state/RandomCutForestMapper.java | 4 +- .../state/store/PointStoreDoubleMapper.java | 17 +- .../state/store/PointStoreFloatMapper.java | 25 +- .../state/store/PointStoreState.java | 4 +- .../tree/CompactRandomCutTreeFloatMapper.java | 4 +- .../store/IntervalManager.java | 16 + .../randomcutforest/store/PointStore.java | 748 ++++++++-------- .../store/PointStoreDouble.java | 806 ++++++++++++++++- .../store/PointStoreFloat.java | 165 ---- ...ntStoreLarge.java => PointStoreLarge.java} | 35 +- ...ntStoreSmall.java => PointStoreSmall.java} | 43 +- .../randomcutforest/store/RCF3PointStore.java | 810 ------------------ ...RandomCutForestShingledFunctionalTest.java | 8 +- .../state/RandomCutForestMapperTest.java | 6 +- .../store/PointStoreFloatMapperTest.java | 7 +- .../CompactRandomCutTreeFloatMapperTest.java | 5 +- .../store/PointStoreDoubleTest.java | 2 - ...toreFloatTest.java => PointStoreTest.java} | 19 +- .../tree/CompactRandomCutTreeFloatTest.java | 10 +- .../json/v1/V1JsonToV2StateConverter.java | 7 +- 22 files changed, 1284 insertions(+), 1465 deletions(-) delete mode 100644 Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreFloat.java rename Java/core/src/main/java/com/amazon/randomcutforest/store/{RCF3PointStoreLarge.java => PointStoreLarge.java} (61%) rename Java/core/src/main/java/com/amazon/randomcutforest/store/{RCF3PointStoreSmall.java => PointStoreSmall.java} (56%) delete mode 100644 Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStore.java rename Java/core/src/test/java/com/amazon/randomcutforest/store/{PointStoreFloatTest.java => PointStoreTest.java} (93%) diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/RCF3.java b/Java/core/src/main/java/com/amazon/randomcutforest/RCF3.java index 516c3037..60f90629 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/RCF3.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/RCF3.java @@ -60,7 +60,7 @@ import com.amazon.randomcutforest.sampler.CompactSampler; import com.amazon.randomcutforest.sampler.IStreamSampler; import com.amazon.randomcutforest.store.IPointStore; -import com.amazon.randomcutforest.store.RCF3PointStore; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.tree.IBoundingBoxView; import com.amazon.randomcutforest.tree.ITree; import com.amazon.randomcutforest.tree.NewRandomCutTree; @@ -296,7 +296,7 @@ public RCF3(Builder builder) { } private void initCompactFloat(Builder builder) { - IPointStore tempStore = RCF3PointStore.builder().capacity(pointStoreCapacity).initialSize(2 * sampleSize) + IPointStore tempStore = PointStore.builder().capacity(pointStoreCapacity).initialSize(2 * sampleSize) .internalShinglingEnabled(internalShinglingEnabled).shingleSize(shingleSize).dimensions(dimensions) // .dynamicResizingEnabled(true) .internalRotationEnabled(builder.internalRotationEnabled).build(); diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/RandomCutForest.java b/Java/core/src/main/java/com/amazon/randomcutforest/RandomCutForest.java index 43391d3a..c7490f4d 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/RandomCutForest.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/RandomCutForest.java @@ -57,8 +57,8 @@ import com.amazon.randomcutforest.sampler.IStreamSampler; import com.amazon.randomcutforest.sampler.SimpleStreamSampler; import com.amazon.randomcutforest.store.IPointStore; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.store.PointStoreDouble; -import com.amazon.randomcutforest.store.PointStoreFloat; import com.amazon.randomcutforest.tree.CompactRandomCutTreeDouble; import com.amazon.randomcutforest.tree.CompactRandomCutTreeFloat; import com.amazon.randomcutforest.tree.ITree; @@ -323,7 +323,7 @@ private void initCompactDouble(Builder builder) { } private void initCompactFloat(Builder builder) { - PointStoreFloat tempStore = PointStoreFloat.builder().internalRotationEnabled(builder.internalRotationEnabled) + PointStore tempStore = PointStore.builder().internalRotationEnabled(builder.internalRotationEnabled) .capacity(pointStoreCapacity).initialSize(initialPointStoreSize) .directLocationEnabled(builder.directLocationMapEnabled) .internalShinglingEnabled(internalShinglingEnabled) diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/RandomCutForestMapper.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/RandomCutForestMapper.java index f0cddf56..be1c59b4 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/RandomCutForestMapper.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/RandomCutForestMapper.java @@ -48,8 +48,8 @@ import com.amazon.randomcutforest.state.tree.CompactRandomCutTreeFloatMapper; import com.amazon.randomcutforest.state.tree.CompactRandomCutTreeState; import com.amazon.randomcutforest.store.IPointStore; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.store.PointStoreDouble; -import com.amazon.randomcutforest.store.PointStoreFloat; import com.amazon.randomcutforest.tree.CompactRandomCutTreeDouble; import com.amazon.randomcutforest.tree.CompactRandomCutTreeFloat; import com.amazon.randomcutforest.tree.ITree; @@ -163,7 +163,7 @@ public RandomCutForestState toState(RandomCutForest forest) { if (forest.getPrecision() == Precision.FLOAT_32) { PointStoreFloatMapper mapper = new PointStoreFloatMapper(); mapper.setCompressionEnabled(compressionEnabled); - pointStoreState = mapper.toState((PointStoreFloat) pointStoreCoordinator.getStore()); + pointStoreState = mapper.toState((PointStore) pointStoreCoordinator.getStore()); } else { PointStoreDoubleMapper mapper = new PointStoreDoubleMapper(); mapper.setCompressionEnabled(compressionEnabled); diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java index fb60bade..7c5f2368 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java @@ -25,7 +25,6 @@ import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.state.IStateMapper; -import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.store.PointStoreDouble; import com.amazon.randomcutforest.util.ArrayPacking; @@ -49,7 +48,7 @@ public PointStoreDouble toModel(PointStoreState state, long seed) { int startOfFreeSegment = state.getStartOfFreeSegment(); int[] refCount = ArrayPacking.unpackInts(state.getRefCount(), indexCapacity, state.isCompressed()); int[] locationList = new int[indexCapacity]; - Arrays.fill(locationList, PointStore.INFEASIBLE_POINTSTORE_LOCATION); + Arrays.fill(locationList, PointStoreDouble.INFEASIBLE_LOCN); int[] tempList = ArrayPacking.unpackInts(state.getLocationList(), state.isCompressed()); System.arraycopy(tempList, 0, locationList, 0, tempList.length); @@ -71,23 +70,25 @@ public PointStoreState toState(PointStoreDouble model) { state.setDimensions(model.getDimensions()); state.setCapacity(model.getCapacity()); state.setShingleSize(model.getShingleSize()); - state.setDirectLocationMap(model.isDirectLocationMap()); + state.setDirectLocationMap(false); state.setInternalShinglingEnabled(model.isInternalShinglingEnabled()); state.setLastTimeStamp(model.getNextSequenceIndex()); if (model.isInternalShinglingEnabled()) { state.setInternalShingle(model.getInternalShingle()); state.setRotationEnabled(model.isInternalRotationEnabled()); } - state.setDynamicResizingEnabled(model.isDynamicResizingEnabled()); - if (model.isDynamicResizingEnabled()) { + state.setDynamicResizingEnabled(true); + if (state.isDynamicResizingEnabled()) { state.setCurrentStoreCapacity(model.getCurrentStoreCapacity()); state.setIndexCapacity(model.getIndexCapacity()); } state.setStartOfFreeSegment(model.getStartOfFreeSegment()); state.setPrecision(Precision.FLOAT_64.name()); - int prefix = model.getValidPrefix(); - state.setRefCount(ArrayPacking.pack(model.getRefCount(), prefix, state.isCompressed())); - state.setLocationList(ArrayPacking.pack(model.getLocationList(), prefix, state.isCompressed())); + // int prefix = model.getValidPrefix(); + int[] refcount = model.getRefCount(); + state.setRefCount(ArrayPacking.pack(refcount, refcount.length, state.isCompressed())); + int[] locationList = model.getLocationList(); + state.setLocationList(ArrayPacking.pack(locationList, locationList.length, state.isCompressed())); state.setPointData(ArrayPacking.pack(model.getStore(), model.getStartOfFreeSegment())); return state; } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java index 23707195..b134896d 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java @@ -18,20 +18,17 @@ import static com.amazon.randomcutforest.CommonUtils.checkArgument; import static com.amazon.randomcutforest.CommonUtils.checkNotNull; -import java.util.Arrays; - import lombok.Getter; import lombok.Setter; import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.state.IStateMapper; import com.amazon.randomcutforest.store.PointStore; -import com.amazon.randomcutforest.store.PointStoreFloat; import com.amazon.randomcutforest.util.ArrayPacking; @Getter @Setter -public class PointStoreFloatMapper implements IStateMapper { +public class PointStoreFloatMapper implements IStateMapper { /** * If true, then the arrays are compressed via simple data dependent scheme @@ -39,7 +36,7 @@ public class PointStoreFloatMapper implements IStateMapper stack, int capacity) { + checkArgument(capacity > 0, "incorrect parameters"); + lastInUse = stack.size(); + freeIndexesEnd = new int[lastInUse + 1]; + freeIndexesStart = new int[lastInUse + 1]; + this.capacity = capacity; + int count = 0; + while (stack.size() > 0) { + int[] interval = stack.pop(); + freeIndexesStart[count] = interval[0]; + freeIndexesEnd[count] = interval[1]; + ++count; + } + } + public void extendCapacity(int newCapacity) { checkArgument(newCapacity > capacity, " incorrect call, we can only increase capacity"); // the current capacity need not be the final capacity, for example in case of diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java index 51286b2d..0c363e65 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java @@ -17,40 +17,34 @@ import static com.amazon.randomcutforest.CommonUtils.checkArgument; import static com.amazon.randomcutforest.CommonUtils.checkState; +import static java.lang.Math.max; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; import java.util.Optional; +import java.util.Stack; +import java.util.Vector; -/** - * PointStore is a fixed size repository of points, where each point is a float - * array of a specified length. A PointStore counts references to points that - * are added, and frees space internally when a given point is no longer in use. - * The primary use of this store is to enable compression since the points in - * two different trees do not have to be stored separately. - * - * Stored points are referenced by index values which can be used to look up the - * point values and increment and decrement reference counts. Valid index values - * are between 0 (inclusive) and capacity (exclusive). - */ -public abstract class PointStore implements IPointStore { - - public static int INFEASIBLE_POINTSTORE_LOCATION = -2; +public abstract class PointStore implements IPointStore { public static int INFEASIBLE_POINTSTORE_INDEX = -1; /** * an index manager to manage free locations */ - protected IndexManager indexManager; + protected IntervalManager indexManager; /** * generic store class */ - protected Store store; + protected float[] store; /** * generic internal shingle, note that input is doubles */ protected double[] internalShingle; + /** + * enable rotation of shingles; use a cyclic buffer instead of sliding window + */ + boolean rotationEnabled; /** * last seen timestamp for internal shingling */ @@ -59,14 +53,16 @@ public abstract class PointStore implements IPointStore { * pointers to store locations, this decouples direct addressing and points can * be moved internally */ - protected int[] locationList; + // protected char[] locationList; /** * refCount[i] counts of the number of trees that are currently using the point * determined by locationList[i] or (for directLocationMapping) the point at * store[i * dimensions] */ - protected int[] refCount; + protected byte[] refCount; + + protected HashMap refCountMap; /** * first location where new data can be safely copied; */ @@ -87,11 +83,6 @@ public abstract class PointStore implements IPointStore { */ int baseDimension; - /** - * are the addresses mapped directly (saves space and runtime for shingleSize = - * 1) - */ - boolean directLocationMap; /** * maximum capacity */ @@ -100,18 +91,19 @@ public abstract class PointStore implements IPointStore { * current capacity of store (number of shingled points) */ int currentStoreCapacity; - /** - * ability to resize the store dynamically - */ - boolean dynamicResizingEnabled; + /** * enabling internal shingling */ boolean internalShinglingEnabled; - /** - * enable rotation of shingles; use a cyclic buffer instead of sliding window - */ - boolean rotationEnabled; + + abstract void setInfeasiblePointstoreLocationIndex(int index); + + abstract void extendLocationList(int newCapacity); + + abstract void setLocation(int index, int location); + + abstract int getLocation(int index); /** * Decrement the reference count for the given index. @@ -123,36 +115,28 @@ public abstract class PointStore implements IPointStore { */ @Override public int decrementRefCount(int index) { - indexManager.checkValidIndex(index); - if (refCount[index] == 1) { - indexManager.releaseIndex(index); - if (!directLocationMap) { - locationList[index] = PointStore.INFEASIBLE_POINTSTORE_LOCATION; + checkArgument(index >= 0 && index < locationListLength(), " index not supported by store"); + checkArgument((refCount[index] & 0xff) > 0, " cannot decrement index"); + Integer value = refCountMap.remove(index); + if (value == null) { + if ((refCount[index] & 0xff) == 1) { + indexManager.releaseIndex(index); + refCount[index] = (byte) 0; + setInfeasiblePointstoreLocationIndex(index); + return 0; + } else { + int newVal = (byte) ((refCount[index] & 0xff) - 1); + refCount[index] = (byte) newVal; + return newVal; + } + } else { + if (value > 1) { + refCountMap.put(index, value - 1); } + return value - 1 + (refCount[index] & 0xff); } - return --refCount[index]; } - /** - * the function checks if the provided shingled point aligns with the location - * - * @param location location in the store where the point is copied - * @param point the point to be added - * @return true/false for an alignment - */ - abstract boolean checkShingleAlignment(int location, double[] point); - - /** - * copy the point starting from its location src to the location in the store - * for desired length - * - * @param point input point - * @param src location of the point that is not in a previous shingle - * @param location location in the store - * @param length length to be copied - */ - abstract void copyPoint(double[] point, int src, int location, int length); - /** * takes an index from the index manager and rezises if necessary also adjusts * refCount size to have increment/decrement be seamless @@ -160,25 +144,21 @@ public int decrementRefCount(int index) { * @return an index from the index manager */ int takeIndex() { - if (indexManager.isFull()) { + if (indexManager.isEmpty()) { if (indexManager.getCapacity() < capacity) { int oldCapacity = indexManager.getCapacity(); - int newCapacity = Math.min(capacity, 2 * oldCapacity); - indexManager = new IndexManager(indexManager, newCapacity); + int newCapacity = Math.min(capacity, 1 + (int) Math.floor(1.1 * oldCapacity)); + indexManager.extendCapacity(newCapacity); refCount = Arrays.copyOf(refCount, newCapacity); - locationList = Arrays.copyOf(locationList, newCapacity); - for (int i = oldCapacity; i < newCapacity; i++) { - locationList[i] = INFEASIBLE_POINTSTORE_LOCATION; - } + extendLocationList(newCapacity); } else { throw new IllegalStateException(" index manager in point store is full "); } } - int location = indexManager.takeIndex(); - return location; + return indexManager.takeIndex(); } - int getAmountToWrite(double[] tempPoint) { + protected int getAmountToWrite(double[] tempPoint) { if (checkShingleAlignment(startOfFreeSegment, tempPoint)) { if (!rotationEnabled || startOfFreeSegment % dimensions == (nextSequenceIndex - 1) * baseDimension % dimensions) { @@ -186,8 +166,8 @@ int getAmountToWrite(double[] tempPoint) { } } else if (!rotationEnabled) { return dimensions; - } + } // the following adds the padding for what exists; // then the padding for the new part; all mod (dimensions) // note that the expression is baseDimension when the condition @@ -224,57 +204,34 @@ public int add(double[] point, long sequenceNum) { } } int nextIndex; - if (!directLocationMap) { - // suppose there was shingling then only the contents of the most recent - // point has to be written, otherwise we need to write the full shingle - int amountToWrite = getAmountToWrite(tempPoint); + int amountToWrite = getAmountToWrite(tempPoint); + + if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { + // try compaction and then resizing + compact(); + // the compaction can change the array contents + amountToWrite = getAmountToWrite(tempPoint); if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { - // try compaction and then resizing - compact(); - // the compaction can change the array contents - amountToWrite = getAmountToWrite(tempPoint); - - if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { - checkState(dynamicResizingEnabled, " out of store, enable dynamic resizing "); - resizeStore(); - checkState(startOfFreeSegment + amountToWrite <= currentStoreCapacity * dimensions, "out of space"); - } + resizeStore(); + checkState(startOfFreeSegment + amountToWrite <= currentStoreCapacity * dimensions, "out of space"); } + } - nextIndex = takeIndex(); + nextIndex = takeIndex(); - locationList[nextIndex] = startOfFreeSegment - dimensions + amountToWrite; - if (amountToWrite <= dimensions) { - copyPoint(tempPoint, dimensions - amountToWrite, startOfFreeSegment, amountToWrite); - } else { - copyPoint(tempPoint, 0, startOfFreeSegment + amountToWrite - dimensions, dimensions); - } - startOfFreeSegment += amountToWrite; + setLocation(nextIndex, startOfFreeSegment - dimensions + amountToWrite); + if (amountToWrite <= dimensions) { + copyPoint(tempPoint, dimensions - amountToWrite, startOfFreeSegment, amountToWrite); } else { - nextIndex = takeIndex(); - int address = locationList[nextIndex]; - if (address == INFEASIBLE_POINTSTORE_LOCATION) { - if (startOfFreeSegment + dimensions > currentStoreCapacity * dimensions) { - checkState(dynamicResizingEnabled, " out of store, enable dynamic resizing "); - resizeStore(); - if (startOfFreeSegment + dimensions > currentStoreCapacity * dimensions) { - indexManager.releaseIndex(nextIndex); // put back the last index - compact(); - nextIndex = takeIndex(); - } - } - locationList[nextIndex] = address = startOfFreeSegment; - startOfFreeSegment = Math.max(startOfFreeSegment, address + dimensions); - } - copyPoint(tempPoint, 0, address, dimensions); + copyPoint(tempPoint, 0, startOfFreeSegment + amountToWrite - dimensions, dimensions); } - refCount[nextIndex] = 1; // has to be after compactions + startOfFreeSegment += amountToWrite; + + refCount[nextIndex] = 1; return nextIndex; } - abstract void resizeStore(); - /** * Increment the reference count for the given index. This operation assumes * that there is currently a point stored at the given index and will throw an @@ -286,8 +243,22 @@ public int add(double[] point, long sequenceNum) { * index is non positive. */ public int incrementRefCount(int index) { - indexManager.checkValidIndex(index); - return ++refCount[index]; + checkArgument(index >= 0 && index < locationListLength(), " index not supported by store"); + checkArgument((refCount[index] & 0xff) > 0, " not in use"); + Integer value = refCountMap.remove(index); + if (value == null) { + if ((refCount[index] & 0xff) == 255) { + refCountMap.put(index, 1); + return 256; + } else { + int newVal = (byte) ((refCount[index] & 0xff) + 1); + refCount[index] = (byte) newVal; + return newVal; + } + } else { + refCountMap.put(index, value + 1); + return value + 1; + } } @Override @@ -295,45 +266,6 @@ public int getDimensions() { return dimensions; } - /** - * Test whether the given point is equal to the point stored at the given index. - * This operation uses point-wise == to test for equality. - * - * @param index The index value of the point we are comparing to. - * @param point The point we are comparing for equality. - * @return true if the point stored at the index is equal to the given point, - * false otherwise. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is non positive. - * @throws IllegalArgumentException if the length of the point does not match - * the point store's dimensions. - */ - - abstract public boolean pointEquals(int index, Point point); - - /** - * Get a copy of the point at the given index. - * - * @param index An index value corresponding to a storage location in this point - * store. - * @return a copy of the point stored at the given index. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is non positive. - */ - @Override - abstract public Point get(int index); - - /** - * to print error messages - * - * @param index index of the point in the store - * @return string corresponding to the point - */ - @Override - abstract public String toString(int index); - /** * maximum capacity, in number of points of size dimensions */ @@ -350,7 +282,7 @@ public int getIndexCapacity() { /** * used in mapper - * + * * @return gets the shingle size (if known, otherwise is 1) */ public int getShingleSize() { @@ -360,7 +292,7 @@ public int getShingleSize() { /** * gets the current store capacity in the number of points with dimension many * values - * + * * @return capacity in number of points */ public int getCurrentStoreCapacity() { @@ -369,25 +301,33 @@ public int getCurrentStoreCapacity() { /** * used for mappers - * + * * @return the store that stores the values */ - public Store getStore() { + public float[] getStore() { return store; } /** * used for mapper - * + * * @return the array of counts referring to different points */ public int[] getRefCount() { - return refCount; + int[] newarray = new int[refCount.length]; + for (int i = 0; i < refCount.length; i++) { + newarray[i] = refCount[i] & 0xff; + Integer value = refCountMap.get(i); + if (value != null) { + newarray[i] += value; + } + } + return newarray; } /** * useful in mapper to not copy - * + * * @return the length of the prefix */ public int getStartOfFreeSegment() { @@ -396,62 +336,26 @@ public int getStartOfFreeSegment() { /** * used in mapper - * - * @return the list of locations where points are stored - */ - public int[] getLocationList() { - return locationList; - } - - /** - * used in mapper - * + * * @return if shingling is performed internally */ public boolean isInternalShinglingEnabled() { return internalShinglingEnabled; } - /** - * - * @return if the shingles performed internally are rotated as in a cyclic - * buffer - */ - public boolean isInternalRotationEnabled() { - return rotationEnabled; - } - /** * used in mapper and in extrapolation - * + * * @return the last timestamp seen */ public long getNextSequenceIndex() { return nextSequenceIndex; } - /** - * used in mapper - * - * @return ability to start from a small size an increase the store - */ - public boolean isDynamicResizingEnabled() { - return dynamicResizingEnabled; - } - - /** - * used in mapper, as well as an optimization for shingle size 1 - * - * @return is locationList being used - */ - public boolean isDirectLocationMap() { - return directLocationMap; - } - /** * used to obtain the most recent shingle seen so far in case of internal * shingling - * + * * @return for internal shingling, returns the last seen shingle */ public double[] getInternalShingle() { @@ -459,150 +363,105 @@ public double[] getInternalShingle() { return copyShingle(); } - /** - * - * @return the number of indices stored - */ - public int size() { - return indexManager.capacity - indexManager.freeIndexPointer - 1; - } - - /** - * a simple optimization that identifies the prefix of the arrays (refCount, - * referenceList) that are being used - * - * @return size of initial prefix in use - */ - public int getValidPrefix() { - return indexManager.occupied.previousSetBit(capacity) + 1; - } - - /** - * copy function for the store - * - * @param dest location to move to - * @param source moving from - * @param length number of values copied - */ - abstract void copyTo(int dest, int source, int length); - - /** - * the following function returns the locations which are in use; note that it - * adjusts for multivariate input when baseDimension is greater than 1. - * - * @return the bitset corresponding to the locations in use - */ - - BitSet inUse() { - BitSet result = new BitSet(currentStoreCapacity * dimensions / baseDimension); - - for (int i = 0; i < indexManager.capacity; i++) { - if (indexManager.occupied.get(i)) { - result.set(locationList[i] / baseDimension); - } else { - locationList[i] = INFEASIBLE_POINTSTORE_LOCATION; - } - } - return result; - } - /** * The following function eliminates redundant information that builds up in the * point store and shrinks the point store */ - public void compact() { - - int runningLocation = 0; - startOfFreeSegment = 0; - - // we first determine which locations are the start points of the shingles - // since the shingles extend for a length and can overlap this help define the - // region that should be copied - - HashMap movedTo = new HashMap<>(); + abstract int locationListLength(); - // the bit set corresponds to the locations that can be in use over the actual - // store array - // this is not the same as the number of points that can be stored - BitSet inUse = inUse(); - - // we make a pass over the store data - while (runningLocation < currentStoreCapacity * dimensions) { - // find the first eligible shingle to be copied - // for rotationEnabled, this should be a multiple of dimensions - - runningLocation = inUse.nextSetBit(runningLocation / baseDimension) * baseDimension; - if (runningLocation < 0) { // no next bit set - runningLocation = currentStoreCapacity * dimensions; + public void compact() { + Vector reverseReference = new Vector<>(); + for (int i = 0; i < locationListLength(); i++) { + int locn = getLocation(i); + if (locn < currentStoreCapacity * dimensions && locn >= 0) { + reverseReference.add(new Integer[] { locn, i }); } - - // we are now at the start of the data but for rotation enabled internal - // shingling - // we need to ensure that locations remain a multiple of dimensions - - if (rotationEnabled && runningLocation % dimensions != 0) { - // put back the items so that we begin from a multiple of dimensions - // in case rotation is enabled - while (runningLocation % dimensions != 0) { - runningLocation--; + } + reverseReference.sort((o1, o2) -> o1[0].compareTo(o2[0])); + int freshStart = 0; + int jStatic = 0; + int jDynamic = 0; + int jEnd = reverseReference.size(); + while (jStatic < jEnd) { + int blockStart = reverseReference.get(jStatic)[0]; + int blockEnd = blockStart + dimensions; + int initial = 0; + if (rotationEnabled) { + initial = (dimensions - freshStart + blockStart) % dimensions; + } + int k = jStatic + 1; + jDynamic = jStatic + 1; + while (k < jEnd) { + int newElem = reverseReference.get(k)[0]; + if (blockEnd >= newElem) { + k += 1; + jDynamic += 1; + blockEnd = max(blockEnd, newElem + dimensions); + } else { + k = jEnd; } } - /** - * recursively keep copying; if a new relevant shingle is found during the - * copying, the remainsToBeCopied is updated to dimensions - */ - if (runningLocation < currentStoreCapacity * dimensions) { - int remainsToBeCopied = dimensions; - int saveLocation = runningLocation; - int shadowLocation = startOfFreeSegment; - /** - * note that remainsToBeCopied corresponds to the shingle; the test for division - * by dimensions ensure that every rotated shingle - */ - while (runningLocation < currentStoreCapacity * dimensions - && (remainsToBeCopied > 0 || rotationEnabled && runningLocation % dimensions != 0)) { - if (baseDimension == 1 || runningLocation % baseDimension == 0) { - if (inUse.get(runningLocation / baseDimension)) { // need to copy dimension more bits - remainsToBeCopied = dimensions; - if (shadowLocation < runningLocation) { // actual move is necessary - assert (!rotationEnabled || (runningLocation - shadowLocation) % dimensions == 0); - movedTo.put(runningLocation, shadowLocation); - } - } + // aligning the boundaries + for (int i = 0; i < initial; i++) { + store[freshStart] = 0; + ++freshStart; + } + + for (int i = blockStart; i < blockEnd; i++) { + store[freshStart] = store[i]; + assert (!rotationEnabled || freshStart % dimensions == i % dimensions); + + if (jStatic < jEnd) { + int locn = reverseReference.get(jStatic)[0]; + if (i == locn) { + int newIdx = reverseReference.get(jStatic)[1]; + setLocation(newIdx, freshStart); + jStatic += 1; } - runningLocation++; - shadowLocation++; - remainsToBeCopied--; } - copyTo(startOfFreeSegment, saveLocation, runningLocation - saveLocation); - startOfFreeSegment += runningLocation - saveLocation; + freshStart += 1; } - } - // now fix the addressing, assuming something has moved - if (!movedTo.isEmpty()) { - for (int i = 0; i < indexManager.capacity; i++) { - if (movedTo.containsKey(locationList[i])) { // need not have moved - locationList[i] = movedTo.get(locationList[i]); - } + + if (jStatic != jDynamic) { + throw new IllegalStateException("There is discepancy in indices"); } } + startOfFreeSegment = freshStart; } /** * returns the number of copies of a point - * + * * @param i index of a point * @return number of copies of the point managed by the store */ public int getRefCount(int i) { - return refCount[i]; + int val = refCount[i] & 0xff; + Integer value = refCountMap.get(i); + if (value != null) { + val += value; + } + return val; } + @Override + public boolean isInternalRotationEnabled() { + return rotationEnabled; + } + + /** + * + * @return the number of indices stored + */ + public abstract int size(); + + public abstract int[] getLocationList(); + /** * transforms a point to a shingled point if internal shingling is turned on - * + * * @param point new input values * @return shingled point */ @@ -629,12 +488,12 @@ private double[] copyShingle() { /** * the following function is used to update the shingle in place; it can be used * to produce new copies as well - * + * * @param target the array containing the shingled point * @param point the new values * @return the array which now contains the updated shingle */ - private double[] constructShingleInPlace(double[] target, double[] point, boolean rotationEnabled) { + protected double[] constructShingleInPlace(double[] target, double[] point, boolean rotationEnabled) { if (!rotationEnabled) { for (int i = 0; i < dimensions - baseDimension; i++) { target[i] = target[i + baseDimension]; @@ -655,7 +514,7 @@ private double[] constructShingleInPlace(double[] target, double[] point, boolea * for extrapolation and imputation, in presence of internal shingling we need * to update the list of missing values from the space of the input dimensions * to the shingled dimensions - * + * * @param indexList list of missing values in the input point * @return list of missing values in the shingled point */ @@ -683,26 +542,26 @@ public int[] transformIndices(int[] indexList) { * a builder */ - public static class Builder> { + public static class Builder> { // We use Optional types for optional primitive fields when it doesn't make // sense to use a constant default. - private int dimensions; - private int shingleSize = 1; - private boolean internalShinglingEnabled = false; - private boolean dynamicResizingEnabled = true; - private boolean internalRotationEnabled = false; - private boolean directLocationEnabled = false; - private int capacity; - private Optional initialPointStoreSize = Optional.empty(); - private int currentStoreCapacity = 0; - private int indexCapacity = 0; - private double[] knownShingle = null; - private int[] locationList = null; - private int[] refCount = null; - private long nextTimeStamp = 0; - private int startOfFreeSegment = 0; + protected int dimensions; + protected int shingleSize = 1; + protected int baseDimension; + protected boolean internalRotationEnabled = false; + protected boolean internalShinglingEnabled = false; + protected int capacity; + protected Optional initialPointStoreSize = Optional.empty(); + protected int currentStoreCapacity = 0; + protected int indexCapacity = 0; + protected float[] store = null; + protected double[] knownShingle = null; + protected int[] locationList = null; + protected int[] refCount = null; + protected long nextTimeStamp = 0; + protected int startOfFreeSegment = 0; // dimension of the points being stored public T dimensions(int dimensions) { @@ -716,13 +575,6 @@ public T capacity(int capacity) { return (T) this; } - // can the pointstore allocate a smaller data structure and grow, - // may save space - public T dynamicResizingEnabled(boolean dynamicResizingEnabled) { - this.dynamicResizingEnabled = dynamicResizingEnabled; - return (T) this; - } - // initial size of the pointstore, dynamicResizing must be on // and value cannot exceed capacity public T initialSize(int initialPointStoreSize) { @@ -736,13 +588,6 @@ public T shingleSize(int shingleSize) { return (T) this; } - // is direct location enabled, without the location[] - // may be faster and save some space - public T directLocationEnabled(boolean directLocationEnabled) { - this.directLocationEnabled = directLocationEnabled; - return (T) this; - } - // is internal shingling enabled public T internalShinglingEnabled(boolean internalShinglingEnabled) { this.internalShinglingEnabled = internalShinglingEnabled; @@ -755,6 +600,16 @@ public T internalRotationEnabled(boolean internalRotationEnabled) { return (T) this; } + @Deprecated + public T directLocationEnabled(boolean value) { + return (T) this; + } + + @Deprecated + public T dynamicResizingEnabled(boolean value) { + return (T) this; + } + // the size of the array storing the specific points // this is used for serialization public T currentStoreCapacity(int currentStoreCapacity) { @@ -791,6 +646,11 @@ public T locationList(int[] locationList) { return (T) this; } + public T store(float[] store) { + this.store = store; + return (T) this; + } + // location of where points can be written // used for serialization public T startOfFreeSegment(int startOfFreeSegment) { @@ -805,21 +665,22 @@ public T nextTimeStamp(long nextTimeStamp) { return (T) this; } + public PointStore build() { + if (shingleSize * capacity < Character.MAX_VALUE) { + return new PointStoreSmall(this); + } else { + return new PointStoreLarge(this); + } + } } - public PointStore(Builder builder) { + public PointStore(PointStore.Builder builder) { checkArgument(builder.dimensions > 0, "dimensions must be greater than 0"); checkArgument(builder.capacity > 0, "capacity must be greater than 0"); checkArgument(builder.shingleSize == 1 || builder.dimensions == builder.shingleSize || builder.dimensions % builder.shingleSize == 0, "incorrect use of shingle size"); - checkArgument(!builder.internalRotationEnabled || builder.internalShinglingEnabled, - "rotation can be enabled for internal shingling only"); - builder.initialPointStoreSize.ifPresent(n -> { - checkState(dynamicResizingEnabled || builder.capacity >= (int) n, - " incorrect initialization, enable dynamic resizing"); - }); /** - * the following checks are due to mappers. + * the following checks are due to mappers (kept for future) */ if (builder.refCount != null || builder.locationList != null || builder.knownShingle != null) { checkArgument(builder.refCount != null, "reference count must be present"); @@ -835,43 +696,190 @@ public PointStore(Builder builder) { this.shingleSize = builder.shingleSize; this.dimensions = builder.dimensions; - this.directLocationMap = builder.directLocationEnabled; this.internalShinglingEnabled = builder.internalShinglingEnabled; this.rotationEnabled = builder.internalRotationEnabled; - this.currentStoreCapacity = builder.currentStoreCapacity; - this.dynamicResizingEnabled = builder.dynamicResizingEnabled; this.baseDimension = this.dimensions / this.shingleSize; this.capacity = builder.capacity; + this.refCountMap = new HashMap<>(); if (builder.refCount == null) { - int size = (int) builder.initialPointStoreSize.orElse(dynamicResizingEnabled ? 1 : builder.capacity); + int size = (int) builder.initialPointStoreSize.orElse(builder.capacity); currentStoreCapacity = size; - indexManager = new IndexManager(size); + this.indexManager = new IntervalManager(size); startOfFreeSegment = 0; - refCount = new int[size]; + refCount = new byte[size]; if (internalShinglingEnabled) { nextSequenceIndex = 0; internalShingle = new double[dimensions]; } - locationList = new int[size]; - Arrays.fill(locationList, INFEASIBLE_POINTSTORE_LOCATION); + store = new float[currentStoreCapacity * dimensions]; } else { - this.refCount = builder.refCount; - this.locationList = builder.locationList; + this.refCount = new byte[builder.refCount.length]; + for (int i = 0; i < refCount.length; i++) { + if (builder.refCount[i] >= 0 && builder.refCount[i] <= 255) { + refCount[i] = (byte) builder.refCount[i]; + } else if (builder.refCount[i] > 255) { + refCount[i] = (byte) 255; + refCountMap.put(i, builder.refCount[i] - 255); + } + } this.startOfFreeSegment = builder.startOfFreeSegment; this.nextSequenceIndex = builder.nextTimeStamp; - + this.currentStoreCapacity = builder.currentStoreCapacity; if (internalShinglingEnabled) { - this.internalShingle = Arrays.copyOf(builder.knownShingle, dimensions); + this.internalShingle = (builder.knownShingle != null) ? Arrays.copyOf(builder.knownShingle, dimensions) + : new double[dimensions]; } - BitSet bits = new BitSet(builder.indexCapacity); - for (int i = 0; i < builder.indexCapacity; i++) { - if (refCount[i] > 0) { + BitSet bits = new BitSet(refCount.length); + int count = 0; + for (int i = 0; i < refCount.length; i++) { + if ((refCount[i] & 0xff) > 0) { bits.set(i); + ++count; + } + } + int first = bits.nextClearBit(0); + Stack stack = new Stack<>(); + + while (first < refCount.length) { + int last = bits.nextSetBit(first) - 1; + if (last >= first) { + stack.push(new int[] { first, last }); + first = bits.nextClearBit(last + 1); + if (first < 0) { + break; + } + } else { // we do not all distiction between all full and all empty + if (first < refCount.length - 1) { + if (bits.nextClearBit(first + 1) == first + 1) { + stack.push(new int[] { first, refCount.length - 1 }); + } else { + stack.push(new int[] { first, first }); + } + } else { + stack.push(new int[] { refCount.length - 1, refCount.length - 1 }); + } + break; } } - indexManager = new IndexManager(builder.indexCapacity, bits); + indexManager = new IntervalManager(stack, builder.indexCapacity); + store = (builder.store == null) ? new float[currentStoreCapacity * dimensions] : builder.store; } } + void resizeStore() { + int maxCapacity = (rotationEnabled) ? 2 * capacity : capacity; + int newCapacity = (int) Math.floor(Math.min(1.1 * currentStoreCapacity, maxCapacity)); + if (newCapacity > currentStoreCapacity) { + float[] newStore = new float[newCapacity * dimensions]; + System.arraycopy(store, 0, newStore, 0, currentStoreCapacity * dimensions); + currentStoreCapacity = newCapacity; + store = newStore; + } + } + + boolean checkShingleAlignment(int location, double[] point) { + boolean test = (location - dimensions + baseDimension >= 0); + for (int i = 0; i < dimensions - baseDimension && test; i++) { + test = (((float) point[i]) == store[location - dimensions + baseDimension + i]); + } + return test; + } + + void copyPoint(double[] point, int src, int location, int length) { + for (int i = 0; i < length; i++) { + store[location + i] = (float) point[src + i]; + } + } + + protected abstract void checkFeasible(int index); + + /** + * Test whether the given point is equal to the point stored at the given index. + * This operation uses point-wise == to test for equality. + * + * @param index The index value of the point we are comparing to. + * @param point The point we are comparing for equality. + * @return true if the point stored at the index is equal to the given point, + * false otherwise. + * @throws IllegalArgumentException if the index value is not valid. + * @throws IllegalArgumentException if the current reference count for this + * index is nonpositive. + * @throws IllegalArgumentException if the length of the point does not match + * the point store's dimensions. + */ + + @Override + public boolean pointEquals(int index, float[] point) { + checkArgument(point.length == dimensions, "point.length must be equal to dimensions"); + checkArgument(index >= 0 && index < locationListLength(), " index not supported by store"); + checkFeasible(index); + int address = getLocation(index); + if (!rotationEnabled) { + for (int j = 0; j < dimensions; j++) { + if (point[j] != store[j + address]) { + return false; + } + } + } else { + for (int j = 0; j < dimensions; j++) { + if (point[(j + address) % dimensions] != store[j + address]) { + return false; + } + } + } + + return true; + } + + /** + * Get a copy of the point at the given index. + * + * @param index An index value corresponding to a storage location in this point + * store. + * @return a copy of the point stored at the given index. + * @throws IllegalArgumentException if the index value is not valid. + * @throws IllegalArgumentException if the current reference count for this + * index is nonpositive. + */ + @Override + public float[] get(int index) { + checkArgument(index >= 0 && index < locationListLength(), " index not supported by store"); + checkFeasible(index); + int address = getLocation(index); + + if (!rotationEnabled) { + return Arrays.copyOfRange(store, address, address + dimensions); + } else { + float[] answer = new float[dimensions]; + for (int i = 0; i < dimensions; i++) { + answer[(address + i) % dimensions] = store[address + i]; + } + return answer; + } + } + + public float[] getScaledPoint(int index, double factor) { + float[] answer = get(index); + for (int i = 0; i < dimensions; i++) { + answer[i] *= factor; + } + return answer; + } + + public String toString(int index) { + return Arrays.toString(get(index)); + } + + void copyTo(int dest, int source, int length) { + // validateInternalState(dest <= source, "error"); + for (int i = 0; i < length; i++) { + store[dest + i] = store[source + i]; + } + } + + public static Builder builder() { + return new Builder(); + } + } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java index 0e45af0d..c7a69ee1 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java @@ -16,38 +16,766 @@ package com.amazon.randomcutforest.store; import static com.amazon.randomcutforest.CommonUtils.checkArgument; +import static com.amazon.randomcutforest.CommonUtils.checkState; +import static java.lang.Math.max; import java.util.Arrays; +import java.util.BitSet; +import java.util.HashMap; +import java.util.Optional; +import java.util.Stack; +import java.util.Vector; -/** - * PointStore is a fixed size repository of points, where each point is a float - * array of a specified length. A PointStore counts references to points that - * are added, and frees space internally when a given point is no longer in use. - * The primary use of this store is to enable compression since the points in - * two different trees do not have to be stored separately. - * - * Stored points are referenced by index values which can be used to look up the - * point values and increment and decrement reference counts. Valid index values - * are between 0 (inclusive) and capacity (exclusive). - */ -public class PointStoreDouble extends PointStore { +public class PointStoreDouble implements IPointStore { - public PointStoreDouble(Builder builder) { - super(builder); - checkArgument(builder.store == null || builder.store.length == currentStoreCapacity * dimensions, - " incorrect store length"); - store = (builder.store == null) ? new double[currentStoreCapacity * dimensions] : builder.store; + public static int INFEASIBLE_POINTSTORE_INDEX = -1; + public static int INFEASIBLE_LOCN = (int) -2; + /** + * an index manager to manage free locations + */ + protected IntervalManager indexManager; + /** + * generic store class + */ + protected double[] store; + /** + * generic internal shingle, note that input is doubles + */ + protected double[] internalShingle; + /** + * enable rotation of shingles; use a cyclic buffer instead of sliding window + */ + boolean rotationEnabled; + /** + * last seen timestamp for internal shingling + */ + protected long nextSequenceIndex; + /** + * pointers to store locations, this decouples direct addressing and points can + * be moved internally + */ + protected int[] locationList; + /** + * refCount[i] counts of the number of trees that are currently using the point + * determined by locationList[i] or (for directLocationMapping) the point at + * store[i * dimensions] + */ + protected byte[] refCount; + + protected HashMap refCountMap; + /** + * first location where new data can be safely copied; + */ + int startOfFreeSegment; + /** + * overall dimension of the point (after shingling) + */ + int dimensions; + /** + * shingle size, if known. Setting shingle size = 1 rules out overlapping + */ + int shingleSize; + /** + * number of original dimensions which are shingled to produce and overall point + * dimensions = shingleSize * baseDimensions. However there is a possibility + * that even though the data is shingled, we may not choose to use the + * overlapping (say for out of order updates). + */ + int baseDimension; + + /** + * maximum capacity + */ + int capacity; + /** + * current capacity of store (number of shingled points) + */ + int currentStoreCapacity; + + /** + * enabling internal shingling + */ + boolean internalShinglingEnabled; + + /** + * Decrement the reference count for the given index. + * + * @param index The index value. + * @throws IllegalArgumentException if the index value is not valid. + * @throws IllegalArgumentException if the current reference count for this + * index is non positive. + */ + @Override + public int decrementRefCount(int index) { + checkArgument(index >= 0 && index < locationList.length, " index not supported by store"); + checkArgument((refCount[index] & 0xff) > 0, " cannot decrement index"); + Integer value = refCountMap.remove(index); + if (value == null) { + if ((refCount[index] & 0xff) == 1) { + indexManager.releaseIndex(index); + refCount[index] = (byte) 0; + locationList[index] = INFEASIBLE_LOCN; + return 0; + } else { + int newVal = (byte) ((refCount[index] & 0xff) - 1); + refCount[index] = (byte) newVal; + return newVal; + } + } else { + if (value > 1) { + refCountMap.put(index, value - 1); + } + return value - 1 + (refCount[index] & 0xff); + } } - public PointStoreDouble(int dimensions, int capacity) { - this(new Builder().dimensions(dimensions).shingleSize(1).capacity(capacity).indexCapacity(capacity) - .currentStoreCapacity(capacity)); + /** + * takes an index from the index manager and rezises if necessary also adjusts + * refCount size to have increment/decrement be seamless + * + * @return an index from the index manager + */ + int takeIndex() { + if (indexManager.isEmpty()) { + if (indexManager.getCapacity() < capacity) { + int oldCapacity = indexManager.getCapacity(); + int newCapacity = Math.min(capacity, 1 + (int) Math.floor(1.1 * oldCapacity)); + indexManager.extendCapacity(newCapacity); + refCount = Arrays.copyOf(refCount, newCapacity); + int oldlocationLength = locationList.length; + locationList = Arrays.copyOf(locationList, newCapacity); + for (int i = oldlocationLength; i < newCapacity; i++) { + locationList[i] = INFEASIBLE_LOCN; + } + } else { + throw new IllegalStateException(" index manager in point store is full "); + } + } + return indexManager.takeIndex(); + } + + protected int getAmountToWrite(double[] tempPoint) { + if (checkShingleAlignment(startOfFreeSegment, tempPoint)) { + if (!rotationEnabled + || startOfFreeSegment % dimensions == (nextSequenceIndex - 1) * baseDimension % dimensions) { + return baseDimension; + } + } else if (!rotationEnabled) { + return dimensions; + + } + // the following adds the padding for what exists; + // then the padding for the new part; all mod (dimensions) + // note that the expression is baseDimension when the condition + // startOfFreeSegment % dimensions == (nextSequenceIndex-1)*baseDimension % + // dimension + // is met + return dimensions + (dimensions - startOfFreeSegment % dimensions + + (int) ((nextSequenceIndex) * baseDimension) % dimensions) % dimensions; + } + + /** + * Add a point to the point store and return the index of the stored point. + * + * @param point The point being added to the store. + * @param sequenceNum sequence number of the point + * @return the index value of the stored point. + * @throws IllegalArgumentException if the length of the point does not match + * the point store's dimensions. + * @throws IllegalStateException if the point store is full. + */ + public int add(double[] point, long sequenceNum) { + checkArgument(internalShinglingEnabled || point.length == dimensions, + "point.length must be equal to dimensions"); + checkArgument(!internalShinglingEnabled || point.length == baseDimension, + "point.length must be equal to dimensions"); + + double[] tempPoint = point; + nextSequenceIndex++; + if (internalShinglingEnabled) { + // rotation is supported via the output and input is unchanged + tempPoint = constructShingleInPlace(internalShingle, point, false); + if (nextSequenceIndex < shingleSize) { + return INFEASIBLE_POINTSTORE_INDEX; + } + } + int nextIndex; + + int amountToWrite = getAmountToWrite(tempPoint); + + if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { + // try compaction and then resizing + compact(); + // the compaction can change the array contents + amountToWrite = getAmountToWrite(tempPoint); + if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { + resizeStore(); + if (startOfFreeSegment + amountToWrite > currentStoreCapacity * dimensions) { + System.out.println("AA"); + } + checkState(startOfFreeSegment + amountToWrite <= currentStoreCapacity * dimensions, "out of space"); + } + } + + nextIndex = takeIndex(); + + locationList[nextIndex] = startOfFreeSegment - dimensions + amountToWrite; + if (amountToWrite <= dimensions) { + copyPoint(tempPoint, dimensions - amountToWrite, startOfFreeSegment, amountToWrite); + } else { + copyPoint(tempPoint, 0, startOfFreeSegment + amountToWrite - dimensions, dimensions); + } + startOfFreeSegment += amountToWrite; + + refCount[nextIndex] = 1; + return nextIndex; + } + + /** + * Increment the reference count for the given index. This operation assumes + * that there is currently a point stored at the given index and will throw an + * exception if that's not the case. + * + * @param index The index value. + * @throws IllegalArgumentException if the index value is not valid. + * @throws IllegalArgumentException if the current reference count for this + * index is non positive. + */ + public int incrementRefCount(int index) { + checkArgument(index >= 0 && index < locationList.length, " index not supported by store"); + checkArgument((refCount[index] & 0xff) > 0, " not in use "); + Integer value = refCountMap.remove(index); + if (value == null) { + if ((refCount[index] & 0xff) == 255) { + refCountMap.put(index, 1); + return 256; + } else { + int newVal = (byte) ((refCount[index] & 0xff) + 1); + refCount[index] = (byte) newVal; + return newVal; + } + } else { + refCountMap.put(index, value + 1); + return value + 1; + } } @Override + public int getDimensions() { + return dimensions; + } + + /** + * maximum capacity, in number of points of size dimensions + */ + public int getCapacity() { + return capacity; + } + + /** + * capacity of the indices + */ + public int getIndexCapacity() { + return indexManager.getCapacity(); + } + + /** + * used in mapper + * + * @return gets the shingle size (if known, otherwise is 1) + */ + public int getShingleSize() { + return shingleSize; + } + + /** + * gets the current store capacity in the number of points with dimension many + * values + * + * @return capacity in number of points + */ + public int getCurrentStoreCapacity() { + return currentStoreCapacity; + } + + /** + * used for mappers + * + * @return the store that stores the values + */ + public double[] getStore() { + return store; + } + + /** + * used for mapper + * + * @return the array of counts referring to different points + */ + public int[] getRefCount() { + int[] newarray = new int[refCount.length]; + for (int i = 0; i < refCount.length; i++) { + newarray[i] = refCount[i] & 0xff; + Integer value = refCountMap.get(i); + if (value != null) { + newarray[i] += value; + } + } + return newarray; + } + + /** + * useful in mapper to not copy + * + * @return the length of the prefix + */ + public int getStartOfFreeSegment() { + return startOfFreeSegment; + } + + /** + * used in mapper + * + * @return if shingling is performed internally + */ + public boolean isInternalShinglingEnabled() { + return internalShinglingEnabled; + } + + /** + * used in mapper and in extrapolation + * + * @return the last timestamp seen + */ + public long getNextSequenceIndex() { + return nextSequenceIndex; + } + + /** + * used to obtain the most recent shingle seen so far in case of internal + * shingling + * + * @return for internal shingling, returns the last seen shingle + */ + public double[] getInternalShingle() { + checkState(internalShinglingEnabled, "internal shingling is not enabled"); + return copyShingle(); + } + + /** + * The following function eliminates redundant information that builds up in the + * point store and shrinks the point store + */ + + public void compact() { + + Vector reverseReference = new Vector<>(); + for (int i = 0; i < locationList.length; i++) { + int locn = locationList[i]; + if (locn < currentStoreCapacity * dimensions && locn >= 0) { + reverseReference.add(new Integer[] { locn, i }); + } + } + reverseReference.sort((o1, o2) -> o1[0].compareTo(o2[0])); + int freshStart = 0; + int jStatic = 0; + int jDynamic = 0; + int jEnd = reverseReference.size(); + while (jStatic < jEnd) { + int blockStart = reverseReference.get(jStatic)[0]; + int blockEnd = blockStart + dimensions; + int initial = 0; + if (rotationEnabled) { + initial = (dimensions - freshStart + blockStart) % dimensions; + } + int k = jStatic + 1; + jDynamic = jStatic + 1; + while (k < jEnd) { + int newElem = reverseReference.get(k)[0]; + if (blockEnd >= newElem) { + k += 1; + jDynamic += 1; + blockEnd = max(blockEnd, newElem + dimensions); + } else { + k = jEnd; + } + } + + // aligning the boundaries + for (int i = 0; i < initial; i++) { + store[freshStart] = 0; + ++freshStart; + } + + for (int i = blockStart; i < blockEnd; i++) { + store[freshStart] = store[i]; + assert (!rotationEnabled || freshStart % dimensions == i % dimensions); + + if (jStatic < jEnd) { + int locn = reverseReference.get(jStatic)[0]; + if (i == locn) { + int newIdx = reverseReference.get(jStatic)[1]; + locationList[newIdx] = freshStart; + jStatic += 1; + } + } + freshStart += 1; + } + + if (jStatic != jDynamic) { + throw new IllegalStateException("There is discepancy in indices"); + } + } + startOfFreeSegment = freshStart; + } + + /** + * returns the number of copies of a point + * + * @param i index of a point + * @return number of copies of the point managed by the store + */ + public int getRefCount(int i) { + int val = refCount[i] & 0xff; + Integer value = refCountMap.get(i); + if (value != null) { + val += value; + } + return val; + } + + @Override + public boolean isInternalRotationEnabled() { + return rotationEnabled; + } + + public int[] getLocationList() { + return locationList; + } + + /** + * + * @return the number of indices stored + */ + public int size() { + int count = 0; + for (int i = 0; i < locationList.length; i++) { + if (locationList[i] != INFEASIBLE_LOCN) { + ++count; + } + } + return count; + } + + /** + * transforms a point to a shingled point if internal shingling is turned on + * + * @param point new input values + * @return shingled point + */ + @Override + public double[] transformToShingledPoint(double[] point) { + checkArgument(internalShinglingEnabled, " only allowed for internal shingling"); + checkArgument(point.length == baseDimension, " incorrect length"); + return constructShingleInPlace(copyShingle(), point, rotationEnabled); + } + + private double[] copyShingle() { + if (!rotationEnabled) { + return Arrays.copyOf(internalShingle, dimensions); + } else { + double[] answer = new double[dimensions]; + int offset = (int) (nextSequenceIndex * baseDimension); + for (int i = 0; i < dimensions; i++) { + answer[(offset + i) % dimensions] = internalShingle[i]; + } + return answer; + } + } + + /** + * the following function is used to update the shingle in place; it can be used + * to produce new copies as well + * + * @param target the array containing the shingled point + * @param point the new values + * @return the array which now contains the updated shingle + */ + protected double[] constructShingleInPlace(double[] target, double[] point, boolean rotationEnabled) { + if (!rotationEnabled) { + for (int i = 0; i < dimensions - baseDimension; i++) { + target[i] = target[i + baseDimension]; + } + for (int i = 0; i < baseDimension; i++) { + target[dimensions - baseDimension + i] = (point[i] == 0.0) ? 0.0 : point[i]; + } + } else { + int offset = ((int) (nextSequenceIndex * baseDimension) % dimensions); + for (int i = 0; i < baseDimension; i++) { + target[offset + i] = (point[i] == 0.0) ? 0.0 : point[i]; + } + } + return target; + } + + /** + * for extrapolation and imputation, in presence of internal shingling we need + * to update the list of missing values from the space of the input dimensions + * to the shingled dimensions + * + * @param indexList list of missing values in the input point + * @return list of missing values in the shingled point + */ + @Override + public int[] transformIndices(int[] indexList) { + checkArgument(internalShinglingEnabled, " only allowed for internal shingling"); + checkArgument(indexList.length <= baseDimension, " incorrect length"); + int[] results = Arrays.copyOf(indexList, indexList.length); + if (!rotationEnabled) { + for (int i = 0; i < indexList.length; i++) { + checkArgument(results[i] < baseDimension, "incorrect index"); + results[i] += dimensions - baseDimension; + } + } else { + int offset = ((int) (nextSequenceIndex * baseDimension) % dimensions); + for (int i = 0; i < indexList.length; i++) { + checkArgument(results[i] < baseDimension, "incorrect index"); + results[i] = (results[i] + offset) % dimensions; + } + } + return results; + } + + /** + * a builder + */ + + public static class Builder> { + + // We use Optional types for optional primitive fields when it doesn't make + // sense to use a constant default. + + protected int dimensions; + protected int shingleSize = 1; + protected int baseDimension; + protected boolean internalRotationEnabled = false; + protected boolean internalShinglingEnabled = false; + protected int capacity; + protected Optional initialPointStoreSize = Optional.empty(); + protected int currentStoreCapacity = 0; + protected int indexCapacity = 0; + protected double[] store = null; + protected double[] knownShingle = null; + protected int[] locationList = null; + protected int[] refCount = null; + protected long nextTimeStamp = 0; + protected int startOfFreeSegment = 0; + + // dimension of the points being stored + public T dimensions(int dimensions) { + this.dimensions = dimensions; + return (T) this; + } + + // maximum number of points in the store + public T capacity(int capacity) { + this.capacity = capacity; + return (T) this; + } + + // initial size of the pointstore, dynamicResizing must be on + // and value cannot exceed capacity + public T initialSize(int initialPointStoreSize) { + this.initialPointStoreSize = Optional.of(initialPointStoreSize); + return (T) this; + } + + // shingleSize for opportunistic compression + public T shingleSize(int shingleSize) { + this.shingleSize = shingleSize; + return (T) this; + } + + // is internal shingling enabled + public T internalShinglingEnabled(boolean internalShinglingEnabled) { + this.internalShinglingEnabled = internalShinglingEnabled; + return (T) this; + } + + @Deprecated + public T directLocationEnabled(boolean value) { + return (T) this; + } + + @Deprecated + public T dynamicResizingEnabled(boolean value) { + return (T) this; + } + + // are shingles rotated + public T internalRotationEnabled(boolean internalRotationEnabled) { + this.internalRotationEnabled = internalRotationEnabled; + return (T) this; + } + + // the size of the array storing the specific points + // this is used for serialization + public T currentStoreCapacity(int currentStoreCapacity) { + this.currentStoreCapacity = currentStoreCapacity; + return (T) this; + } + + // the size of the pointset being tracked + // this is used for serialization + public T indexCapacity(int indexCapacity) { + this.indexCapacity = indexCapacity; + return (T) this; + } + + // last known shingle, if internalshingle is on + // this shingle is not rotated + // this is used for serialization + public T knownShingle(double[] knownShingle) { + this.knownShingle = knownShingle; + return (T) this; + } + + // count of the points being tracked + // used for serialization + public T refCount(int[] refCount) { + this.refCount = refCount; + return (T) this; + } + + // location of the points being tracked, if not directmapped + // used for serialization + public T locationList(int[] locationList) { + this.locationList = locationList; + return (T) this; + } + + public T store(double[] store) { + this.store = store; + return (T) this; + } + + // location of where points can be written + // used for serialization + public T startOfFreeSegment(int startOfFreeSegment) { + this.startOfFreeSegment = startOfFreeSegment; + return (T) this; + } + + // the next timeStamp to accept + // used for serialization + public T nextTimeStamp(long nextTimeStamp) { + this.nextTimeStamp = nextTimeStamp; + return (T) this; + } + + public PointStoreDouble build() { + return new PointStoreDouble(this); + } + } + + public PointStoreDouble(int dimensions, int capacity) { + this(PointStoreDouble.builder().capacity(capacity).dimensions(dimensions).shingleSize(1).initialSize(capacity)); + } + + public PointStoreDouble(Builder builder) { + checkArgument(builder.dimensions > 0, "dimensions must be greater than 0"); + checkArgument(builder.capacity > 0, "capacity must be greater than 0"); + checkArgument(builder.shingleSize == 1 || builder.dimensions == builder.shingleSize + || builder.dimensions % builder.shingleSize == 0, "incorrect use of shingle size"); + /** + * the following checks are due to mappers (kept for future) + */ + if (builder.refCount != null || builder.locationList != null || builder.knownShingle != null) { + checkArgument(builder.refCount != null, "reference count must be present"); + checkArgument(builder.locationList != null, "location list must be present"); + checkArgument(builder.refCount.length == builder.indexCapacity, "incorrect reference count length"); + // following may change if IndexManager is dynamically resized as well + checkArgument(builder.locationList.length == builder.indexCapacity, " incorrect length of locations"); + checkArgument( + builder.knownShingle == null + || builder.internalShinglingEnabled && builder.knownShingle.length == builder.dimensions, + "incorrect shingling information"); + } + + this.shingleSize = builder.shingleSize; + this.dimensions = builder.dimensions; + this.internalShinglingEnabled = builder.internalShinglingEnabled; + this.rotationEnabled = builder.internalRotationEnabled; + this.baseDimension = this.dimensions / this.shingleSize; + this.capacity = builder.capacity; + this.refCountMap = new HashMap<>(); + + if (builder.refCount == null) { + int size = (int) builder.initialPointStoreSize.orElse(builder.capacity); + currentStoreCapacity = size; + this.indexManager = new IntervalManager(size); + startOfFreeSegment = 0; + refCount = new byte[size]; + if (internalShinglingEnabled) { + nextSequenceIndex = 0; + internalShingle = new double[dimensions]; + } + store = new double[currentStoreCapacity * dimensions]; + locationList = new int[currentStoreCapacity]; + Arrays.fill(locationList, INFEASIBLE_LOCN); + } else { + this.refCount = new byte[builder.refCount.length]; + for (int i = 0; i < refCount.length; i++) { + if (builder.refCount[i] >= 0 && builder.refCount[i] <= 255) { + refCount[i] = (byte) builder.refCount[i]; + } else if (builder.refCount[i] > 255) { + refCount[i] = (byte) 255; + refCountMap.put(i, builder.refCount[i] - 255); + } + } + this.startOfFreeSegment = builder.startOfFreeSegment; + this.nextSequenceIndex = builder.nextTimeStamp; + this.currentStoreCapacity = builder.currentStoreCapacity; + if (internalShinglingEnabled) { + this.internalShingle = (builder.knownShingle != null) ? Arrays.copyOf(builder.knownShingle, dimensions) + : new double[dimensions]; + } + BitSet bits = new BitSet(refCount.length); + for (int i = 0; i < refCount.length; i++) { + if ((refCount[i] & 0xff) > 0) { + bits.set(i); + } + } + int first = bits.nextClearBit(0); + Stack stack = new Stack<>(); + + while (first < refCount.length) { + int last = bits.nextSetBit(first) - 1; + if (last >= first) { + stack.push(new int[] { first, last }); + first = last + 1; + } else { // we do not all distiction between all full and all empty + if (first < refCount.length - 1) { + if (bits.nextClearBit(first + 1) == first + 1) { + stack.push(new int[] { first, refCount.length - 1 }); + } else { + stack.push(new int[] { first, first }); + } + } else { + stack.push(new int[] { refCount.length - 1, refCount.length - 1 }); + } + break; + } + } + indexManager = new IntervalManager(stack, builder.indexCapacity); + store = (builder.store == null) ? new double[currentStoreCapacity * dimensions] : builder.store; + this.locationList = builder.locationList; + } + } + void resizeStore() { int maxCapacity = (rotationEnabled) ? 2 * capacity : capacity; - int newCapacity = Math.min(2 * currentStoreCapacity, maxCapacity); + int newCapacity = (int) Math.floor(Math.min(1.1 * currentStoreCapacity, maxCapacity)); if (newCapacity > currentStoreCapacity) { double[] newStore = new double[newCapacity * dimensions]; System.arraycopy(store, 0, newStore, 0, currentStoreCapacity * dimensions); @@ -56,18 +784,18 @@ void resizeStore() { } } - @Override boolean checkShingleAlignment(int location, double[] point) { boolean test = (location - dimensions + baseDimension >= 0); for (int i = 0; i < dimensions - baseDimension && test; i++) { - test = (point[i] == store[location - dimensions + baseDimension + i]); + test = (((float) point[i]) == store[location - dimensions + baseDimension + i]); } return test; } - @Override void copyPoint(double[] point, int src, int location, int length) { - System.arraycopy(point, src, store, location, length); + for (int i = 0; i < length; i++) { + store[location + i] = point[src + i]; + } } /** @@ -87,9 +815,10 @@ void copyPoint(double[] point, int src, int location, int length) { @Override public boolean pointEquals(int index, double[] point) { - indexManager.checkValidIndex(index); + checkArgument(index >= 0 && index < locationList.length, " index not supported by store"); checkArgument(point.length == dimensions, "point.length must be equal to dimensions"); int address = locationList[index]; + checkArgument(locationList[index] != INFEASIBLE_LOCN, " invalid point"); if (!rotationEnabled) { for (int j = 0; j < dimensions; j++) { if (point[j] != store[j + address]) { @@ -103,6 +832,7 @@ public boolean pointEquals(int index, double[] point) { } } } + return true; } @@ -118,8 +848,9 @@ public boolean pointEquals(int index, double[] point) { */ @Override public double[] get(int index) { - indexManager.checkValidIndex(index); + checkArgument(index >= 0 && index < locationList.length, " index not supported by store"); int address = locationList[index]; + checkArgument(locationList[index] != INFEASIBLE_LOCN, " invalid point"); if (!rotationEnabled) { return Arrays.copyOfRange(store, address, address + dimensions); } else { @@ -131,7 +862,6 @@ public double[] get(int index) { } } - // same as get; allows a multiplier to enable convex combinations public double[] getScaledPoint(int index, double factor) { double[] answer = get(index); for (int i = 0; i < dimensions; i++) { @@ -140,18 +870,14 @@ public double[] getScaledPoint(int index, double factor) { return answer; } - @Override public String toString(int index) { return Arrays.toString(get(index)); } - @Override void copyTo(int dest, int source, int length) { // validateInternalState(dest <= source, "error"); - if (dest < source) { - for (int i = 0; i < length; i++) { - store[dest + i] = store[source + i]; - } + for (int i = 0; i < length; i++) { + store[dest + i] = store[source + i]; } } @@ -159,16 +885,4 @@ public static Builder builder() { return new Builder(); } - public static class Builder extends PointStore.Builder { - private double[] store = null; - - public Builder store(double[] store) { - this.store = store; - return this; - } - - public PointStoreDouble build() { - return new PointStoreDouble(this); - } - } } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreFloat.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreFloat.java deleted file mode 100644 index 8617fa7b..00000000 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreFloat.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -package com.amazon.randomcutforest.store; - -import static com.amazon.randomcutforest.CommonUtils.checkArgument; - -import java.util.Arrays; - -/** - * PointStoreFloat is a PointStore defined on base type FLoat - */ -public class PointStoreFloat extends PointStore { - - public PointStoreFloat(Builder builder) { - super(builder); - checkArgument(builder.store == null || builder.store.length == currentStoreCapacity * dimensions, - " incorrect store length"); - store = (builder.store == null) ? new float[currentStoreCapacity * dimensions] : builder.store; - } - - public PointStoreFloat(int dimensions, int capacity) { - this(new Builder().dimensions(dimensions).shingleSize(1).capacity(capacity).initialSize(capacity)); - } - - @Override - void resizeStore() { - int maxCapacity = (rotationEnabled) ? 2 * capacity : capacity; - int newCapacity = Math.min(2 * currentStoreCapacity, maxCapacity); - if (newCapacity > currentStoreCapacity) { - float[] newStore = new float[newCapacity * dimensions]; - System.arraycopy(store, 0, newStore, 0, currentStoreCapacity * dimensions); - currentStoreCapacity = newCapacity; - store = newStore; - } - } - - @Override - boolean checkShingleAlignment(int location, double[] point) { - boolean test = (location - dimensions + baseDimension >= 0); - for (int i = 0; i < dimensions - baseDimension && test; i++) { - test = (((float) point[i]) == store[location - dimensions + baseDimension + i]); - } - return test; - } - - @Override - void copyPoint(double[] point, int src, int location, int length) { - for (int i = 0; i < length; i++) { - store[location + i] = (float) point[src + i]; - } - } - - /** - * Test whether the given point is equal to the point stored at the given index. - * This operation uses point-wise == to test for equality. - * - * @param index The index value of the point we are comparing to. - * @param point The point we are comparing for equality. - * @return true if the point stored at the index is equal to the given point, - * false otherwise. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is nonpositive. - * @throws IllegalArgumentException if the length of the point does not match - * the point store's dimensions. - */ - - @Override - public boolean pointEquals(int index, float[] point) { - indexManager.checkValidIndex(index); - checkArgument(point.length == dimensions, "point.length must be equal to dimensions"); - int address = directLocationMap ? index * dimensions : locationList[index]; - if (!rotationEnabled) { - for (int j = 0; j < dimensions; j++) { - if (point[j] != store[j + address]) { - return false; - } - } - } else { - for (int j = 0; j < dimensions; j++) { - if (point[(j + address) % dimensions] != store[j + address]) { - return false; - } - } - } - - return true; - } - - /** - * Get a copy of the point at the given index. - * - * @param index An index value corresponding to a storage location in this point - * store. - * @return a copy of the point stored at the given index. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is nonpositive. - */ - @Override - public float[] get(int index) { - indexManager.checkValidIndex(index); - int address = locationList[index]; - if (!rotationEnabled) { - return Arrays.copyOfRange(store, address, address + dimensions); - } else { - float[] answer = new float[dimensions]; - for (int i = 0; i < dimensions; i++) { - answer[(address + i) % dimensions] = store[address + i]; - } - return answer; - } - } - - public float[] getScaledPoint(int index, double factor) { - float[] answer = get(index); - for (int i = 0; i < dimensions; i++) { - answer[i] *= factor; - } - return answer; - } - - @Override - public String toString(int index) { - return Arrays.toString(get(index)); - } - - @Override - void copyTo(int dest, int source, int length) { - // validateInternalState(dest <= source, "error"); - for (int i = 0; i < length; i++) { - store[dest + i] = store[source + i]; - } - } - - public static Builder builder() { - return new Builder(); - } - - public static class Builder extends PointStore.Builder { - private float[] store = null; - - public PointStoreFloat.Builder store(float[] store) { - this.store = store; - return this; - } - - public PointStoreFloat build() { - return new PointStoreFloat(this); - } - } -} diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStoreLarge.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java similarity index 61% rename from Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStoreLarge.java rename to Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java index 07484259..9571b52f 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStoreLarge.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java @@ -19,9 +19,9 @@ import java.util.Arrays; -public class RCF3PointStoreLarge extends RCF3PointStore { +public class PointStoreLarge extends PointStore { - static int INFEASIBLE_LOCN = (int) -1; + public static int INFEASIBLE_LOCN = (int) -1; protected int[] locationList; void setInfeasiblePointstoreLocationIndex(int index) { @@ -48,11 +48,36 @@ int locationListLength() { return locationList.length; } - public RCF3PointStoreLarge(RCF3PointStore.Builder builder) { + public PointStoreLarge(PointStore.Builder builder) { super(builder); checkArgument(dimensions * capacity < Integer.MAX_VALUE, " incorrect parameters"); - locationList = new int[currentStoreCapacity]; - Arrays.fill(locationList, INFEASIBLE_LOCN); + if (builder.locationList != null) { + locationList = Arrays.copyOf(builder.locationList, builder.locationList.length); + } else { + locationList = new int[currentStoreCapacity]; + Arrays.fill(locationList, INFEASIBLE_LOCN); + } + } + + @Override + public int size() { + int count = 0; + for (int i = 0; i < locationList.length; i++) { + if (locationList[i] != INFEASIBLE_LOCN) { + ++count; + } + } + return count; + } + + @Override + protected void checkFeasible(int index) { + checkArgument(locationList[index] != INFEASIBLE_LOCN, " invalid point"); + } + + @Override + public int[] getLocationList() { + return Arrays.copyOf(locationList, locationList.length); } } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStoreSmall.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreSmall.java similarity index 56% rename from Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStoreSmall.java rename to Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreSmall.java index 1563f132..b6c8a360 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStoreSmall.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreSmall.java @@ -19,7 +19,7 @@ import java.util.Arrays; -public class RCF3PointStoreSmall extends RCF3PointStore { +public class PointStoreSmall extends PointStore { public static char INFEASIBLE_SMALL_POINTSTORE_LOCN = (char) -1; protected char[] locationList; @@ -50,11 +50,46 @@ int locationListLength() { return locationList.length; } - public RCF3PointStoreSmall(RCF3PointStore.Builder builder) { + public PointStoreSmall(PointStore.Builder builder) { super(builder); checkArgument(shingleSize * capacity < Character.MAX_VALUE, " incorrect parameters"); - locationList = new char[currentStoreCapacity]; - Arrays.fill(locationList, INFEASIBLE_SMALL_POINTSTORE_LOCN); + if (builder.locationList != null) { + locationList = new char[builder.locationList.length]; + for (int i = 0; i < locationList.length; i++) { + locationList[i] = (char) builder.locationList[i]; + } + } else { + locationList = new char[currentStoreCapacity]; + Arrays.fill(locationList, INFEASIBLE_SMALL_POINTSTORE_LOCN); + } + } + + public PointStoreSmall(int dimensions, int capacity) { + this(PointStore.builder().capacity(capacity).dimensions(dimensions).shingleSize(1).initialSize(capacity)); + } + + @Override + protected void checkFeasible(int index) { + checkArgument(locationList[index] != INFEASIBLE_SMALL_POINTSTORE_LOCN, " invalid point"); } + @Override + public int size() { + int count = 0; + for (int i = 0; i < locationList.length; i++) { + if (locationList[i] != INFEASIBLE_SMALL_POINTSTORE_LOCN) { + ++count; + } + } + return count; + } + + @Override + public int[] getLocationList() { + int[] answer = new int[locationList.length]; + for (int i = 0; i < locationList.length; i++) { + answer[i] = locationList[i]; + } + return answer; + } } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStore.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStore.java deleted file mode 100644 index 29da4a42..00000000 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/RCF3PointStore.java +++ /dev/null @@ -1,810 +0,0 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -package com.amazon.randomcutforest.store; - -import static com.amazon.randomcutforest.CommonUtils.checkArgument; -import static com.amazon.randomcutforest.CommonUtils.checkState; -import static java.lang.Math.max; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Optional; -import java.util.Vector; - -public abstract class RCF3PointStore implements IPointStore { - - public static int INFEASIBLE_POINTSTORE_INDEX = -1; - /** - * an index manager to manage free locations - */ - protected IntervalManager indexManager; - /** - * generic store class - */ - protected float[] store; - /** - * generic internal shingle, note that input is doubles - */ - protected double[] internalShingle; - /** - * enable rotation of shingles; use a cyclic buffer instead of sliding window - */ - boolean rotationEnabled; - /** - * last seen timestamp for internal shingling - */ - protected long nextSequenceIndex; - /** - * pointers to store locations, this decouples direct addressing and points can - * be moved internally - */ - // protected char[] locationList; - - /** - * refCount[i] counts of the number of trees that are currently using the point - * determined by locationList[i] or (for directLocationMapping) the point at - * store[i * dimensions] - */ - protected byte[] refCount; - - protected HashMap refCountMap; - /** - * first location where new data can be safely copied; - */ - int startOfFreeSegment; - /** - * overall dimension of the point (after shingling) - */ - int dimensions; - /** - * shingle size, if known. Setting shingle size = 1 rules out overlapping - */ - int shingleSize; - /** - * number of original dimensions which are shingled to produce and overall point - * dimensions = shingleSize * baseDimensions. However there is a possibility - * that even though the data is shingled, we may not choose to use the - * overlapping (say for out of order updates). - */ - int baseDimension; - - /** - * maximum capacity - */ - int capacity; - /** - * current capacity of store (number of shingled points) - */ - int currentStoreCapacity; - - /** - * enabling internal shingling - */ - boolean internalShinglingEnabled; - - abstract void setInfeasiblePointstoreLocationIndex(int index); - - abstract void extendLocationList(int newCapacity); - - abstract void setLocation(int index, int location); - - abstract int getLocation(int index); - - /** - * Decrement the reference count for the given index. - * - * @param index The index value. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is non positive. - */ - @Override - public int decrementRefCount(int index) { - checkArgument((refCount[index] & 0xff) > 0, " cannot decrement index"); - Integer value = refCountMap.remove(index); - if (value == null) { - if ((refCount[index] & 0xff) == 1) { - indexManager.releaseIndex(index); - refCount[index] = (byte) 0; - setInfeasiblePointstoreLocationIndex(index); - return 0; - } else { - int newVal = (byte) ((refCount[index] & 0xff) - 1); - refCount[index] = (byte) newVal; - return newVal; - } - } else { - if (value > 1) { - refCountMap.put(index, value - 1); - } - return value - 1 + (refCount[index] & 0xff); - } - } - - /** - * takes an index from the index manager and rezises if necessary also adjusts - * refCount size to have increment/decrement be seamless - * - * @return an index from the index manager - */ - int takeIndex() { - if (indexManager.isEmpty()) { - if (indexManager.getCapacity() < capacity) { - int oldCapacity = indexManager.getCapacity(); - int newCapacity = Math.min(capacity, 1 + (int) Math.floor(1.1 * oldCapacity)); - indexManager.extendCapacity(newCapacity); - refCount = Arrays.copyOf(refCount, newCapacity); - extendLocationList(newCapacity); - } else { - throw new IllegalStateException(" index manager in point store is full "); - } - } - return indexManager.takeIndex(); - } - - protected int getAmountToWrite(double[] tempPoint) { - if (checkShingleAlignment(startOfFreeSegment, tempPoint)) { - if (!rotationEnabled - || startOfFreeSegment % dimensions == (nextSequenceIndex - 1) * baseDimension % dimensions) { - return baseDimension; - } - } else if (!rotationEnabled) { - return dimensions; - - } - // the following adds the padding for what exists; - // then the padding for the new part; all mod (dimensions) - // note that the expression is baseDimension when the condition - // startOfFreeSegment % dimensions == (nextSequenceIndex-1)*baseDimension % - // dimension - // is met - return dimensions + (dimensions - startOfFreeSegment % dimensions - + (int) ((nextSequenceIndex) * baseDimension) % dimensions) % dimensions; - } - - /** - * Add a point to the point store and return the index of the stored point. - * - * @param point The point being added to the store. - * @param sequenceNum sequence number of the point - * @return the index value of the stored point. - * @throws IllegalArgumentException if the length of the point does not match - * the point store's dimensions. - * @throws IllegalStateException if the point store is full. - */ - public int add(double[] point, long sequenceNum) { - checkArgument(internalShinglingEnabled || point.length == dimensions, - "point.length must be equal to dimensions"); - checkArgument(!internalShinglingEnabled || point.length == baseDimension, - "point.length must be equal to dimensions"); - - double[] tempPoint = point; - nextSequenceIndex++; - if (internalShinglingEnabled) { - // rotation is supported via the output and input is unchanged - tempPoint = constructShingleInPlace(internalShingle, point, false); - if (nextSequenceIndex < shingleSize) { - return INFEASIBLE_POINTSTORE_INDEX; - } - } - int nextIndex; - - int amountToWrite = getAmountToWrite(tempPoint); - - if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { - // try compaction and then resizing - compact(); - // the compaction can change the array contents - amountToWrite = getAmountToWrite(tempPoint); - if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { - resizeStore(); - checkState(startOfFreeSegment + amountToWrite <= currentStoreCapacity * dimensions, "out of space"); - } - } - - nextIndex = takeIndex(); - - setLocation(nextIndex, startOfFreeSegment - dimensions + amountToWrite); - if (amountToWrite <= dimensions) { - copyPoint(tempPoint, dimensions - amountToWrite, startOfFreeSegment, amountToWrite); - } else { - copyPoint(tempPoint, 0, startOfFreeSegment + amountToWrite - dimensions, dimensions); - } - startOfFreeSegment += amountToWrite; - - refCount[nextIndex] = 1; - return nextIndex; - } - - /** - * Increment the reference count for the given index. This operation assumes - * that there is currently a point stored at the given index and will throw an - * exception if that's not the case. - * - * @param index The index value. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is non positive. - */ - public int incrementRefCount(int index) { - // indexManager.checkValidIndex(index); - Integer value = refCountMap.remove(index); - if (value == null) { - if ((refCount[index] & 0xff) == 255) { - refCountMap.put(index, 1); - return 256; - } else { - int newVal = (byte) ((refCount[index] & 0xff) + 1); - refCount[index] = (byte) newVal; - return newVal; - } - } else { - refCountMap.put(index, value + 1); - return value + 1; - } - } - - @Override - public int getDimensions() { - return dimensions; - } - - /** - * maximum capacity, in number of points of size dimensions - */ - public int getCapacity() { - return capacity; - } - - /** - * capacity of the indices - */ - public int getIndexCapacity() { - return indexManager.getCapacity(); - } - - /** - * used in mapper - * - * @return gets the shingle size (if known, otherwise is 1) - */ - public int getShingleSize() { - return shingleSize; - } - - /** - * gets the current store capacity in the number of points with dimension many - * values - * - * @return capacity in number of points - */ - public int getCurrentStoreCapacity() { - return currentStoreCapacity; - } - - /** - * used for mappers - * - * @return the store that stores the values - */ - public float[] getStore() { - return store; - } - - /** - * used for mapper - * - * @return the array of counts referring to different points - */ - public int[] getRefCount() { - int[] newarray = new int[refCount.length]; - for (int i = 0; i < refCount.length; i++) { - newarray[i] = refCount[i] & 0xff; - Integer value = refCountMap.get(i); - if (value != null) { - newarray[i] += value; - } - } - return newarray; - } - - /** - * useful in mapper to not copy - * - * @return the length of the prefix - */ - public int getStartOfFreeSegment() { - return startOfFreeSegment; - } - - /** - * used in mapper - * - * @return if shingling is performed internally - */ - public boolean isInternalShinglingEnabled() { - return internalShinglingEnabled; - } - - /** - * used in mapper and in extrapolation - * - * @return the last timestamp seen - */ - public long getNextSequenceIndex() { - return nextSequenceIndex; - } - - /** - * used to obtain the most recent shingle seen so far in case of internal - * shingling - * - * @return for internal shingling, returns the last seen shingle - */ - public double[] getInternalShingle() { - checkState(internalShinglingEnabled, "internal shingling is not enabled"); - return copyShingle(); - } - - /** - * The following function eliminates redundant information that builds up in the - * point store and shrinks the point store - */ - - abstract int locationListLength(); - - public void compact() { - - Vector reverseReference = new Vector<>(); - for (int i = 0; i < locationListLength(); i++) { - int locn = getLocation(i); - if (locn < currentStoreCapacity * dimensions && locn >= 0) { - reverseReference.add(new Integer[] { locn, i }); - } - } - reverseReference.sort((o1, o2) -> o1[0].compareTo(o2[0])); - int freshStart = 0; - int jStatic = 0; - int jDynamic = 0; - int jEnd = reverseReference.size(); - while (jStatic < jEnd) { - int blockStart = reverseReference.get(jStatic)[0]; - int blockEnd = blockStart + dimensions; - int initial = 0; - if (rotationEnabled) { - initial = (dimensions - freshStart + blockStart) % dimensions; - } - int k = jStatic + 1; - jDynamic = jStatic + 1; - while (k < jEnd) { - int newElem = reverseReference.get(k)[0]; - if (blockEnd >= newElem) { - k += 1; - jDynamic += 1; - blockEnd = max(blockEnd, newElem + dimensions); - } else { - k = jEnd; - } - } - - // aligning the boundaries - for (int i = 0; i < initial; i++) { - store[freshStart] = 0; - ++freshStart; - } - - for (int i = blockStart; i < blockEnd; i++) { - store[freshStart] = store[i]; - assert (!rotationEnabled || freshStart % dimensions == i % dimensions); - - if (jStatic < jEnd) { - int locn = reverseReference.get(jStatic)[0]; - if (i == locn) { - int newIdx = reverseReference.get(jStatic)[1]; - setLocation(newIdx, freshStart); - jStatic += 1; - } - } - freshStart += 1; - } - - if (jStatic != jDynamic) { - throw new IllegalStateException("There is discepancy in indices"); - } - } - startOfFreeSegment = freshStart; - } - - /** - * returns the number of copies of a point - * - * @param i index of a point - * @return number of copies of the point managed by the store - */ - public int getRefCount(int i) { - int val = refCount[i] & 0xff; - Integer value = refCountMap.get(i); - if (value != null) { - val += value; - } - return val; - } - - @Override - public boolean isInternalRotationEnabled() { - return rotationEnabled; - } - - /** - * transforms a point to a shingled point if internal shingling is turned on - * - * @param point new input values - * @return shingled point - */ - @Override - public double[] transformToShingledPoint(double[] point) { - checkArgument(internalShinglingEnabled, " only allowed for internal shingling"); - checkArgument(point.length == baseDimension, " incorrect length"); - return constructShingleInPlace(copyShingle(), point, rotationEnabled); - } - - private double[] copyShingle() { - if (!rotationEnabled) { - return Arrays.copyOf(internalShingle, dimensions); - } else { - double[] answer = new double[dimensions]; - int offset = (int) (nextSequenceIndex * baseDimension); - for (int i = 0; i < dimensions; i++) { - answer[(offset + i) % dimensions] = internalShingle[i]; - } - return answer; - } - } - - /** - * the following function is used to update the shingle in place; it can be used - * to produce new copies as well - * - * @param target the array containing the shingled point - * @param point the new values - * @return the array which now contains the updated shingle - */ - protected double[] constructShingleInPlace(double[] target, double[] point, boolean rotationEnabled) { - if (!rotationEnabled) { - for (int i = 0; i < dimensions - baseDimension; i++) { - target[i] = target[i + baseDimension]; - } - for (int i = 0; i < baseDimension; i++) { - target[dimensions - baseDimension + i] = (point[i] == 0.0) ? 0.0 : point[i]; - } - } else { - int offset = ((int) (nextSequenceIndex * baseDimension) % dimensions); - for (int i = 0; i < baseDimension; i++) { - target[offset + i] = (point[i] == 0.0) ? 0.0 : point[i]; - } - } - return target; - } - - /** - * for extrapolation and imputation, in presence of internal shingling we need - * to update the list of missing values from the space of the input dimensions - * to the shingled dimensions - * - * @param indexList list of missing values in the input point - * @return list of missing values in the shingled point - */ - @Override - public int[] transformIndices(int[] indexList) { - checkArgument(internalShinglingEnabled, " only allowed for internal shingling"); - checkArgument(indexList.length <= baseDimension, " incorrect length"); - int[] results = Arrays.copyOf(indexList, indexList.length); - if (!rotationEnabled) { - for (int i = 0; i < indexList.length; i++) { - checkArgument(results[i] < baseDimension, "incorrect index"); - results[i] += dimensions - baseDimension; - } - } else { - int offset = ((int) (nextSequenceIndex * baseDimension) % dimensions); - for (int i = 0; i < indexList.length; i++) { - checkArgument(results[i] < baseDimension, "incorrect index"); - results[i] = (results[i] + offset) % dimensions; - } - } - return results; - } - - /** - * a builder - */ - - public static class Builder> { - - // We use Optional types for optional primitive fields when it doesn't make - // sense to use a constant default. - - protected int dimensions; - protected int shingleSize = 1; - protected int baseDimension; - protected boolean internalRotationEnabled = false; - protected boolean internalShinglingEnabled = false; - protected int capacity; - protected Optional initialPointStoreSize = Optional.empty(); - protected int currentStoreCapacity = 0; - protected int indexCapacity = 0; - protected float[] store = null; - protected double[] knownShingle = null; - protected int[] locationList = null; - protected int[] refCount = null; - protected long nextTimeStamp = 0; - protected int startOfFreeSegment = 0; - - // dimension of the points being stored - public T dimensions(int dimensions) { - this.dimensions = dimensions; - return (T) this; - } - - // maximum number of points in the store - public T capacity(int capacity) { - this.capacity = capacity; - return (T) this; - } - - // initial size of the pointstore, dynamicResizing must be on - // and value cannot exceed capacity - public T initialSize(int initialPointStoreSize) { - this.initialPointStoreSize = Optional.of(initialPointStoreSize); - return (T) this; - } - - // shingleSize for opportunistic compression - public T shingleSize(int shingleSize) { - this.shingleSize = shingleSize; - return (T) this; - } - - // is internal shingling enabled - public T internalShinglingEnabled(boolean internalShinglingEnabled) { - this.internalShinglingEnabled = internalShinglingEnabled; - return (T) this; - } - - // are shingles rotated - public T internalRotationEnabled(boolean internalRotationEnabled) { - this.internalRotationEnabled = internalRotationEnabled; - return (T) this; - } - - // the size of the array storing the specific points - // this is used for serialization - public T currentStoreCapacity(int currentStoreCapacity) { - this.currentStoreCapacity = currentStoreCapacity; - return (T) this; - } - - // the size of the pointset being tracked - // this is used for serialization - public T indexCapacity(int indexCapacity) { - this.indexCapacity = indexCapacity; - return (T) this; - } - - // last known shingle, if internalshingle is on - // this shingle is not rotated - // this is used for serialization - public T knownShingle(double[] knownShingle) { - this.knownShingle = knownShingle; - return (T) this; - } - - // count of the points being tracked - // used for serialization - public T refCount(int[] refCount) { - this.refCount = refCount; - return (T) this; - } - - // location of the points being tracked, if not directmapped - // used for serialization - public T locationList(int[] locationList) { - this.locationList = locationList; - return (T) this; - } - - public T store(float[] store) { - this.store = store; - return (T) this; - } - - // location of where points can be written - // used for serialization - public T startOfFreeSegment(int startOfFreeSegment) { - this.startOfFreeSegment = startOfFreeSegment; - return (T) this; - } - - // the next timeStamp to accept - // used for serialization - public T nextTimeStamp(long nextTimeStamp) { - this.nextTimeStamp = nextTimeStamp; - return (T) this; - } - - public IPointStore build() { - if (shingleSize * capacity < Character.MAX_VALUE) { - return new RCF3PointStoreSmall(this); - } else { - return new RCF3PointStoreLarge(this); - } - } - } - - public RCF3PointStore(RCF3PointStore.Builder builder) { - checkArgument(builder.dimensions > 0, "dimensions must be greater than 0"); - checkArgument(builder.capacity > 0, "capacity must be greater than 0"); - checkArgument(builder.shingleSize == 1 || builder.dimensions == builder.shingleSize - || builder.dimensions % builder.shingleSize == 0, "incorrect use of shingle size"); - /** - * the following checks are due to mappers (kept for future) - */ - if (builder.refCount != null || builder.locationList != null || builder.knownShingle != null) { - checkArgument(builder.refCount != null, "reference count must be present"); - checkArgument(builder.locationList != null, "location list must be present"); - checkArgument(builder.refCount.length == builder.indexCapacity, "incorrect reference count length"); - // following may change if IndexManager is dynamically resized as well - checkArgument(builder.locationList.length == builder.indexCapacity, " incorrect length of locations"); - checkArgument( - builder.knownShingle == null - || builder.internalShinglingEnabled && builder.knownShingle.length == builder.dimensions, - "incorrect shingling information"); - } - - this.shingleSize = builder.shingleSize; - this.dimensions = builder.dimensions; - this.internalShinglingEnabled = builder.internalShinglingEnabled; - this.rotationEnabled = builder.internalRotationEnabled; - this.baseDimension = this.dimensions / this.shingleSize; - this.capacity = builder.capacity; - this.refCountMap = new HashMap<>(); - - if (builder.refCount == null) { - int size = (int) builder.initialPointStoreSize.orElse(builder.capacity); - currentStoreCapacity = size; - this.indexManager = new IntervalManager(size); - startOfFreeSegment = 0; - refCount = new byte[size]; - if (internalShinglingEnabled) { - nextSequenceIndex = 0; - internalShingle = new double[dimensions]; - } - store = new float[currentStoreCapacity * dimensions]; - } else { - throw new IllegalStateException("not yet supported"); - } - } - - void resizeStore() { - int maxCapacity = (rotationEnabled) ? 2 * capacity : capacity; - int newCapacity = (int) Math.floor(Math.min(1.1 * currentStoreCapacity, maxCapacity)); - if (newCapacity > currentStoreCapacity) { - float[] newStore = new float[newCapacity * dimensions]; - System.arraycopy(store, 0, newStore, 0, currentStoreCapacity * dimensions); - currentStoreCapacity = newCapacity; - store = newStore; - } - } - - boolean checkShingleAlignment(int location, double[] point) { - boolean test = (location - dimensions + baseDimension >= 0); - for (int i = 0; i < dimensions - baseDimension && test; i++) { - test = (((float) point[i]) == store[location - dimensions + baseDimension + i]); - } - return test; - } - - void copyPoint(double[] point, int src, int location, int length) { - for (int i = 0; i < length; i++) { - store[location + i] = (float) point[src + i]; - } - } - - /** - * Test whether the given point is equal to the point stored at the given index. - * This operation uses point-wise == to test for equality. - * - * @param index The index value of the point we are comparing to. - * @param point The point we are comparing for equality. - * @return true if the point stored at the index is equal to the given point, - * false otherwise. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is nonpositive. - * @throws IllegalArgumentException if the length of the point does not match - * the point store's dimensions. - */ - - @Override - public boolean pointEquals(int index, float[] point) { - // indexManager.checkValidIndex(index); - checkArgument(point.length == dimensions, "point.length must be equal to dimensions"); - int address = getLocation(index); - if (!rotationEnabled) { - for (int j = 0; j < dimensions; j++) { - if (point[j] != store[j + address]) { - return false; - } - } - } else { - for (int j = 0; j < dimensions; j++) { - if (point[(j + address) % dimensions] != store[j + address]) { - return false; - } - } - } - - return true; - } - - /** - * Get a copy of the point at the given index. - * - * @param index An index value corresponding to a storage location in this point - * store. - * @return a copy of the point stored at the given index. - * @throws IllegalArgumentException if the index value is not valid. - * @throws IllegalArgumentException if the current reference count for this - * index is nonpositive. - */ - @Override - public float[] get(int index) { - // indexManager.checkValidIndex(index); - int address = getLocation(index); - if (!rotationEnabled) { - return Arrays.copyOfRange(store, address, address + dimensions); - } else { - float[] answer = new float[dimensions]; - for (int i = 0; i < dimensions; i++) { - answer[(address + i) % dimensions] = store[address + i]; - } - return answer; - } - } - - public float[] getScaledPoint(int index, double factor) { - float[] answer = get(index); - for (int i = 0; i < dimensions; i++) { - answer[i] *= factor; - } - return answer; - } - - public String toString(int index) { - return Arrays.toString(get(index)); - } - - void copyTo(int dest, int source, int length) { - // validateInternalState(dest <= source, "error"); - for (int i = 0; i < length; i++) { - store[dest + i] = store[source + i]; - } - } - - public static Builder builder() { - return new Builder(); - } - -} diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/RandomCutForestShingledFunctionalTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/RandomCutForestShingledFunctionalTest.java index f43f38cf..71aa5e17 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/RandomCutForestShingledFunctionalTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/RandomCutForestShingledFunctionalTest.java @@ -28,7 +28,7 @@ import org.junit.jupiter.params.provider.ValueSource; import com.amazon.randomcutforest.config.Precision; -import com.amazon.randomcutforest.store.PointStoreFloat; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.testutils.MultiDimDataWithKey; import com.amazon.randomcutforest.testutils.NormalMixtureTestData; import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys; @@ -161,11 +161,11 @@ public void InternalShinglingTest(boolean rotation) { assertEquals(firstResult, secondResult, 1e-10); assertEquals(secondResult, thirdResult, 1e-10); } - PointStoreFloat store = (PointStoreFloat) first.getUpdateCoordinator().getStore(); + PointStore store = (PointStore) first.getUpdateCoordinator().getStore(); assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length); - store = (PointStoreFloat) second.getUpdateCoordinator().getStore(); + store = (PointStore) second.getUpdateCoordinator().getStore(); assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length); - store = (PointStoreFloat) third.getUpdateCoordinator().getStore(); + store = (PointStore) third.getUpdateCoordinator().getStore(); assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length); } } diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/state/RandomCutForestMapperTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/state/RandomCutForestMapperTest.java index 7e57f842..7683f555 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/state/RandomCutForestMapperTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/state/RandomCutForestMapperTest.java @@ -29,8 +29,8 @@ import com.amazon.randomcutforest.RandomCutForest; import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.executor.PointStoreCoordinator; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.store.PointStoreDouble; -import com.amazon.randomcutforest.store.PointStoreFloat; import com.amazon.randomcutforest.testutils.NormalMixtureTestData; public class RandomCutForestMapperTest { @@ -86,8 +86,8 @@ public void assertCompactForestEquals(RandomCutForest forest, RandomCutForest fo assertEquals(store.size(), store2.size()); } else { - PointStoreFloat store = (PointStoreFloat) coordinator.getStore(); - PointStoreFloat store2 = (PointStoreFloat) coordinator2.getStore(); + PointStore store = (PointStore) coordinator.getStore(); + PointStore store2 = (PointStore) coordinator2.getStore(); assertArrayEquals(store.getRefCount(), store2.getRefCount()); assertArrayEquals(store.getStore(), store2.getStore()); assertEquals(store.getCapacity(), store2.getCapacity()); diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapperTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapperTest.java index adc3c231..b3606afa 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapperTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapperTest.java @@ -21,7 +21,8 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.amazon.randomcutforest.store.PointStoreFloat; +import com.amazon.randomcutforest.store.PointStore; +import com.amazon.randomcutforest.store.PointStoreSmall; public class PointStoreFloatMapperTest { private PointStoreFloatMapper mapper; @@ -35,7 +36,7 @@ public void setUp() { public void testRoundTrip() { int dimensions = 2; int capacity = 4; - PointStoreFloat store = new PointStoreFloat(dimensions, capacity); + PointStore store = new PointStoreSmall(dimensions, capacity); double[] point1 = { 1.1, -22.2 }; int index1 = store.add(point1, 1); @@ -44,7 +45,7 @@ public void testRoundTrip() { double[] point3 = { 10.1, 100.1 }; int index3 = store.add(point3, 3); - PointStoreFloat store2 = mapper.toModel(mapper.toState(store)); + PointStore store2 = mapper.toModel(mapper.toState(store)); assertEquals(capacity, store2.getCapacity()); assertEquals(3, store2.size()); diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/state/tree/CompactRandomCutTreeFloatMapperTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/state/tree/CompactRandomCutTreeFloatMapperTest.java index fb4bbf9b..9d74af02 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/state/tree/CompactRandomCutTreeFloatMapperTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/state/tree/CompactRandomCutTreeFloatMapperTest.java @@ -32,7 +32,7 @@ import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.store.IPointStore; -import com.amazon.randomcutforest.store.PointStoreFloat; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.tree.CompactRandomCutTreeFloat; public class CompactRandomCutTreeFloatMapperTest { @@ -43,7 +43,8 @@ public class CompactRandomCutTreeFloatMapperTest { private static class TreeProvider implements ArgumentsProvider { @Override public Stream provideArguments(ExtensionContext extensionContext) throws Exception { - IPointStore pointStore = new PointStoreFloat(dimensions, capacity); + IPointStore pointStore = PointStore.builder().dimensions(dimensions).capacity(capacity) + .shingleSize(1).initialSize(capacity).build(); List indexes = new ArrayList<>(); for (int i = 0; i < capacity; i++) { diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreDoubleTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreDoubleTest.java index 67059a46..6416a201 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreDoubleTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreDoubleTest.java @@ -166,7 +166,6 @@ public void internalshinglingTestNoRotation() { PointStoreDouble store = new PointStoreDouble.Builder().capacity(20 * shinglesize).dimensions(shinglesize) .shingleSize(shinglesize).indexCapacity(shinglesize).internalShinglingEnabled(true) .currentStoreCapacity(1).build(); - assertTrue(store.isDynamicResizingEnabled()); assertFalse(store.isInternalRotationEnabled()); Random random = new Random(0); double[] shingle = new double[shinglesize]; @@ -191,7 +190,6 @@ public void internalshinglingTestWithRotation() { PointStoreDouble store = new PointStoreDouble.Builder().capacity(20 * shinglesize).dimensions(shinglesize) .shingleSize(shinglesize).indexCapacity(shinglesize).internalShinglingEnabled(true) .internalRotationEnabled(true).currentStoreCapacity(1).build(); - assertTrue(store.isDynamicResizingEnabled()); assertTrue(store.isInternalRotationEnabled()); Random random = new Random(0); double[] shingle = new double[shinglesize]; diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreFloatTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreTest.java similarity index 93% rename from Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreFloatTest.java rename to Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreTest.java index 1ac6189f..e8c58971 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreFloatTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/store/PointStoreTest.java @@ -33,17 +33,17 @@ import com.amazon.randomcutforest.CommonUtils; -public class PointStoreFloatTest { +public class PointStoreTest { private int dimensions; private int capacity; - private PointStoreFloat pointStore; + private PointStore pointStore; @BeforeEach public void setUp() { dimensions = 2; capacity = 4; - pointStore = new PointStoreFloat(dimensions, capacity); + pointStore = new PointStoreSmall(dimensions, capacity); } @Test @@ -168,10 +168,9 @@ public void testPointEqualsInvalid() { @Test public void internalshinglingTestNoRotation() { int shinglesize = 10; - PointStoreFloat store = new PointStoreFloat.Builder().capacity(20 * shinglesize).dimensions(shinglesize) + PointStore store = new PointStore.Builder().capacity(20 * shinglesize).dimensions(shinglesize) .shingleSize(shinglesize).indexCapacity(shinglesize).internalShinglingEnabled(true) .currentStoreCapacity(1).build(); - assertTrue(store.isDynamicResizingEnabled()); assertFalse(store.isInternalRotationEnabled()); Random random = new Random(0); double[] shingle = new double[shinglesize]; @@ -192,10 +191,9 @@ public void internalshinglingTestNoRotation() { @Test public void internalshinglingTestWithRotation() { int shinglesize = 10; - PointStoreFloat store = new PointStoreFloat.Builder().capacity(20 * shinglesize).dimensions(shinglesize) + PointStore store = new PointStore.Builder().capacity(20 * shinglesize).dimensions(shinglesize) .shingleSize(shinglesize).indexCapacity(shinglesize).internalShinglingEnabled(true) .internalRotationEnabled(true).currentStoreCapacity(1).build(); - assertTrue(store.isDynamicResizingEnabled()); assertTrue(store.isInternalRotationEnabled()); Random random = new Random(0); double[] shingle = new double[shinglesize]; @@ -222,7 +220,7 @@ public void internalshinglingTestWithRotation() { @Test public void checkRotationAndCompact() { int shinglesize = 4; - PointStoreFloat store = new PointStoreFloat.Builder().capacity(2 * shinglesize).dimensions(shinglesize) + PointStore store = new PointStore.Builder().capacity(2 * shinglesize).dimensions(shinglesize) .shingleSize(shinglesize).indexCapacity(shinglesize).internalShinglingEnabled(true) .internalRotationEnabled(true).currentStoreCapacity(1).build(); for (int i = 0; i < 2 * shinglesize; i++) { @@ -265,9 +263,8 @@ public void checkRotationAndCompact() { @Test void CompactionTest() { int shinglesize = 2; - PointStoreFloat store = new PointStoreFloat.Builder().capacity(6).dimensions(shinglesize) - .shingleSize(shinglesize).indexCapacity(6).directLocationEnabled(false).internalShinglingEnabled(true) - .build(); + PointStore store = new PointStore.Builder().capacity(6).dimensions(shinglesize).shingleSize(shinglesize) + .indexCapacity(6).directLocationEnabled(false).internalShinglingEnabled(true).build(); store.add(new double[] { 0 }, 0L); for (int i = 0; i < 5; i++) { diff --git a/Java/core/src/test/java/com/amazon/randomcutforest/tree/CompactRandomCutTreeFloatTest.java b/Java/core/src/test/java/com/amazon/randomcutforest/tree/CompactRandomCutTreeFloatTest.java index 6c243391..341744eb 100644 --- a/Java/core/src/test/java/com/amazon/randomcutforest/tree/CompactRandomCutTreeFloatTest.java +++ b/Java/core/src/test/java/com/amazon/randomcutforest/tree/CompactRandomCutTreeFloatTest.java @@ -38,7 +38,7 @@ import com.amazon.randomcutforest.config.Config; import com.amazon.randomcutforest.sampler.Weighted; -import com.amazon.randomcutforest.store.PointStoreFloat; +import com.amazon.randomcutforest.store.PointStore; public class CompactRandomCutTreeFloatTest { @@ -50,8 +50,8 @@ public class CompactRandomCutTreeFloatTest { @BeforeEach public void setUp() { rng = mock(Random.class); - PointStoreFloat pointStoreFloat = new PointStoreFloat.Builder().indexCapacity(100).capacity(100) - .initialSize(100).dimensions(2).build(); + PointStore pointStoreFloat = new PointStore.Builder().indexCapacity(100).capacity(100).initialSize(100) + .dimensions(2).build(); tree = CompactRandomCutTreeFloat.builder().random(rng).centerOfMassEnabled(true).pointStore(pointStoreFloat) .storeSequenceIndexesEnabled(true).dimension(2).inputDimension(2).build(); @@ -367,8 +367,8 @@ public void testDeletePointInvalid() { @Test public void testUpdatesOnSmallBoundingBox() { // verifies on small bounding boxes random cuts and tree updates are functional - PointStoreFloat pointStoreFloat = new PointStoreFloat.Builder().indexCapacity(10).capacity(10) - .currentStoreCapacity(10).dimensions(1).build(); + PointStore pointStoreFloat = new PointStore.Builder().indexCapacity(10).capacity(10).currentStoreCapacity(10) + .dimensions(1).build(); CompactRandomCutTreeFloat tree = CompactRandomCutTreeFloat.builder().random(rng).pointStore(pointStoreFloat) .build(); diff --git a/Java/serialization/src/main/java/com/amazon/randomcutforest/serialize/json/v1/V1JsonToV2StateConverter.java b/Java/serialization/src/main/java/com/amazon/randomcutforest/serialize/json/v1/V1JsonToV2StateConverter.java index 0c7bc38b..d4559af4 100644 --- a/Java/serialization/src/main/java/com/amazon/randomcutforest/serialize/json/v1/V1JsonToV2StateConverter.java +++ b/Java/serialization/src/main/java/com/amazon/randomcutforest/serialize/json/v1/V1JsonToV2StateConverter.java @@ -34,8 +34,8 @@ import com.amazon.randomcutforest.state.store.PointStoreFloatMapper; import com.amazon.randomcutforest.state.store.PointStoreState; import com.amazon.randomcutforest.store.IPointStore; +import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.store.PointStoreDouble; -import com.amazon.randomcutforest.store.PointStoreFloat; import com.amazon.randomcutforest.tree.CompactRandomCutTreeDouble; import com.amazon.randomcutforest.tree.CompactRandomCutTreeFloat; import com.amazon.randomcutforest.tree.ITree; @@ -94,7 +94,8 @@ public SamplerConverter(int dimensions, int capacity, Precision precision, int m .maxSize(pointStore.getCapacity() + 1).storeSequenceIndexesEnabled(false) .centerOfMassEnabled(false).boundingBoxCacheFraction(1.0).build(); } else { - pointStore = new PointStoreFloat(dimensions, capacity); + pointStore = PointStore.builder().dimensions(dimensions).capacity(capacity).shingleSize(1) + .initialSize(capacity).build(); globalTree = new CompactRandomCutTreeFloat.Builder().pointStore(pointStore) .maxSize(pointStore.getCapacity() + 1).storeSequenceIndexesEnabled(false) .centerOfMassEnabled(false).boundingBoxCacheFraction(1.0).build(); @@ -108,7 +109,7 @@ public PointStoreState getPointStoreState(Precision precision) { if (precision == Precision.FLOAT_64) { return new PointStoreDoubleMapper().toState((PointStoreDouble) pointStore); } else { - return new PointStoreFloatMapper().toState((PointStoreFloat) pointStore); + return new PointStoreFloatMapper().toState((PointStore) pointStore); } } From c210eb792619a75c2e54d82ae9bfd2df7fff6a38 Mon Sep 17 00:00:00 2001 From: Sudipto Guha Date: Tue, 4 Jan 2022 12:42:23 -0800 Subject: [PATCH 2/4] changing intervalmanager to indexinterval manager and cleanup --- ...Manager.java => IndexIntervalManager.java} | 43 ++++++++++-- .../randomcutforest/store/PointStore.java | 67 ++++++------------- .../store/PointStoreDouble.java | 45 +------------ .../tree/AbstractNodeStore.java | 6 +- 4 files changed, 66 insertions(+), 95 deletions(-) rename Java/core/src/main/java/com/amazon/randomcutforest/store/{IntervalManager.java => IndexIntervalManager.java} (73%) diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/IntervalManager.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java similarity index 73% rename from Java/core/src/main/java/com/amazon/randomcutforest/store/IntervalManager.java rename to Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java index 1a7ba22f..d591c4b4 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/IntervalManager.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java @@ -19,21 +19,22 @@ import static com.amazon.randomcutforest.CommonUtils.checkState; import java.util.Arrays; +import java.util.BitSet; import java.util.Stack; /** * This class defines common functionality for Store classes, including * maintaining the stack of free pointers. */ -// to be renamed to IndexIntervalManager in next PR alongside ser/de changes -public class IntervalManager { + +public class IndexIntervalManager { protected int capacity; protected int[] freeIndexesStart; protected int[] freeIndexesEnd; protected int lastInUse; - public IntervalManager(int capacity) { + public IndexIntervalManager(int capacity) { checkArgument(capacity > 0, "incorrect parameters"); freeIndexesEnd = new int[1]; freeIndexesStart = new int[1]; @@ -43,19 +44,53 @@ public IntervalManager(int capacity) { freeIndexesEnd[0] = capacity - 1; } - public IntervalManager(Stack stack, int capacity) { + public IndexIntervalManager(int[] refCount, int capacity) { checkArgument(capacity > 0, "incorrect parameters"); + BitSet bits = new BitSet(refCount.length); + int numUsed = 0; + for (int i = 0; i < refCount.length; i++) { + if ((refCount[i] & 0xff) > 0) { + bits.set(i); + ++numUsed; + } + } + int first = bits.nextClearBit(0); + Stack stack = new Stack<>(); + while (first < refCount.length) { + int last = bits.nextSetBit(first) - 1; + if (last >= first) { + stack.push(new int[] { first, last }); + first = bits.nextClearBit(last + 1); + if (first < 0) { + break; + } + } else { // we do not all distiction between all full and all empty + if (first < refCount.length - 1) { + if (bits.nextClearBit(first + 1) == first + 1) { + stack.push(new int[] { first, refCount.length - 1 }); + } else { + stack.push(new int[] { first, first }); + } + } else { + stack.push(new int[] { refCount.length - 1, refCount.length - 1 }); + } + break; + } + } lastInUse = stack.size(); freeIndexesEnd = new int[lastInUse + 1]; freeIndexesStart = new int[lastInUse + 1]; this.capacity = capacity; int count = 0; + int free = 0; while (stack.size() > 0) { int[] interval = stack.pop(); freeIndexesStart[count] = interval[0]; freeIndexesEnd[count] = interval[1]; ++count; + free += (interval[1] - interval[0] + 1); } + checkArgument(free + numUsed == refCount.length, "incorrect bit conversions"); } public void extendCapacity(int newCapacity) { diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java index 0c363e65..60cd9b56 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStore.java @@ -20,10 +20,8 @@ import static java.lang.Math.max; import java.util.Arrays; -import java.util.BitSet; import java.util.HashMap; import java.util.Optional; -import java.util.Stack; import java.util.Vector; public abstract class PointStore implements IPointStore { @@ -32,7 +30,7 @@ public abstract class PointStore implements IPointStore { /** * an index manager to manage free locations */ - protected IntervalManager indexManager; + protected IndexIntervalManager indexManager; /** * generic store class */ @@ -370,6 +368,15 @@ public double[] getInternalShingle() { abstract int locationListLength(); + void alignBoundaries(int initial, int freshStart) { + int locn = freshStart; + for (int i = 0; i < initial; i++) { + store[locn] = 0; + ++locn; + } + + } + public void compact() { Vector reverseReference = new Vector<>(); for (int i = 0; i < locationListLength(); i++) { @@ -403,14 +410,11 @@ public void compact() { } } - // aligning the boundaries - for (int i = 0; i < initial; i++) { - store[freshStart] = 0; - ++freshStart; - } + alignBoundaries(initial, freshStart); + freshStart += initial; + int start = freshStart; for (int i = blockStart; i < blockEnd; i++) { - store[freshStart] = store[i]; assert (!rotationEnabled || freshStart % dimensions == i % dimensions); if (jStatic < jEnd) { @@ -423,6 +427,7 @@ public void compact() { } freshStart += 1; } + copyTo(start, blockStart, blockEnd - blockStart); if (jStatic != jDynamic) { throw new IllegalStateException("There is discepancy in indices"); @@ -705,7 +710,7 @@ public PointStore(PointStore.Builder builder) { if (builder.refCount == null) { int size = (int) builder.initialPointStoreSize.orElse(builder.capacity); currentStoreCapacity = size; - this.indexManager = new IntervalManager(size); + this.indexManager = new IndexIntervalManager(size); startOfFreeSegment = 0; refCount = new byte[size]; if (internalShinglingEnabled) { @@ -730,39 +735,8 @@ public PointStore(PointStore.Builder builder) { this.internalShingle = (builder.knownShingle != null) ? Arrays.copyOf(builder.knownShingle, dimensions) : new double[dimensions]; } - BitSet bits = new BitSet(refCount.length); - int count = 0; - for (int i = 0; i < refCount.length; i++) { - if ((refCount[i] & 0xff) > 0) { - bits.set(i); - ++count; - } - } - int first = bits.nextClearBit(0); - Stack stack = new Stack<>(); - - while (first < refCount.length) { - int last = bits.nextSetBit(first) - 1; - if (last >= first) { - stack.push(new int[] { first, last }); - first = bits.nextClearBit(last + 1); - if (first < 0) { - break; - } - } else { // we do not all distiction between all full and all empty - if (first < refCount.length - 1) { - if (bits.nextClearBit(first + 1) == first + 1) { - stack.push(new int[] { first, refCount.length - 1 }); - } else { - stack.push(new int[] { first, first }); - } - } else { - stack.push(new int[] { refCount.length - 1, refCount.length - 1 }); - } - break; - } - } - indexManager = new IntervalManager(stack, builder.indexCapacity); + + indexManager = new IndexIntervalManager(builder.refCount, builder.indexCapacity); store = (builder.store == null) ? new float[currentStoreCapacity * dimensions] : builder.store; } } @@ -872,9 +846,10 @@ public String toString(int index) { } void copyTo(int dest, int source, int length) { - // validateInternalState(dest <= source, "error"); - for (int i = 0; i < length; i++) { - store[dest + i] = store[source + i]; + if (dest < source) { + for (int i = 0; i < length; i++) { + store[dest + i] = store[source + i]; + } } } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java index c7a69ee1..d8981658 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreDouble.java @@ -20,10 +20,8 @@ import static java.lang.Math.max; import java.util.Arrays; -import java.util.BitSet; import java.util.HashMap; import java.util.Optional; -import java.util.Stack; import java.util.Vector; public class PointStoreDouble implements IPointStore { @@ -33,7 +31,7 @@ public class PointStoreDouble implements IPointStore { /** * an index manager to manage free locations */ - protected IntervalManager indexManager; + protected IndexIntervalManager indexManager; /** * generic store class */ @@ -210,9 +208,6 @@ public int add(double[] point, long sequenceNum) { amountToWrite = getAmountToWrite(tempPoint); if (startOfFreeSegment > currentStoreCapacity * dimensions - amountToWrite) { resizeStore(); - if (startOfFreeSegment + amountToWrite > currentStoreCapacity * dimensions) { - System.out.println("AA"); - } checkState(startOfFreeSegment + amountToWrite <= currentStoreCapacity * dimensions, "out of space"); } } @@ -713,7 +708,7 @@ public PointStoreDouble(Builder builder) { if (builder.refCount == null) { int size = (int) builder.initialPointStoreSize.orElse(builder.capacity); currentStoreCapacity = size; - this.indexManager = new IntervalManager(size); + this.indexManager = new IndexIntervalManager(size); startOfFreeSegment = 0; refCount = new byte[size]; if (internalShinglingEnabled) { @@ -740,34 +735,7 @@ public PointStoreDouble(Builder builder) { this.internalShingle = (builder.knownShingle != null) ? Arrays.copyOf(builder.knownShingle, dimensions) : new double[dimensions]; } - BitSet bits = new BitSet(refCount.length); - for (int i = 0; i < refCount.length; i++) { - if ((refCount[i] & 0xff) > 0) { - bits.set(i); - } - } - int first = bits.nextClearBit(0); - Stack stack = new Stack<>(); - - while (first < refCount.length) { - int last = bits.nextSetBit(first) - 1; - if (last >= first) { - stack.push(new int[] { first, last }); - first = last + 1; - } else { // we do not all distiction between all full and all empty - if (first < refCount.length - 1) { - if (bits.nextClearBit(first + 1) == first + 1) { - stack.push(new int[] { first, refCount.length - 1 }); - } else { - stack.push(new int[] { first, first }); - } - } else { - stack.push(new int[] { refCount.length - 1, refCount.length - 1 }); - } - break; - } - } - indexManager = new IntervalManager(stack, builder.indexCapacity); + indexManager = new IndexIntervalManager(builder.refCount, builder.indexCapacity); store = (builder.store == null) ? new double[currentStoreCapacity * dimensions] : builder.store; this.locationList = builder.locationList; } @@ -874,13 +842,6 @@ public String toString(int index) { return Arrays.toString(get(index)); } - void copyTo(int dest, int source, int length) { - // validateInternalState(dest <= source, "error"); - for (int i = 0; i < length; i++) { - store[dest + i] = store[source + i]; - } - } - public static Builder builder() { return new Builder(); } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/tree/AbstractNodeStore.java b/Java/core/src/main/java/com/amazon/randomcutforest/tree/AbstractNodeStore.java index 9d7a15c2..eff8ea97 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/tree/AbstractNodeStore.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/tree/AbstractNodeStore.java @@ -26,7 +26,7 @@ import com.amazon.randomcutforest.MultiVisitor; import com.amazon.randomcutforest.Visitor; import com.amazon.randomcutforest.store.IPointStoreView; -import com.amazon.randomcutforest.store.IntervalManager; +import com.amazon.randomcutforest.store.IndexIntervalManager; /** * A fixed-size buffer for storing interior tree nodes. An interior node is @@ -53,7 +53,7 @@ public abstract class AbstractNodeStore { protected final int dimensions; protected final float[] cutValue; protected double nodeCacheFraction; - protected IntervalManager freeNodeManager; + protected IndexIntervalManager freeNodeManager; protected double[] rangeSumData; protected float[] boundingBoxData; protected final IPointStoreView pointStoreView; @@ -68,7 +68,7 @@ public AbstractNodeStore(int capacity, int dimensions, double nodeCacheFraction, IPointStoreView pointStoreView) { this.capacity = capacity; this.dimensions = dimensions; - freeNodeManager = new IntervalManager(capacity - 1); + freeNodeManager = new IndexIntervalManager(capacity - 1); this.nodeCacheFraction = nodeCacheFraction; cutValue = new float[capacity - 1]; leafMass = new HashMap<>(); From 4e94d4ec4448bf9b73e4e72177862bbc420d54c2 Mon Sep 17 00:00:00 2001 From: Sudipto Guha Date: Thu, 6 Jan 2022 21:30:20 -0800 Subject: [PATCH 3/4] changes preparing for serde --- .../state/store/PointStoreFloatMapper.java | 37 ++++++++++++--- .../store/IndexIntervalManager.java | 46 ++++++++++++------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java index b134896d..497ea6f5 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java @@ -15,16 +15,16 @@ package com.amazon.randomcutforest.state.store; -import static com.amazon.randomcutforest.CommonUtils.checkArgument; -import static com.amazon.randomcutforest.CommonUtils.checkNotNull; - -import lombok.Getter; -import lombok.Setter; - import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.state.IStateMapper; import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.util.ArrayPacking; +import lombok.Getter; +import lombok.Setter; + +import static com.amazon.randomcutforest.CommonUtils.checkArgument; +import static com.amazon.randomcutforest.CommonUtils.checkNotNull; +import static com.amazon.randomcutforest.CommonUtils.toFloatArray; @Getter @Setter @@ -89,4 +89,29 @@ public PointStoreState toState(PointStore model) { state.setPointData(ArrayPacking.pack(model.getStore(), model.getStartOfFreeSegment())); return state; } + + public PointStore convertFromDouble(PointStoreState state) { + checkNotNull(state.getRefCount(), "refCount must not be null"); + checkNotNull(state.getPointData(), "pointData must not be null"); + checkArgument(Precision.valueOf(state.getPrecision()) == Precision.FLOAT_64, + "precision must be " + Precision.FLOAT_64); + int indexCapacity = state.getIndexCapacity(); + int dimensions = state.getDimensions(); + float[] store = toFloatArray( + ArrayPacking.unpackDoubles(state.getPointData(), state.getCurrentStoreCapacity() * dimensions)); + int startOfFreeSegment = state.getStartOfFreeSegment(); + int[] refCount = ArrayPacking.unpackInts(state.getRefCount(), indexCapacity, state.isCompressed()); + int[] locationList = new int[indexCapacity]; + int[] tempList = ArrayPacking.unpackInts(state.getLocationList(), state.isCompressed()); + System.arraycopy(tempList, 0, locationList, 0, tempList.length); + + return PointStore.builder().internalRotationEnabled(state.isRotationEnabled()) + .internalShinglingEnabled(state.isInternalShinglingEnabled()) + .dynamicResizingEnabled(state.isDynamicResizingEnabled()) + .directLocationEnabled(state.isDirectLocationMap()).indexCapacity(indexCapacity) + .currentStoreCapacity(state.getCurrentStoreCapacity()).capacity(state.getCapacity()) + .shingleSize(state.getShingleSize()).dimensions(state.getDimensions()).locationList(locationList) + .nextTimeStamp(state.getLastTimeStamp()).startOfFreeSegment(startOfFreeSegment).refCount(refCount) + .knownShingle(state.getInternalShingle()).store(store).build(); + } } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java index d591c4b4..955ee4f3 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/IndexIntervalManager.java @@ -44,19 +44,27 @@ public IndexIntervalManager(int capacity) { freeIndexesEnd[0] = capacity - 1; } - public IndexIntervalManager(int[] refCount, int capacity) { - checkArgument(capacity > 0, "incorrect parameters"); + static BitSet toBits(int[] refCount) { + checkArgument(refCount != null, " not meaninglful"); BitSet bits = new BitSet(refCount.length); - int numUsed = 0; for (int i = 0; i < refCount.length; i++) { if ((refCount[i] & 0xff) > 0) { bits.set(i); - ++numUsed; } } + return bits; + } + + public IndexIntervalManager(int[] refCount, int capacity) { + this(capacity, refCount.length, toBits(refCount)); + } + + public IndexIntervalManager(int capacity, int length, BitSet bits) { + checkArgument(bits != null, " null bitset not allowed"); + this.capacity = capacity; int first = bits.nextClearBit(0); Stack stack = new Stack<>(); - while (first < refCount.length) { + while (first < length) { int last = bits.nextSetBit(first) - 1; if (last >= first) { stack.push(new int[] { first, last }); @@ -65,14 +73,14 @@ public IndexIntervalManager(int[] refCount, int capacity) { break; } } else { // we do not all distiction between all full and all empty - if (first < refCount.length - 1) { + if (first < length - 1) { if (bits.nextClearBit(first + 1) == first + 1) { - stack.push(new int[] { first, refCount.length - 1 }); + stack.push(new int[] { first, length - 1 }); } else { stack.push(new int[] { first, first }); } } else { - stack.push(new int[] { refCount.length - 1, refCount.length - 1 }); + stack.push(new int[] { length - 1, length - 1 }); } break; } @@ -82,15 +90,12 @@ public IndexIntervalManager(int[] refCount, int capacity) { freeIndexesStart = new int[lastInUse + 1]; this.capacity = capacity; int count = 0; - int free = 0; while (stack.size() > 0) { int[] interval = stack.pop(); freeIndexesStart[count] = interval[0]; freeIndexesEnd[count] = interval[1]; ++count; - free += (interval[1] - interval[0] + 1); } - checkArgument(free + numUsed == refCount.length, "incorrect bit conversions"); } public void extendCapacity(int newCapacity) { @@ -116,17 +121,18 @@ public boolean isEmpty() { * @return the maximum number of nodes whose data can be stored. */ public int getCapacity() { - if (capacity == 0) { - System.out.println("HUH"); - } return capacity; } /** - * @return the number of nodes whose data is currently stored. + * @return the number of indices whioch are being maintained */ public int size() { - return capacity - lastInUse; + int sum = 0; + for (int i = 0; i < lastInUse; i++) { + sum += freeIndexesEnd[i] - freeIndexesStart[i] + 1; + } + return sum; } /** @@ -173,4 +179,12 @@ public void releaseIndex(int index) { lastInUse += 1; } + public int[] getFreeIndices() { + int[] answer = new int[2 * lastInUse]; + for (int i = 0; i < 2 * lastInUse; i += 2) { + answer[i] = freeIndexesStart[i / 2]; + answer[i + 1] = freeIndexesEnd[i / 2]; + } + return answer; + } } From 25db57e0a9ea4b484ea378538bb283467283b76b Mon Sep 17 00:00:00 2001 From: Sudipto Guha Date: Fri, 7 Jan 2022 15:01:37 -0800 Subject: [PATCH 4/4] optimization and backward compatibility --- .../amazon/randomcutforest/state/Version.java | 1 + .../state/store/PointStoreDoubleMapper.java | 14 ++++++++++ .../state/store/PointStoreFloatMapper.java | 28 +++++++++++++++---- .../store/PointStoreLarge.java | 4 +-- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/Version.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/Version.java index 0bbf6dcb..5429239f 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/Version.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/Version.java @@ -18,4 +18,5 @@ public class Version { public static final String V2_0 = "2.0"; public static final String V2_1 = "2.1"; + public static final String V3_0 = "3.0"; } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java index 7c5f2368..749940e1 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreDoubleMapper.java @@ -25,6 +25,7 @@ import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.state.IStateMapper; +import com.amazon.randomcutforest.state.Version; import com.amazon.randomcutforest.store.PointStoreDouble; import com.amazon.randomcutforest.util.ArrayPacking; @@ -51,6 +52,9 @@ public PointStoreDouble toModel(PointStoreState state, long seed) { Arrays.fill(locationList, PointStoreDouble.INFEASIBLE_LOCN); int[] tempList = ArrayPacking.unpackInts(state.getLocationList(), state.isCompressed()); System.arraycopy(tempList, 0, locationList, 0, tempList.length); + if (!state.getVersion().equals(Version.V3_0)) { + transformArray(locationList, dimensions / state.getShingleSize()); + } return PointStoreDouble.builder().internalRotationEnabled(state.isRotationEnabled()) .internalShinglingEnabled(state.isInternalShinglingEnabled()) @@ -66,6 +70,7 @@ public PointStoreDouble toModel(PointStoreState state, long seed) { public PointStoreState toState(PointStoreDouble model) { model.compact(); PointStoreState state = new PointStoreState(); + state.setVersion(Version.V3_0); state.setCompressed(compressionEnabled); state.setDimensions(model.getDimensions()); state.setCapacity(model.getCapacity()); @@ -92,4 +97,13 @@ public PointStoreState toState(PointStoreDouble model) { state.setPointData(ArrayPacking.pack(model.getStore(), model.getStartOfFreeSegment())); return state; } + + void transformArray(int[] location, int baseDimension) { + checkArgument(baseDimension > 0, "incorrect invocation"); + for (int i = 0; i < location.length; i++) { + if (location[i] > 0) { + location[i] = location[i] / baseDimension; + } + } + } } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java index 497ea6f5..425db1cb 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/state/store/PointStoreFloatMapper.java @@ -15,16 +15,18 @@ package com.amazon.randomcutforest.state.store; +import static com.amazon.randomcutforest.CommonUtils.checkArgument; +import static com.amazon.randomcutforest.CommonUtils.checkNotNull; +import static com.amazon.randomcutforest.CommonUtils.toFloatArray; + +import lombok.Getter; +import lombok.Setter; + import com.amazon.randomcutforest.config.Precision; import com.amazon.randomcutforest.state.IStateMapper; +import com.amazon.randomcutforest.state.Version; import com.amazon.randomcutforest.store.PointStore; import com.amazon.randomcutforest.util.ArrayPacking; -import lombok.Getter; -import lombok.Setter; - -import static com.amazon.randomcutforest.CommonUtils.checkArgument; -import static com.amazon.randomcutforest.CommonUtils.checkNotNull; -import static com.amazon.randomcutforest.CommonUtils.toFloatArray; @Getter @Setter @@ -64,6 +66,7 @@ public PointStore toModel(PointStoreState state, long seed) { public PointStoreState toState(PointStore model) { model.compact(); PointStoreState state = new PointStoreState(); + state.setVersion(Version.V3_0); state.setCompressed(compressionEnabled); state.setDimensions(model.getDimensions()); state.setCapacity(model.getCapacity()); @@ -104,6 +107,9 @@ public PointStore convertFromDouble(PointStoreState state) { int[] locationList = new int[indexCapacity]; int[] tempList = ArrayPacking.unpackInts(state.getLocationList(), state.isCompressed()); System.arraycopy(tempList, 0, locationList, 0, tempList.length); + if (!state.getVersion().equals(Version.V3_0)) { + transformArray(locationList, dimensions / state.getShingleSize()); + } return PointStore.builder().internalRotationEnabled(state.isRotationEnabled()) .internalShinglingEnabled(state.isInternalShinglingEnabled()) @@ -114,4 +120,14 @@ public PointStore convertFromDouble(PointStoreState state) { .nextTimeStamp(state.getLastTimeStamp()).startOfFreeSegment(startOfFreeSegment).refCount(refCount) .knownShingle(state.getInternalShingle()).store(store).build(); } + + void transformArray(int[] location, int baseDimension) { + checkArgument(baseDimension > 0, "incorrect invocation"); + for (int i = 0; i < location.length; i++) { + if (location[i] > 0) { + location[i] = location[i] / baseDimension; + } + } + } + } diff --git a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java index 9571b52f..d04552fb 100644 --- a/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java +++ b/Java/core/src/main/java/com/amazon/randomcutforest/store/PointStoreLarge.java @@ -37,11 +37,11 @@ void extendLocationList(int newCapacity) { }; void setLocation(int index, int location) { - locationList[index] = location; + locationList[index] = location / baseDimension; } int getLocation(int index) { - return locationList[index]; + return baseDimension * locationList[index]; } int locationListLength() {