Skip to content

Commit

Permalink
HUDI-4284 Implement bloom lookup tree as red-black tree
Browse files Browse the repository at this point in the history
  • Loading branch information
yabola committed Jun 26, 2022
1 parent 35afdb4 commit 03e741a
Show file tree
Hide file tree
Showing 8 changed files with 873 additions and 120 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,93 +18,90 @@

package org.apache.hudi.index.bloom;

import java.io.Serializable;
import org.apache.hudi.common.util.rbtree.RedBlackTree;

import java.util.HashSet;
import java.util.Set;

/**
* Look up tree implemented as interval trees to search for any given key in (N logN) time complexity.
* Look up tree implemented as red-black trees to search for any given key in (N logN) time complexity.
*/
class KeyRangeLookupTree implements Serializable {

private KeyRangeNode root;
class KeyRangeLookupTree extends RedBlackTree<KeyRangeNode, RecordKeyRange> {

/**
* @return the root of the tree. Could be {@code null}
* Flag indicating whether the sub-tree min-max metrics need to be recalculated. It is set whenever a node is
* inserted or removed.
*/
public KeyRangeNode getRoot() {
return root;
private volatile boolean needReloadMetrics = false;

/**
 * Marks the sub-tree min-max metrics as needing recalculation, then delegates the insertion to the superclass.
 *
 * @param newNode the node to insert
 */
@Override
public void insert(KeyRangeNode newNode) {
// Raise the flag first; it is consulted by getMatchingIndexFiles(String) before a lookup.
needReloadMetrics = true;
super.insert(newNode);
}

/**
 * Marks the sub-tree min-max metrics as needing recalculation, then delegates the removal to the superclass.
 *
 * @param key the key range identifying the node to remove
 */
@Override
public void remove(RecordKeyRange key) {
// Raise the flag first; it is consulted by getMatchingIndexFiles(String) before a lookup.
needReloadMetrics = true;
super.remove(key);
}

/**
* Inserts a new {@link KeyRangeNode} to this look up tree.
*
* @param newNode the new {@link KeyRangeNode} to be inserted
* If current root and newNode matches with min record key and max record key, merge two nodes. In other words, add
* files from {@code newNode}.
* @param oldNode previously inserted node
* @param newNode newly inserted same node
*/
void insert(KeyRangeNode newNode) {
root = insert(getRoot(), newNode);
@Override
protected void processWhenInsertSame(KeyRangeNode oldNode, KeyRangeNode newNode) {
// Hand the file name list of the newly inserted node to the node that is already in the tree.
oldNode.addFiles(newNode.getFileNameList());
}

/**
* Inserts a new {@link KeyRangeNode} to this look up tree.
*
* If no root exists, make {@code newNode} as the root and return the new root.
*
* If current root and newNode matches with min record key and max record key, merge two nodes. In other words, add
* files from {@code newNode} to current root. Return current root.
*
* If current root is < newNode if current root has no right sub tree update current root's right sub tree max and min
* set newNode as right sub tree else update root's right sub tree min and max with newNode's min and max record key
* as applicable recursively call insert() with root's right subtree as new root
*
* else // current root is >= newNode if current root has no left sub tree update current root's left sub tree max and
* min set newNode as left sub tree else update root's left sub tree min and max with newNode's min and max record key
* as applicable recursively call insert() with root's left subtree as new root
*
* @param root refers to the current root of the look up tree
* @param newNode newNode the new {@link KeyRangeNode} to be inserted
* Traverse the tree to calculate sub-tree min-max metrics.
*/
private KeyRangeNode insert(KeyRangeNode root, KeyRangeNode newNode) {
if (root == null) {
root = newNode;
return root;
private void calculateSubTreeMinMax(KeyRangeNode node) {
  if (node == null) {
    return;
  }

  // Children are processed first because minRecord/maxRecord read the child's own sub-tree metrics.
  final KeyRangeNode leftChild = node.getLeft();
  if (leftChild != null) {
    calculateSubTreeMinMax(leftChild);
    node.setLeftSubTreeMin(minRecord(leftChild));
    node.setLeftSubTreeMax(maxRecord(leftChild));
  } else {
    // No left child: drop any value left over from a previous tree shape.
    node.setLeftSubTreeMin(null);
    node.setLeftSubTreeMax(null);
  }

  final KeyRangeNode rightChild = node.getRight();
  if (rightChild != null) {
    calculateSubTreeMinMax(rightChild);
    node.setRightSubTreeMin(minRecord(rightChild));
    node.setRightSubTreeMax(maxRecord(rightChild));
  } else {
    // No right child: drop any value left over from a previous tree shape.
    node.setRightSubTreeMin(null);
    node.setRightSubTreeMax(null);
  }
}

if (root.compareTo(newNode) == 0) {
root.addFiles(newNode.getFileNameList());
return root;
/**
 * Returns the smallest of the node's own min record key and the sub-tree minimums recorded for
 * whichever children the node has.
 */
private String minRecord(KeyRangeNode node) {
  String smallest = node.getKey().getMinRecordKey();

  if (node.getLeft() != null) {
    final String leftMin = node.getLeftSubTreeMin();
    if (leftMin.compareTo(smallest) < 0) {
      smallest = leftMin;
    }
  }

  if (node.getRight() != null) {
    final String rightMin = node.getRightSubTreeMin();
    if (rightMin.compareTo(smallest) < 0) {
      smallest = rightMin;
    }
  }

  return smallest;
}

if (root.compareTo(newNode) < 0) {
if (root.getRight() == null) {
root.setRightSubTreeMax(newNode.getMaxRecordKey());
root.setRightSubTreeMin(newNode.getMinRecordKey());
root.setRight(newNode);
} else {
if (root.getRightSubTreeMax().compareTo(newNode.getMaxRecordKey()) < 0) {
root.setRightSubTreeMax(newNode.getMaxRecordKey());
}
if (root.getRightSubTreeMin().compareTo(newNode.getMinRecordKey()) > 0) {
root.setRightSubTreeMin(newNode.getMinRecordKey());
}
insert(root.getRight(), newNode);
}
} else {
if (root.getLeft() == null) {
root.setLeftSubTreeMax(newNode.getMaxRecordKey());
root.setLeftSubTreeMin(newNode.getMinRecordKey());
root.setLeft(newNode);
} else {
if (root.getLeftSubTreeMax().compareTo(newNode.getMaxRecordKey()) < 0) {
root.setLeftSubTreeMax(newNode.getMaxRecordKey());
}
if (root.getLeftSubTreeMin().compareTo(newNode.getMinRecordKey()) > 0) {
root.setLeftSubTreeMin(newNode.getMinRecordKey());
}
insert(root.getLeft(), newNode);
}
/**
 * Returns the largest of the node's own max record key and the sub-tree maximums recorded for
 * whichever children the node has.
 *
 * @param node the node to inspect; must not be {@code null}
 * @return the largest max record key found
 */
private String maxRecord(KeyRangeNode node) {
  String max = node.getKey().getMaxRecordKey();
  // A child's recorded maximum is only consulted when that child exists.
  if (node.getLeft() != null && node.getLeftSubTreeMax().compareTo(max) > 0) {
    max = node.getLeftSubTreeMax();
  }
  if (node.getRight() != null && node.getRightSubTreeMax().compareTo(max) > 0) {
    max = node.getRightSubTreeMax();
  }
  return max;
}

/**
Expand All @@ -114,6 +111,9 @@ private KeyRangeNode insert(KeyRangeNode root, KeyRangeNode newNode) {
* @return the {@link Set} of matching index file names
*/
Set<String> getMatchingIndexFiles(String lookupKey) {
if(needReloadMetrics){
calculateSubTreeMinMax(getRoot());
}
Set<String> matchingFileNameSet = new HashSet<>();
getMatchingIndexFiles(getRoot(), lookupKey, matchingFileNameSet);
return matchingFileNameSet;
Expand All @@ -122,7 +122,7 @@ Set<String> getMatchingIndexFiles(String lookupKey) {
/**
* Fetches all the matching index files where the key could possibly be present.
*
* @param root refers to the current root of the look up tree
* @param root refers to the current root of the look up tree
* @param lookupKey the key to be searched for
*/
private void getMatchingIndexFiles(KeyRangeNode root, String lookupKey, Set<String> matchingFileNameSet) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,33 @@

package org.apache.hudi.index.bloom;

import java.io.Serializable;
import org.apache.hudi.common.util.rbtree.RedBlackTreeNode;

import java.util.ArrayList;
import java.util.List;

/**
* Represents a node in the {@link KeyRangeLookupTree}. Holds information pertaining to a single index file, viz file
* Represents a red-black tree node in the {@link KeyRangeLookupTree}. Holds information pertaining to a single index file, viz file
* name, min record key and max record key.
*/
class KeyRangeNode implements Comparable<KeyRangeNode>, Serializable {
class KeyRangeNode extends RedBlackTreeNode<RecordKeyRange> {

private final List<String> fileNameList = new ArrayList<>();
private final String minRecordKey;
private final String maxRecordKey;
private String rightSubTreeMax = null;
private String leftSubTreeMax = null;
private String rightSubTreeMin = null;
private String leftSubTreeMin = null;
private KeyRangeNode left = null;
private KeyRangeNode right = null;

/**
* Instantiates a new {@link KeyRangeNode}.
*
* @param minRecordKey min record key of the index file
* @param maxRecordKey max record key of the index file
* @param fileName file name of the index file
* @param fileName file name of the index file
*/
KeyRangeNode(String minRecordKey, String maxRecordKey, String fileName) {
super(new RecordKeyRange(minRecordKey, maxRecordKey));
this.fileNameList.add(fileName);
this.minRecordKey = minRecordKey;
this.maxRecordKey = maxRecordKey;
}

/**
Expand All @@ -62,40 +58,24 @@ void addFiles(List<String> newFiles) {

@Override
public String toString() {
return "KeyRangeNode{minRecordKey='" + minRecordKey + '\'' + ", maxRecordKey='" + maxRecordKey + '\''
+ ", fileNameList=" + fileNameList + ", rightSubTreeMax='" + rightSubTreeMax + '\'' + ", leftSubTreeMax='"
+ leftSubTreeMax + '\'' + ", rightSubTreeMin='" + rightSubTreeMin + '\'' + ", leftSubTreeMin='" + leftSubTreeMin
+ '\'' + '}';
}

/**
* Compares the min record key of two nodes, followed by max record key.
*
* @param that the {@link KeyRangeNode} to be compared with
* @return the result of comparison. 0 if both min and max are equal in both. 1 if this {@link KeyRangeNode} is
* greater than the {@code that} keyRangeNode. -1 if {@code that} keyRangeNode is greater than this {@link
* KeyRangeNode}
*/
@Override
public int compareTo(KeyRangeNode that) {
int compareValue = minRecordKey.compareTo(that.minRecordKey);
if (compareValue == 0) {
return maxRecordKey.compareTo(that.maxRecordKey);
} else {
return compareValue;
}
final RecordKeyRange key = getKey();
String range = key != null ? "minRecordKey='" + key.getMinRecordKey() + '\'' + ", maxRecordKey='"
+ key.getMaxRecordKey() + "', " : "";
return "KeyRangeNode{" + range + "fileNameList=" + fileNameList
+ ", rightSubTreeMax='" + rightSubTreeMax + '\'' + ", leftSubTreeMax='" + leftSubTreeMax + '\''
+ ", rightSubTreeMin='" + rightSubTreeMin + '\'' + ", leftSubTreeMin='" + leftSubTreeMin + '\'' + '}';
}

public List<String> getFileNameList() {
return fileNameList;
/**
 * Returns the left child as returned by the superclass, cast to {@link KeyRangeNode}.
 *
 * @return the left child; the lookup tree null-checks this value before use
 */
@Override
public KeyRangeNode getLeft() {
  return (KeyRangeNode) super.getLeft();
}

public String getMinRecordKey() {
return minRecordKey;
/**
 * Returns the right child as returned by the superclass, cast to {@link KeyRangeNode}.
 *
 * @return the right child; the lookup tree null-checks this value before use
 */
@Override
public KeyRangeNode getRight() {
  return (KeyRangeNode) super.getRight();
}

public String getMaxRecordKey() {
return maxRecordKey;
public List<String> getFileNameList() {
return fileNameList;
}

public String getRightSubTreeMin() {
Expand Down Expand Up @@ -130,19 +110,11 @@ public void setLeftSubTreeMax(String leftSubTreeMax) {
this.leftSubTreeMax = leftSubTreeMax;
}

public KeyRangeNode getLeft() {
return left;
}

public void setLeft(KeyRangeNode left) {
this.left = left;
}

public KeyRangeNode getRight() {
return right;
/**
 * Returns the min record key held by this node's {@link RecordKeyRange} key.
 */
public String getMinRecordKey() {
  final RecordKeyRange range = getKey();
  return range.getMinRecordKey();
}

public void setRight(KeyRangeNode right) {
this.right = right;
/**
 * Returns the max record key held by this node's {@link RecordKeyRange} key.
 */
public String getMaxRecordKey() {
  final RecordKeyRange range = getKey();
  return range.getMaxRecordKey();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hudi.index.bloom;

import java.io.Serializable;

/**
 * Represents the min and max record key in the {@link KeyRangeNode}.
 *
 * <p>Ranges are ordered by min record key first and by max record key second.
 */
class RecordKeyRange implements Comparable<RecordKeyRange>, Serializable {

  private final String minRecordKey;
  private final String maxRecordKey;

  /**
   * Instantiates a new {@link RecordKeyRange}.
   *
   * @param minRecordKey min record key
   * @param maxRecordKey max record key
   */
  RecordKeyRange(String minRecordKey, String maxRecordKey) {
    this.minRecordKey = minRecordKey;
    this.maxRecordKey = maxRecordKey;
  }

  /**
   * Renders both keys, each wrapped in single quotes.
   */
  @Override
  public String toString() {
    // Each key is wrapped in a matching pair of single quotes.
    return "RecordKeyRange{minRecordKey='" + minRecordKey + '\'' + ", maxRecordKey='" + maxRecordKey + '\'' + '}';
  }

  /**
   * Compares the min record key, followed by max record key.
   *
   * @param that the {@link RecordKeyRange} to be compared with
   * @return 0 if both min and max record keys are equal; a negative value if this range orders before
   *         {@code that}; a positive value if it orders after. The magnitude is whatever
   *         {@link String#compareTo(String)} returns and is not limited to -1/1.
   */
  @Override
  public int compareTo(RecordKeyRange that) {
    int compareValue = minRecordKey.compareTo(that.minRecordKey);
    if (compareValue == 0) {
      // Equal lower bounds: the upper bound breaks the tie.
      return maxRecordKey.compareTo(that.maxRecordKey);
    }
    return compareValue;
  }

  /**
   * @return the min record key of this range
   */
  public String getMinRecordKey() {
    return minRecordKey;
  }

  /**
   * @return the max record key of this range
   */
  public String getMaxRecordKey() {
    return maxRecordKey;
  }
}
Loading

0 comments on commit 03e741a

Please sign in to comment.