Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redesign of the DatePartitionedQueryPlanner (formerly known as the FederatedQueryPlanner) #2717

Open
wants to merge 23 commits into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ec81fe0
investigating why index holes are not being applied
hlgp Jan 15, 2025
29878aa
Changed IndexHole to ValueIndexHole
ivakegg Jan 21, 2025
4162351
Changed getters and setters from indexHoles to valueIndexHoles
ivakegg Jan 21, 2025
e2fae6f
Changed FederatedQueryPlanner to DatePartitionedQueryPlanner
ivakegg Jan 21, 2025
298cbe1
More renaming of stuff to make Hannah happy
ivakegg Jan 21, 2025
320ce0e
Working through new algorithm
ivakegg Jan 22, 2025
afeb191
Redesigned process by which index holes are determined such that we know
ivakegg Jan 29, 2025
efe9cf5
Merge branch 'integration' into task/hp-indexHoleInvestigation
ivakegg Jan 29, 2025
491f242
merge conflict updates
ivakegg Jan 29, 2025
fc66ff6
Added reprocessing of a query plan for subplans
ivakegg Jan 29, 2025
43ccdbc
Updated to handle expansion after pullup
ivakegg Jan 30, 2025
549130b
7.16.0-planner
ivakegg Jan 30, 2025
8e5d9e2
updated javadoc
ivakegg Jan 30, 2025
31b97c8
Merge branch 'integration' into task/hp-indexHoleInvestigation
ivakegg Jan 30, 2025
19e0ad5
Revert "7.16.0-planner"
ivakegg Jan 30, 2025
bb6045c
A little refactoring
ivakegg Jan 31, 2025
efa428a
Added some javadoc
ivakegg Jan 31, 2025
9d60bbc
Changed from using _HOLE_ to _EVAL_ to avoid field index lookups
ivakegg Feb 3, 2025
e5fabf4
Simplified unindexed fields visitor per review comments
ivakegg Feb 3, 2025
b217f49
Updated per review comments
ivakegg Feb 7, 2025
8e8ae1c
Added additional logic to the unindexed pushdown visitor and added test
ivakegg Feb 11, 2025
b3a8814
Missed a copy
ivakegg Feb 11, 2025
47ec37c
Use actual metadata utils version
ivakegg Feb 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
<version.datawave.common-utils>3.0.0</version.datawave.common-utils>
<version.datawave.dictionary-api>4.0.1</version.datawave.dictionary-api>
<version.datawave.mapreduce-query-api>1.0.0</version.datawave.mapreduce-query-api>
<version.datawave.metadata-utils>4.0.8</version.datawave.metadata-utils>
<version.datawave.metadata-utils>4.0.10</version.datawave.metadata-utils>
<version.datawave.metrics-reporter>3.0.0</version.datawave.metrics-reporter>
<version.datawave.query-api>1.0.0</version.datawave.query-api>
<version.datawave.query-metric-api>4.0.7</version.datawave.query-metric-api>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
import java.io.Serializable;

/**
* This class represents a hole in the global index. Used by the PushdownMissingIndexRangeNodesVisitor.
* This class represents a hole in the global index for a set of values in a specified date range. Used by the PushdownMissingIndexRangeNodesVisitor.
*/
public class IndexHole implements Serializable, Comparable<IndexHole> {
public class IndexValueHole implements Serializable, Comparable<IndexValueHole> {
private static final long serialVersionUID = -6778479621810682281L;

private String startValue;
private String endValue;
private String startDate;
private String endDate;

public IndexHole() {}
public IndexValueHole() {}

/**
* Create an index hole with a date range and value range.
Expand All @@ -23,7 +23,7 @@ public IndexHole() {}
* @param valueRange
* the start and end values of the known hole
*/
public IndexHole(String[] dateRange, String[] valueRange) {
public IndexValueHole(String[] dateRange, String[] valueRange) {
setStartValue(valueRange[0]);
setEndValue(valueRange[1]);
setStartDate(dateRange[0]);
Expand Down Expand Up @@ -106,8 +106,8 @@ public String toString() {

@Override
public boolean equals(Object o) {
if (o instanceof IndexHole) {
IndexHole hole = (IndexHole) o;
if (o instanceof IndexValueHole) {
IndexValueHole hole = (IndexValueHole) o;
return startValue.equals(hole.startValue) && endValue.equals(hole.endValue) && startDate.equals(hole.startDate) && endDate.equals(hole.endDate);
}
return false;
Expand All @@ -129,7 +129,7 @@ public int hashCode() {
* the index hole
* @return the comparison
*/
public int compareTo(IndexHole hole) {
public int compareTo(IndexValueHole hole) {
int comparison = startValue.compareTo(hole.startValue);
if (comparison == 0) {
comparison = endValue.compareTo(hole.endValue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
// Filter results on datatypes. Default to having no filters
private Set<String> datatypeFilter = UniversalSet.instance();
// A sorted list of index holes
private List<IndexHole> indexHoles = new ArrayList<>();
private List<IndexValueHole> indexValueHoles = new ArrayList<>();
// a set of user specified mappings
private Set<String> renameFields = new HashSet<>(0);
// Limit fields returned per event
Expand Down Expand Up @@ -527,7 +527,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
* The minimum percentage threshold that the count for an index row must meet compared to the count for the corresponding frequency row in the metadata
* table in order to NOT be considered a field index hole. The value must be between 0.0-1.0, where 1.0 is equivalent to 100%.
*/
private double fieldIndexHoleMinThreshold = 1.0d;
private double indexFieldHoleMinThreshold = 1.0d;

/**
* The set of date types that, if the query's end date is the current date, will NOT result in any date range adjustments or the addition of a
Expand Down Expand Up @@ -646,7 +646,7 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setUnevaluatedFields(null == other.getUnevaluatedFields() ? null : Sets.newHashSet(other.getUnevaluatedFields()));
this.setDatatypeFilter(null == other.getDatatypeFilter() ? null
: (other.getDatatypeFilter() instanceof UniversalSet) ? UniversalSet.instance() : Sets.newHashSet(other.getDatatypeFilter()));
this.setIndexHoles(null == other.getIndexHoles() ? null : Lists.newArrayList(other.getIndexHoles()));
this.setIndexValueHoles(null == other.getIndexValueHoles() ? null : Lists.newArrayList(other.getIndexValueHoles()));
this.setProjectFields(null == other.getProjectFields() ? null : Sets.newHashSet(other.getProjectFields()));
this.setRenameFields(null == other.getRenameFields() ? null : Sets.newHashSet(other.getRenameFields()));
this.setDisallowlistedFields(null == other.getDisallowlistedFields() ? null : Sets.newHashSet(other.getDisallowlistedFields()));
Expand Down Expand Up @@ -783,7 +783,7 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setCardinalityThreshold(other.getCardinalityThreshold());
this.setUseQueryTreeScanHintRules(other.isUseQueryTreeScanHintRules());
this.setQueryTreeScanHintRules(other.getQueryTreeScanHintRules());
this.setFieldIndexHoleMinThreshold(other.getFieldIndexHoleMinThreshold());
this.setIndexFieldHoleMinThreshold(other.getIndexFieldHoleMinThreshold());
this.setNoExpansionIfCurrentDateTypes(
other.getNoExpansionIfCurrentDateTypes() == null ? null : Sets.newHashSet(other.getNoExpansionIfCurrentDateTypes()));
this.setShardsAndDaysHintAllowed(other.isShardsAndDaysHintAllowed());
Expand Down Expand Up @@ -2335,12 +2335,12 @@ public void setAccrueStats(boolean accrueStats) {

}

public List<IndexHole> getIndexHoles() {
return indexHoles;
public List<IndexValueHole> getIndexValueHoles() {
return indexValueHoles;
}

public void setIndexHoles(List<IndexHole> indexHoles) {
this.indexHoles = indexHoles;
public void setIndexValueHoles(List<IndexValueHole> indexValueHoles) {
this.indexValueHoles = indexValueHoles;
}

public boolean getCollectTimingDetails() {
Expand Down Expand Up @@ -2789,12 +2789,12 @@ public void setRebuildDatatypeFilterPerShard(boolean rebuildDatatypeFilterPerSha
this.rebuildDatatypeFilterPerShard = rebuildDatatypeFilterPerShard;
}

public double getFieldIndexHoleMinThreshold() {
return fieldIndexHoleMinThreshold;
public double getIndexFieldHoleMinThreshold() {
return indexFieldHoleMinThreshold;
}

public void setFieldIndexHoleMinThreshold(double fieldIndexHoleMinThreshold) {
this.fieldIndexHoleMinThreshold = fieldIndexHoleMinThreshold;
public void setIndexFieldHoleMinThreshold(double indexFieldHoleMinThreshold) {
this.indexFieldHoleMinThreshold = indexFieldHoleMinThreshold;
}

public boolean getReduceIngestTypes() {
Expand Down Expand Up @@ -3004,7 +3004,7 @@ public boolean equals(Object o) {
Objects.equals(getNonEventKeyPrefixes(), that.getNonEventKeyPrefixes()) &&
Objects.equals(getUnevaluatedFields(), that.getUnevaluatedFields()) &&
Objects.equals(getDatatypeFilter(), that.getDatatypeFilter()) &&
Objects.equals(getIndexHoles(), that.getIndexHoles()) &&
Objects.equals(getIndexValueHoles(), that.getIndexValueHoles()) &&
Objects.equals(getProjectFields(), that.getProjectFields()) &&
Objects.equals(getRenameFields(), that.getRenameFields()) &&
Objects.equals(getDisallowlistedFields(), that.getDisallowlistedFields()) &&
Expand Down Expand Up @@ -3147,7 +3147,7 @@ public int hashCode() {
getNonEventKeyPrefixes(),
getUnevaluatedFields(),
getDatatypeFilter(),
getIndexHoles(),
getIndexValueHoles(),
getProjectFields(),
getRenameFields(),
getDisallowlistedFields(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.commons.jexl3.parser.ASTDivNode;
import org.apache.commons.jexl3.parser.ASTEQNode;
import org.apache.commons.jexl3.parser.ASTERNode;
import org.apache.commons.jexl3.parser.ASTEWNode;
import org.apache.commons.jexl3.parser.ASTFalseNode;
import org.apache.commons.jexl3.parser.ASTGENode;
import org.apache.commons.jexl3.parser.ASTGTNode;
Expand All @@ -25,21 +26,23 @@
import org.apache.commons.jexl3.parser.ASTModNode;
import org.apache.commons.jexl3.parser.ASTMulNode;
import org.apache.commons.jexl3.parser.ASTNENode;
import org.apache.commons.jexl3.parser.ASTNEWNode;
import org.apache.commons.jexl3.parser.ASTNRNode;
import org.apache.commons.jexl3.parser.ASTNSWNode;
import org.apache.commons.jexl3.parser.ASTNotNode;
import org.apache.commons.jexl3.parser.ASTNullLiteral;
import org.apache.commons.jexl3.parser.ASTNumberLiteral;
import org.apache.commons.jexl3.parser.ASTOrNode;
import org.apache.commons.jexl3.parser.ASTReference;
import org.apache.commons.jexl3.parser.ASTReferenceExpression;
import org.apache.commons.jexl3.parser.ASTSWNode;
import org.apache.commons.jexl3.parser.ASTSizeFunction;
import org.apache.commons.jexl3.parser.ASTStringLiteral;
import org.apache.commons.jexl3.parser.ASTSubNode;
import org.apache.commons.jexl3.parser.ASTTrueNode;
import org.apache.commons.jexl3.parser.ASTUnaryMinusNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.commons.jexl3.parser.ParseException;
import org.apache.log4j.Logger;

import com.google.common.collect.Sets;
Expand Down Expand Up @@ -188,156 +191,54 @@ public Object visit(ASTAndNode node, Object data) {
return data;
}

public Object visit(ASTEQNode node, Object data) {
StringBuilder sb = (StringBuilder) data;

private StringBuilder buildSimpleExpression(JexlNode node, String operand, StringBuilder sb) {
int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTEQNode has more than two children");
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR,
"An " + node.getClass().getSimpleName() + " must have exactly two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" == ");
sb.append(' ').append(operand).append(' ');

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
}

public Object visit(ASTNENode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTNENode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" != ");

node.jjtGetChild(1).jjtAccept(this, sb);
public Object visit(ASTEQNode node, Object data) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class has some great cleanup

return buildSimpleExpression(node, "==", (StringBuilder) data);
}

return sb;
public Object visit(ASTNENode node, Object data) {
return buildSimpleExpression(node, "!=", (StringBuilder) data);
}

public Object visit(ASTLTNode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTLTNode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" < ");

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
return buildSimpleExpression(node, "<", (StringBuilder) data);
}

public Object visit(ASTGTNode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTGTNode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" > ");

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
return buildSimpleExpression(node, ">", (StringBuilder) data);
}

public Object visit(ASTLENode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTLENode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" <= ");

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
return buildSimpleExpression(node, "<=", (StringBuilder) data);
}

public Object visit(ASTGENode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTGENode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" >= ");

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
return buildSimpleExpression(node, ">=", (StringBuilder) data);
}

public Object visit(ASTERNode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTERNode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" =~ ");

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
return buildSimpleExpression(node, "=~", (StringBuilder) data);
}

public Object visit(ASTNRNode node, Object data) {
StringBuilder sb = (StringBuilder) data;

int numChildren = node.jjtGetNumChildren();

if (2 != numChildren) {
QueryException qe = new QueryException(DatawaveErrorCode.NODE_PROCESSING_ERROR, "An ASTNRNode has more than two children");
throw new IllegalArgumentException(qe);
}

node.jjtGetChild(0).jjtAccept(this, sb);

sb.append(" !~ ");

node.jjtGetChild(1).jjtAccept(this, sb);

return sb;
return buildSimpleExpression(node, "!~", (StringBuilder) data);
}

public Object visit(ASTNotNode node, Object data) {
Expand Down Expand Up @@ -516,6 +417,26 @@ public Object visit(ASTAddNode node, Object data) {
return sb;
}

/**
* Renders a {@code =^} (JEXL starts-with) comparison into the query string being built.
*
* @param node
*            the node to render; {@code buildSimpleExpression} throws an {@link IllegalArgumentException} unless it has exactly two children
* @param data
*            the {@link StringBuilder} accumulating the query string
* @return the same {@link StringBuilder}, with the left child, {@code " =^ "} and the right child appended
*/
@Override
protected Object visit(ASTSWNode node, Object data) {
    final StringBuilder sb = (StringBuilder) data;
    return buildSimpleExpression(node, "=^", sb);
}

/**
* Renders a {@code !^} (JEXL not-starts-with) comparison into the query string being built.
*
* @param node
*            the node to render; {@code buildSimpleExpression} throws an {@link IllegalArgumentException} unless it has exactly two children
* @param data
*            the {@link StringBuilder} accumulating the query string
* @return the same {@link StringBuilder}, with the left child, {@code " !^ "} and the right child appended
*/
@Override
protected Object visit(ASTNSWNode node, Object data) {
    final StringBuilder sb = (StringBuilder) data;
    return buildSimpleExpression(node, "!^", sb);
}

/**
* Renders a {@code =$} (JEXL ends-with) comparison into the query string being built.
*
* @param node
*            the node to render; {@code buildSimpleExpression} throws an {@link IllegalArgumentException} unless it has exactly two children
* @param data
*            the {@link StringBuilder} accumulating the query string
* @return the same {@link StringBuilder}, with the left child, {@code " =$ "} and the right child appended
*/
@Override
protected Object visit(ASTEWNode node, Object data) {
    final StringBuilder sb = (StringBuilder) data;
    return buildSimpleExpression(node, "=$", sb);
}

/**
* Renders a {@code !$} (JEXL not-ends-with) comparison into the query string being built.
*
* @param node
*            the node to render; {@code buildSimpleExpression} throws an {@link IllegalArgumentException} unless it has exactly two children
* @param data
*            the {@link StringBuilder} accumulating the query string
* @return the same {@link StringBuilder}, with the left child, {@code " !$ "} and the right child appended
*/
@Override
protected Object visit(ASTNEWNode node, Object data) {
    final StringBuilder sb = (StringBuilder) data;
    return buildSimpleExpression(node, "!$", sb);
}

@Override
public Object visit(ASTSubNode node, Object data) {
StringBuilder sb = (StringBuilder) data;
Expand Down
Loading
Loading