Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-9099: Correctly handle repeats in ORDERED and UNORDERED intervals #1097

Merged
merged 8 commits into from
Feb 6, 2020
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -24,14 +24,14 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ArrayUtil;

class CachingMatchesIterator extends FilterMatchesIterator {
class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMatchesIterator {

private boolean positioned = false;
private int[] posAndOffsets = new int[4*4];
private Query[] matchingQueries = new Query[4];
private int count = 0;

CachingMatchesIterator(MatchesIterator in) {
CachingMatchesIterator(IntervalMatchesIterator in) {
super(in);
}

@@ -133,4 +133,13 @@ public Query getQuery() {
};
}

/**
 * Reports the gaps of the current match by casting the wrapped iterator
 * {@code in} to {@link IntervalMatchesIterator} and asking it.
 */
@Override
public int gaps() {
  final IntervalMatchesIterator delegate = (IntervalMatchesIterator) in;
  return delegate.gaps();
}

/**
 * Reports the width of the current match by casting the wrapped iterator
 * {@code in} to {@link IntervalMatchesIterator} and asking it.
 */
@Override
public int width() {
  final IntervalMatchesIterator delegate = (IntervalMatchesIterator) in;
  return delegate.width();
}
}
Original file line number Diff line number Diff line change
@@ -65,10 +65,10 @@ public final IntervalIterator intervals(String field, LeafReaderContext ctx) thr
protected abstract IntervalIterator combine(List<IntervalIterator> iterators);

@Override
public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
List<MatchesIterator> subs = new ArrayList<>();
public final IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
List<IntervalMatchesIterator> subs = new ArrayList<>();
for (IntervalsSource source : subSources) {
MatchesIterator mi = source.matches(field, ctx, doc);
IntervalMatchesIterator mi = source.matches(field, ctx, doc);
if (mi == null) {
return null;
}
@@ -87,13 +87,13 @@ public final MatchesIterator matches(String field, LeafReaderContext ctx, int do
return isMinimizing ? new MinimizingConjunctionMatchesIterator(it, subs) : new ConjunctionMatchesIterator(it, subs);
}

private static class ConjunctionMatchesIterator implements MatchesIterator {
private static class ConjunctionMatchesIterator implements IntervalMatchesIterator {

final IntervalIterator iterator;
final List<MatchesIterator> subs;
final List<IntervalMatchesIterator> subs;
boolean cached = true;

private ConjunctionMatchesIterator(IntervalIterator iterator, List<MatchesIterator> subs) {
private ConjunctionMatchesIterator(IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
this.iterator = iterator;
this.subs = subs;
}
@@ -152,9 +152,19 @@ public MatchesIterator getSubMatches() throws IOException {
public Query getQuery() {
throw new UnsupportedOperationException();
}

/**
 * Returns whatever {@code iterator.gaps()} reports for the interval the
 * underlying interval iterator is currently positioned on.
 */
@Override
public int gaps() {
  final int gapCount = iterator.gaps();
  return gapCount;
}

/**
 * Returns whatever {@code iterator.width()} reports for the interval the
 * underlying interval iterator is currently positioned on.
 */
@Override
public int width() {
  final int intervalWidth = iterator.width();
  return intervalWidth;
}
}

private static class SingletonMatchesIterator extends FilterMatchesIterator {
static class SingletonMatchesIterator extends FilterMatchesIterator {

boolean exhausted = false;

Original file line number Diff line number Diff line change
@@ -48,12 +48,12 @@ public final IntervalIterator intervals(String field, LeafReaderContext ctx) thr
}

@Override
public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
MatchesIterator minIt = minuend.matches(field, ctx, doc);
public final IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
IntervalMatchesIterator minIt = minuend.matches(field, ctx, doc);
if (minIt == null) {
return null;
}
MatchesIterator subIt = subtrahend.matches(field, ctx, doc);
IntervalMatchesIterator subIt = subtrahend.matches(field, ctx, doc);
if (subIt == null) {
return minIt;
}
Original file line number Diff line number Diff line change
@@ -82,15 +82,24 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO
}

@Override
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
List<MatchesIterator> subMatches = new ArrayList<>();
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
List<IntervalMatchesIterator> subMatches = new ArrayList<>();
for (IntervalsSource subSource : subSources) {
MatchesIterator mi = subSource.matches(field, ctx, doc);
IntervalMatchesIterator mi = subSource.matches(field, ctx, doc);
if (mi != null) {
subMatches.add(mi);
}
}
return MatchesUtils.disjunction(subMatches);
if (subMatches.size() == 0) {
return null;
}
DisjunctionIntervalIterator it = new DisjunctionIntervalIterator(
subMatches.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList())
);
if (it.advance(doc) != doc) {
return null;
}
return new DisjunctionMatchesIterator(it, subMatches);
}

@Override
@@ -196,6 +205,21 @@ private void reset() throws IOException {
current = EMPTY;
}

/**
 * Returns the position of {@code current} within {@code iterators}.
 *
 * @return {@code -1} when {@code current} is {@code EMPTY},
 *         {@code NO_MORE_INTERVALS} when it is {@code EXHAUSTED}, otherwise the
 *         index {@code i} for which {@code iterators.get(i) == current}
 * @throws IllegalStateException if {@code current} is neither a sentinel nor
 *         one of the entries of {@code iterators}
 */
int currentOrd() {
  if (current == EMPTY) {
    return -1;
  }
  if (current == EXHAUSTED) {
    return NO_MORE_INTERVALS;
  }
  for (int i = 0; i < iterators.size(); i++) {
    // compared by reference (==), not by equals()
    if (iterators.get(i) == current) {
      return i;
    }
  }
  // Reaching this point means 'current' points at something that was never
  // registered in 'iterators'; say so instead of throwing a bare exception.
  throw new IllegalStateException(
      "Current iterator is not one of the " + iterators.size() + " sub-iterators");
}

@Override
public int nextInterval() throws IOException {
if (current == EMPTY || current == EXHAUSTED) {
@@ -344,4 +368,68 @@ public float matchCost() {
}
};

/**
 * Exposes the intervals of a {@link DisjunctionIntervalIterator} as matches.
 *
 * Positions, gaps and width are read from the interval iterator itself;
 * offsets, sub-matches and the query are read from the entry of {@code subs}
 * whose index equals {@code it.currentOrd()}.
 */
private static class DisjunctionMatchesIterator implements IntervalMatchesIterator {

  final DisjunctionIntervalIterator it;
  final List<IntervalMatchesIterator> subs;

  private DisjunctionMatchesIterator(DisjunctionIntervalIterator it, List<IntervalMatchesIterator> subs) {
    this.it = it;
    this.subs = subs;
  }

  /**
   * Looks up the sub-iterator selected by {@code it.currentOrd()}.
   * Asserts that the ordinal is neither -1 nor NO_MORE_INTERVALS.
   */
  private IntervalMatchesIterator currentSub() {
    final int ord = it.currentOrd();
    assert ord != -1 && ord != IntervalIterator.NO_MORE_INTERVALS;
    return subs.get(ord);
  }

  /** Moves to the next interval; false once the interval iterator reports NO_MORE_INTERVALS. */
  @Override
  public boolean next() throws IOException {
    final int nextStart = it.nextInterval();
    return nextStart != IntervalIterator.NO_MORE_INTERVALS;
  }

  /** Start position of the current interval. */
  @Override
  public int startPosition() {
    return it.start();
  }

  /** End position of the current interval. */
  @Override
  public int endPosition() {
    return it.end();
  }

  /** Start offset as reported by the currently selected sub-iterator. */
  @Override
  public int startOffset() throws IOException {
    return currentSub().startOffset();
  }

  /** End offset as reported by the currently selected sub-iterator. */
  @Override
  public int endOffset() throws IOException {
    return currentSub().endOffset();
  }

  /** Sub-matches as reported by the currently selected sub-iterator. */
  @Override
  public MatchesIterator getSubMatches() throws IOException {
    return currentSub().getSubMatches();
  }

  /** Query as reported by the currently selected sub-iterator. */
  @Override
  public Query getQuery() {
    return currentSub().getQuery();
  }

  /** Gaps of the current interval, taken from the interval iterator. */
  @Override
  public int gaps() {
    return it.gaps();
  }

  /** Width of the current interval, taken from the interval iterator. */
  @Override
  public int width() {
    return it.width();
  }
}

}
Original file line number Diff line number Diff line change
@@ -49,8 +49,8 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO
}

@Override
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
MatchesIterator in = source.matches(field, ctx, doc);
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
IntervalMatchesIterator in = source.matches(field, ctx, doc);
if (in == null) {
return null;
}
Original file line number Diff line number Diff line change
@@ -108,8 +108,8 @@ protected boolean accept() {
}

@Override
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
MatchesIterator mi = in.matches(field, ctx, doc);
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
IntervalMatchesIterator mi = in.matches(field, ctx, doc);
if (mi == null) {
return null;
}
Original file line number Diff line number Diff line change
@@ -43,7 +43,7 @@ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IO
}

@Override
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
return source.matches(this.field, ctx, doc);
}

Original file line number Diff line number Diff line change
@@ -72,6 +72,13 @@ public abstract class IntervalIterator extends DocIdSetIterator {
*/
public abstract int gaps();

/**
 * The width of the current interval, computed as {@code end() - start() + 1}
 */
public int width() {
  final int last = end();
  final int first = start();
  return last - first + 1;
}

/**
* Advance the iterator to the next interval
*
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@

final class IntervalMatches {

static MatchesIterator asMatches(IntervalIterator iterator, MatchesIterator source, int doc) throws IOException {
static IntervalMatchesIterator asMatches(IntervalIterator iterator, IntervalMatchesIterator source, int doc) throws IOException {
if (source == null) {
return null;
}
@@ -34,7 +34,7 @@ static MatchesIterator asMatches(IntervalIterator iterator, MatchesIterator sour
if (iterator.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
return null;
}
return new MatchesIterator() {
return new IntervalMatchesIterator() {

boolean cached = true;

@@ -67,6 +67,16 @@ public int endOffset() throws IOException {
return source.endOffset();
}

/** Gaps of the current match, as reported by the wrapped {@code iterator}. */
@Override
public int gaps() {
  final int gapCount = iterator.gaps();
  return gapCount;
}

/** Width of the current match, as reported by the wrapped {@code iterator}. */
@Override
public int width() {
  final int matchWidth = iterator.width();
  return matchWidth;
}

@Override
public MatchesIterator getSubMatches() throws IOException {
return source.getSubMatches();
@@ -79,32 +89,41 @@ public Query getQuery() {
};
}

enum State { UNPOSITIONED, ITERATING, EXHAUSTED }
enum State { UNPOSITIONED, ITERATING, NO_MORE_INTERVALS, EXHAUSTED }

static IntervalIterator wrapMatches(MatchesIterator mi, int doc) {
static IntervalIterator wrapMatches(IntervalMatchesIterator mi, int doc) {
return new IntervalIterator() {

State state = State.UNPOSITIONED;

/**
 * Start of the current interval: {@code NO_MORE_INTERVALS} once the state is
 * {@code State.NO_MORE_INTERVALS}, otherwise {@code mi.startPosition()}.
 */
@Override
public int start() {
  if (state != State.NO_MORE_INTERVALS) {
    assert state == State.ITERATING;
    return mi.startPosition();
  }
  return NO_MORE_INTERVALS;
}

/**
 * End of the current interval: {@code NO_MORE_INTERVALS} once the state is
 * {@code State.NO_MORE_INTERVALS}, otherwise {@code mi.endPosition()}.
 */
@Override
public int end() {
  if (state != State.NO_MORE_INTERVALS) {
    assert state == State.ITERATING;
    return mi.endPosition();
  }
  return NO_MORE_INTERVALS;
}

@Override
public int gaps() {
assert state == State.ITERATING;
if (mi instanceof IntervalMatchesIterator) {
return ((IntervalMatchesIterator)mi).gaps();
}
return 0;
return mi.gaps();
}

/**
 * Width of the current match, read from the wrapped matches iterator.
 * Asserts that the state is {@code State.ITERATING}.
 */
@Override
public int width() {
  assert state == State.ITERATING;
  final int matchWidth = mi.width();
  return matchWidth;
}

@Override
@@ -113,6 +132,7 @@ public int nextInterval() throws IOException {
if (mi.next()) {
return mi.startPosition();
}
state = State.NO_MORE_INTERVALS;
return NO_MORE_INTERVALS;
}

@@ -127,6 +147,7 @@ public int docID() {
case UNPOSITIONED:
return -1;
case ITERATING:
case NO_MORE_INTERVALS:
return doc;
case EXHAUSTED:
}
Original file line number Diff line number Diff line change
@@ -20,19 +20,24 @@
import org.apache.lucene.search.MatchesIterator;

/**
* An extension of MatchesIterator that allows the gaps from a wrapped
* IntervalIterator to be reported.
* An extension of MatchesIterator that allows it to be treated as
* an IntervalIterator
*
* This is necessary because {@link MatchesIterator#getSubMatches()} returns
* the submatches of all nested matches as a flat iterator, but
* {@link IntervalIterator#gaps()} only returns the gaps between its immediate
* sub-matches, so we can't calculate the latter using the former.
* This is necessary to get access to {@link IntervalIterator#gaps()}
* and {@link IntervalIterator#width()} when constructing matches
*/
interface IntervalMatchesIterator extends MatchesIterator {
public interface IntervalMatchesIterator extends MatchesIterator {

/**
* The number of top-level gaps inside the current match
* @see IntervalIterator#gaps()
*/
int gaps();

/**
* The width of the current match
* @see IntervalIterator#width()
*/
int width();

}
Loading