Skip to content

Commit

Permalink
Performance improvement of findShortestWord and findLongestWord
Browse files Browse the repository at this point in the history
  • Loading branch information
takawitter committed Jul 20, 2017
1 parent afa0b27 commit 589adb5
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- findShortestWord --
1892.181 ms in 100000call: org.trie4j.patricia.TailPatriciaTrie
138.379 ms in 100000call: org.trie4j.patricia.OptTailPatriciaTrie
2309.173 ms in 100000call: org.trie4j.doublearray.DoubleArray
130.442 ms in 100000call: org.trie4j.doublearray.OptDoubleArray
3084.871 ms in 100000call: org.trie4j.louds.TailLOUDSTrie
523.670 ms in 100000call: org.trie4j.louds.OptTailLOUDSTrie
2560.991 ms in 100000call: org.trie4j.louds.InlinedTailLOUDSTrie
533.103 ms in 100000call: org.trie4j.louds.OptInlinedTailLOUDSTrie
-- findLongestWord --
1613.162 ms in 100000call: org.trie4j.patricia.TailPatriciaTrie
208.330 ms in 100000call: org.trie4j.patricia.OptTailPatriciaTrie
2031.218 ms in 100000call: org.trie4j.doublearray.DoubleArray
150.888 ms in 100000call: org.trie4j.doublearray.OptDoubleArray
2679.538 ms in 100000call: org.trie4j.louds.TailLOUDSTrie
586.631 ms in 100000call: org.trie4j.louds.OptTailLOUDSTrie
2322.417 ms in 100000call: org.trie4j.louds.InlinedTailLOUDSTrie
533.238 ms in 100000call: org.trie4j.louds.OptInlinedTailLOUDSTrie
79 changes: 54 additions & 25 deletions trie4j/src/main/java/org/trie4j/doublearray/DoubleArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,60 @@ public int getTermId(String text) {
return term.get(nid) ? term.rank1(nid) - 1 : -1;
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the shortest word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findShortestWord(CharSequence chars, int start, int end, StringBuilder word) {
    for(int i = start; i < end; i++){
        int nodeIndex = 0; // each attempt restarts from node 0
        try{
            for(int j = i; j < end; j++){
                int cid = findCharId(chars.charAt(j));
                if(cid == -1) break;
                int b = base[nodeIndex];
                if(b == BASE_EMPTY) break;
                int next = b + cid;
                // A slot outside the check array cannot be a child of nodeIndex;
                // testing it up front avoids raising an exception for an ordinary mismatch.
                if(next < 0 || next >= check.length) break;
                if(nodeIndex != check[next]) break;
                nodeIndex = next;
                if(term.get(nodeIndex)){
                    // First terminal reached from i: this is the shortest word starting here.
                    if(word != null) word.append(chars, i, j + 1);
                    return i;
                }
            }
        } catch(ArrayIndexOutOfBoundsException ignored){
            // Safety net for any remaining out-of-range lookup: treat it as
            // "no word starts at i" and keep scanning from the next start position
            // instead of aborting the whole search.
        }
    }
    return -1;
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the longest word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findLongestWord(CharSequence chars, int start, int end, StringBuilder word) {
    for(int i = start; i < end; i++){
        int nodeIndex = 0; // each attempt restarts from node 0
        // Index of the last character of the longest word seen so far from i.
        // Declared outside the try so a match survives an out-of-range lookup further on.
        int lastJ = -1;
        try{
            for(int j = i; j < end; j++){
                int cid = findCharId(chars.charAt(j));
                if(cid == -1) break;
                int b = base[nodeIndex];
                if(b == BASE_EMPTY) break;
                int next = b + cid;
                // A slot outside the check array cannot be a child of nodeIndex;
                // testing it up front avoids raising an exception for an ordinary mismatch.
                if(next < 0 || next >= check.length) break;
                if(nodeIndex != check[next]) break;
                nodeIndex = next;
                if(term.get(nodeIndex)){
                    // Remember this terminal but keep walking: a longer word may follow.
                    lastJ = j;
                }
            }
        } catch(ArrayIndexOutOfBoundsException ignored){
            // Safety net for any remaining out-of-range lookup: the walk from i simply
            // ends here; whatever was matched before the failure is still reported below.
        }
        if(lastJ != -1){
            if(word != null) word.append(chars, i, lastJ + 1);
            return i;
        }
    }
    return -1;
}

@Override
public Iterable<String> commonPrefixSearch(String query) {
List<String> ret = new ArrayList<String>();
Expand Down Expand Up @@ -268,31 +322,6 @@ public Iterable<Pair<String, Integer>> commonPrefixSearchWithTermId(
return ret;
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the first (shortest) word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findWord(CharSequence chars, int start, int end, StringBuilder word) {
    for(int i = start; i < end; i++){
        int nodeIndex = 0; // each attempt restarts from node 0
        try{
            for(int j = i; j < end; j++){
                int cid = findCharId(chars.charAt(j));
                if(cid == -1) break;
                int b = base[nodeIndex];
                if(b == BASE_EMPTY) break;
                int next = b + cid;
                // A slot outside the check array cannot be a child of nodeIndex;
                // testing it up front avoids raising an exception for an ordinary mismatch.
                if(next < 0 || next >= check.length) break;
                if(nodeIndex != check[next]) break;
                nodeIndex = next;
                if(term.get(nodeIndex)){
                    if(word != null) word.append(chars, i, j + 1);
                    return i;
                }
            }
        } catch(ArrayIndexOutOfBoundsException ignored){
            // Safety net for any remaining out-of-range lookup: treat it as
            // "no word starts at i" and keep scanning from the next start position
            // instead of aborting the whole search.
        }
    }
    return -1;
}

@Override
public Iterable<String> predictiveSearch(String prefix) {
List<String> ret = new ArrayList<String>();
Expand Down
60 changes: 60 additions & 0 deletions trie4j/src/main/java/org/trie4j/louds/InlinedTailLOUDSTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,66 @@ public int size() {
return size;
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the shortest word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findShortestWord(CharSequence chars, int start, int end, StringBuilder word) {
    // One iterator is reused for every node; setIndex() repositions it.
    TailCharIterator tci = new TailCharIterator(tails, -1);
    for(int i = start; i < end; i++){
        int nodeId = 0; // root
        for(int j = i; j < end; j++){
            int child = getChildNode(nodeId, chars.charAt(j));
            if(child == -1) break;
            // Besides its first character the child carries a tail that must match as well.
            tci.setIndex(tail[child]);
            boolean found = true;
            while(tci.hasNext()){
                j++;
                // Pessimistically mark as failed until this tail character is confirmed.
                found = false;
                if(j >= end) break;
                if(chars.charAt(j) != tci.next()) break;
                found = true;
            }
            if(!found) break;
            if(term.get(child)){
                // Guard against a null builder: callers may only want the position.
                if(word != null) word.append(chars, i, j + 1);
                return i;
            }
            nodeId = child;
        }
    }
    return -1;
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the longest word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findLongestWord(CharSequence chars, int start, int end, StringBuilder word) {
    // One iterator is reused for every node; setIndex() repositions it.
    TailCharIterator tci = new TailCharIterator(tails, -1);
    for(int i = start; i < end; i++){
        int nodeId = 0; // root
        // Index of the last character of the longest word seen so far from i.
        int lastJ = -1;
        for(int j = i; j < end; j++){
            int child = getChildNode(nodeId, chars.charAt(j));
            if(child == -1) break;
            // Besides its first character the child carries a tail that must match as well.
            tci.setIndex(tail[child]);
            boolean found = true;
            while(tci.hasNext()){
                j++;
                // Pessimistically mark as failed until this tail character is confirmed.
                found = false;
                if(j >= end) break;
                if(chars.charAt(j) != tci.next()) break;
                found = true;
            }
            if(!found) break;
            if(term.get(child)){
                // Remember this terminal but keep descending: a longer word may follow.
                lastJ = j;
            }
            nodeId = child;
        }
        if(lastJ != -1){
            // Guard against a null builder: callers may only want the position.
            if(word != null) word.append(chars, i, lastJ + 1);
            return i;
        }
    }
    return -1;
}

@Override
public Iterable<String> commonPrefixSearch(String query) {
List<String> ret = new ArrayList<String>();
Expand Down
62 changes: 62 additions & 0 deletions trie4j/src/main/java/org/trie4j/louds/TailLOUDSTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,68 @@ public void dump(Writer writer) throws IOException{
writer.write("\n");
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the shortest word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findShortestWord(CharSequence chars, int start, int end, StringBuilder word) {
    // Scratch objects shared by every lookup in this call.
    Range r = new Range();
    TailCharIterator tci = tailArray.newIterator();
    for(int i = start; i < end; i++){
        int nodeId = 0; // root
        for(int j = i; j < end; j++){
            int child = getChildNode(nodeId, chars.charAt(j), r);
            if(child == -1) break;
            // Besides its first character the child carries a tail that must match as well.
            tci.setIndex(tailArray.getIteratorOffset(child));
            boolean found = true;
            while(tci.hasNext()){
                j++;
                // Pessimistically mark as failed until this tail character is confirmed.
                found = false;
                if(j >= end) break;
                if(chars.charAt(j) != tci.next()) break;
                found = true;
            }
            if(!found) break;
            if(term.get(child)){
                // Guard against a null builder: callers may only want the position.
                if(word != null) word.append(chars, i, j + 1);
                return i;
            }
            nodeId = child;
        }
    }
    return -1;
}

/**
 * Scans {@code chars[start, end)} for the first start position at which a word of
 * this trie begins and reports the longest word beginning there.
 *
 * @param chars the text to scan
 * @param start index of the first candidate start position (inclusive)
 * @param end   index at which scanning stops (exclusive)
 * @param word  receives the matched characters when non-null; may be {@code null}
 *              if only the position is of interest
 * @return the index in {@code chars} where the matched word starts, or -1 if no word is found
 */
@Override
public int findLongestWord(CharSequence chars, int start, int end, StringBuilder word) {
    // Scratch objects shared by every lookup in this call.
    Range r = new Range();
    TailCharIterator tci = tailArray.newIterator();
    for(int i = start; i < end; i++){
        int nodeId = 0; // root
        // Index of the last character of the longest word seen so far from i.
        int lastJ = -1;
        for(int j = i; j < end; j++){
            int child = getChildNode(nodeId, chars.charAt(j), r);
            if(child == -1) break;
            // Besides its first character the child carries a tail that must match as well.
            tci.setIndex(tailArray.getIteratorOffset(child));
            boolean found = true;
            while(tci.hasNext()){
                j++;
                // Pessimistically mark as failed until this tail character is confirmed.
                found = false;
                if(j >= end) break;
                if(chars.charAt(j) != tci.next()) break;
                found = true;
            }
            if(!found) break;
            if(term.get(child)){
                // Remember this terminal but keep descending: a longer word may follow.
                lastJ = j;
            }
            nodeId = child;
        }
        if(lastJ != -1){
            // Guard against a null builder: callers may only want the position.
            if(word != null) word.append(chars, i, lastJ + 1);
            return i;
        }
    }
    return -1;
}

@Override
public Iterable<Pair<String, Integer>> commonPrefixSearchWithTermId(String query) {
List<Pair<String, Integer>> ret = new ArrayList<Pair<String, Integer>>();
Expand Down
38 changes: 36 additions & 2 deletions trie4j/src/main/java/org/trie4j/patricia/TailPatriciaTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public CharSequence getTails() {
}

@Override
public int findWord(CharSequence chars, int start, int end, StringBuilder word){
public int findShortestWord(CharSequence chars, int start, int end, StringBuilder word){
TailCharIterator it = new TailCharIterator(tails, -1);
for(int i = start; i < end; i++){
TailPatriciaTrieNode node = root;
Expand Down Expand Up @@ -155,7 +155,41 @@ public int findWord(CharSequence chars, int start, int end, StringBuilder word){
}
return -1;
}


/**
 * Looks for the first position in {@code chars[start, end)} at which a word of this
 * trie begins and, among the words beginning there, picks the longest one.
 *
 * @param chars the text to scan
 * @param start first candidate start position (inclusive)
 * @param end   scan limit (exclusive)
 * @param word  if non-null, the matched characters are appended to it
 * @return the start index of the matched word in {@code chars}, or -1 when nothing matches
 */
@Override
public int findLongestWord(CharSequence chars, int start, int end, StringBuilder word){
    final TailCharIterator tailChars = new TailCharIterator(tails, -1);
    for(int begin = start; begin < end; begin++){
        int lastMatchEnd = -1;
        TailPatriciaTrieNode current = root;
        int pos = begin;
        while(pos < end){
            current = current.getChild(chars.charAt(pos));
            if(current == null){
                break;
            }
            // Consume the node's tail; stop at the first character that disagrees
            // or when the text runs out before the tail does.
            tailChars.setIndex(current.getTailIndex());
            boolean tailMatched = true;
            while(tailMatched && tailChars.hasNext()){
                pos++;
                tailMatched = pos != end && chars.charAt(pos) == tailChars.next();
            }
            if(!tailMatched){
                break;
            }
            if(current.isTerminate()){
                // A word ends here; keep going in case a longer one exists.
                lastMatchEnd = pos;
            }
            pos++;
        }
        if(lastMatchEnd == -1){
            continue;
        }
        if(word != null){
            word.append(chars, begin, lastMatchEnd + 1);
        }
        return begin;
    }
    return -1;
}

@Override
public Iterable<String> commonPrefixSearch(final String query) {
if(query.length() == 0) return new ArrayList<String>(0);
Expand Down

0 comments on commit 589adb5

Please sign in to comment.