Skip to content

Commit

Permalink
Lucene 9.0 (#52)
Browse files Browse the repository at this point in the history
* Update to lucene 9.0
* Update to grpc-java v1.43.2 and Protobuf to 3.19.2 to avoid CVE-2021-22569.
* Update mongo java driver to 4.4.0
* Add better logging for channel close
* Make sure to clean/close up internal connections
* General warning cleanup, package cleanup, and refactoring for simplicity
* Clear out the old closed nodes in the test helper
* Fix java 17 deprecation warning
* Update to gradle 7.4
* Add new zulia query parser based on org.apache.lucene.queryparser.flexible.standard
  * New parser supports  minimum should match added via (term term2 term3)~n syntax
  * New parser supports  multiple field search via field1,field2:...
  * New parser supports GTE/LTE/GT/LT operators like pubYear>=2020
  * Legacy parser is still available with legacy option on Query
  * Legacy parser supports dismax but new one does not yet
* New boolean facet handling maps only true/t/y/yes and false/f/n/no to True/False
* change boolean sort to be numeric based doc values
* Ensure field name cannot contain a comma
* Add taxonomy stats support for a boolean by treating it like 0,1
  • Loading branch information
mdavis95 authored Feb 26, 2022
1 parent 2ddc0f3 commit b56eb01
Show file tree
Hide file tree
Showing 77 changed files with 6,436 additions and 883 deletions.
3 changes: 3 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ configure<ReckonExtension> {
allprojects {
group = "io.zulia"
}
apply {
from("javacc.gradle")
}

defaultTasks("build")
subprojects {
Expand Down
6 changes: 3 additions & 3 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
org.gradle.jvmargs=-Djavax.xml.accessExternalSchema=all
luceneVersion=8.9.0
luceneVersion=9.0.0
mongoDriverVersion=4.4.0
grpcVersion=1.40.1
protobufVersion=3.17.3
grpcVersion=1.43.2
protobufVersion=3.19.2
micronautVersion=3.2.3
okHttpVersion=4.9.3
gsonVersion=2.8.9
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
294 changes: 294 additions & 0 deletions javacc.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
import java.nio.charset.Charset
import java.util.function.Function

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// This adds javacc generation support.

// Root-project wiring for JavaCC generation: declares the "javacc" tool
// configuration, an aggregate "javacc" task and the shared modifyFile helper.
configure(rootProject) {
  configurations {
    javacc
  }

  dependencies {
    javacc "net.java.dev.javacc:javacc:7.0.4"
  }

  repositories {
    mavenCentral()
  }

  // Aggregate task: depends on every JavaCCTask declared in any project of the build.
  task javacc() {
    description = "Regenerate sources for corresponding javacc grammar files."
    group = "generation"

    dependsOn(allprojects.collect { eachProject -> eachProject.tasks.withType(JavaCCTask) })
  }

  ext {
    /**
     * Reads a file as UTF-8, runs its content through the given function and
     * writes the result back only when the content actually changed.
     * CRLF line endings are converted to LF both before and after the function runs.
     */
    modifyFile = { File path, Function<String, String> modify ->
      Function<String, String> toUnixEols = { content -> content.replace("\r\n", "\n") }
      Function<String, String> pipeline = toUnixEols.andThen(modify).andThen(toUnixEols)

      String before = path.getText("UTF-8")
      String after = pipeline.apply(before)
      // Skip the write when nothing changed so the file is not touched needlessly.
      if (!before.equals(after)) {
        path.write(after, "UTF-8")
      }
    }
  }
}

/**
 * Post-processing applied to every set of JavaCC-generated files.
 *
 * Takes the tree of freshly generated files and rewrites them in place via
 * modifyFile: fixes a known typo in CharStream.java, normalizes tabs and the
 * generated checksum line in all files, and patches the generated
 * *TokenManager.java (imports, redundant cast, debug stream, warnings).
 */
def commonCleanups = { FileTree generatedFiles ->
  // This is a minor typo in a comment that nonetheless people have hand-corrected in the past.
  generatedFiles.matching({ include "CharStream.java" }).each { file ->
    modifyFile(file, { text ->
      return text.replace(
          "implemetation",
          "implementation");
    })
  }

  generatedFiles.each { file ->
    modifyFile(file, { text ->
      // Normalize EOLs and tabs (EOLs are a side-effect of modifyFile).
      text = text.replace("\t", " ");
      // Replace the generated checksum trailer with a fixed marker.
      text = text.replaceAll("JavaCC - OriginalChecksum=[^*]+", "(filtered)")
      text = text.replace("StringBuffer", "StringBuilder")
      return text
    })
  }

  generatedFiles.matching({ include "*TokenManager.java" }).each { file ->
    modifyFile(file, { text ->
      // Remove redundant imports.
      text = text.replaceAll(
          /(?m)^import .+/,
          "")
      // Add CharStream imports.
      // NOTE: the replacement text below is runtime content; keep it at column 0.
      text = text.replaceAll(
          /package (.+)/,
          '''
package $1
import org.apache.lucene.queryparser.charstream.CharStream;
'''.trim())
      // Eliminates redundant cast message.
      text = text.replace(
          "int hiByte = (int)(curChar >> 8);",
          "int hiByte = curChar >> 8;")
      // Access to forbidden APIs.
      text = text.replace(
          "public java.io.PrintStream debugStream = System.out;",
          "// (debugStream omitted).")
      text = text.replace(
          "public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }",
          "// (setDebugStream omitted).")
      // Annotate whichever token manager class this file declares. The previous
      // literal replacements only knew QueryParserTokenManager and
      // StandardSyntaxParserTokenManager, so token managers generated from other
      // grammars (e.g. ZuliaSyntaxParser.jj) were silently left un-annotated.
      text = text.replaceAll(
          /public class (\w+TokenManager) /,
          '@SuppressWarnings("unused") public class $1 ')
      return text
    })
  }
}

configure(project(":zulia-query-parser")) {

// Regenerates the Zulia flexible query parser from its JavaCC grammar and then
// patches the generated sources: the shared commonCleanups first, followed by
// parser-specific rewrites of ParseException.java and ZuliaSyntaxParser.java.
// NOTE: the bodies of the ''' strings below are runtime replacement text; their
// line starts are significant and must stay exactly as written.
task javaccParserZuliaFlexibleInternal(type: JavaCCTask) {
  description "Regenerate flexible query parser from ZuliaSyntaxParser.jj"
  group "generation"

  javaccFile = file('src/main/java/io/zulia/server/search/queryparser/parser/ZuliaSyntaxParser.jj')

  afterGenerate << commonCleanups
  afterGenerate << { FileTree generatedFiles ->
    generatedFiles.matching { include "ParseException.java" }.each { file ->
      modifyFile(file, { text ->
        // Modify constructor.
        text = text.replace(
            "class ParseException extends Exception",
            "class ParseException extends QueryNodeParseException")

        // Modify imports.
        // NOTE(review): this only matches if the grammar declares exactly this package;
        // the .jj file lives under io/zulia/server/search/queryparser/parser, so confirm
        // the package statement in ZuliaSyntaxParser.jj - if it differs, the imports
        // below are never added.
        text = text.replace(
            "package org.apache.lucene.queryparser.flexible.zulia.parser;", '''\
package org.apache.lucene.queryparser.flexible.zulia.parser;
import org.apache.lucene.queryparser.flexible.messages.*;
import org.apache.lucene.queryparser.flexible.core.*;
import org.apache.lucene.queryparser.flexible.core.messages.*;
''')

        // Modify constructors and code bits
        text = text.replaceAll(
            /(?s)[ ]*public ParseException\(Token currentTokenVal[^}]+[}]/, '''\
public ParseException(Token currentTokenVal,
int[][] expectedTokenSequencesVal, String[] tokenImageVal)
{
super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(
currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));
this.currentToken = currentTokenVal;
this.expectedTokenSequences = expectedTokenSequencesVal;
this.tokenImage = tokenImageVal;
}
''')

        text = text.replaceAll(
            /(?s)[ ]*public ParseException\(String message\)[^}]+[}]/, '''\
public ParseException(Message message)
{
super(message);
}
''')

        text = text.replaceAll(
            /(?s)[ ]*public ParseException\(\)[^}]+[}]/, '''\
public ParseException()
{
super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "Error"));
}
''')
        return text
      })
    }

    generatedFiles.matching { include "ZuliaSyntaxParser.java" }.each { file ->
      modifyFile(file, { text ->
        // Remove redundant cast
        text = text.replace(
            "new java.util.ArrayList<int[]>",
            "new java.util.ArrayList<>")
        text = text.replace(
            "new ArrayList<QueryNode>()",
            "new ArrayList<>()")
        text = text.replace(
            "Collections.<QueryNode> singletonList",
            "Collections.singletonList")
        // The parser generated from ZuliaSyntaxParser.jj is ZuliaSyntaxParser; the
        // previous search string named StandardSyntaxParser, which never occurs in
        // this file, so the annotation was never applied.
        text = text.replace(
            "public class ZuliaSyntaxParser ",
            '@SuppressWarnings({"unused","null"}) public class ZuliaSyntaxParser ')
        return text
      })
    }
  }
}

// Per-project entry point for parser regeneration. Without an explicit
// dependency this task did nothing when invoked on its own, so wire it to the
// JavaCC task declared in this project.
task javacc() {
  description "Regenerate query parsers (javacc syntax definitions)."
  group "generation"

  dependsOn javaccParserZuliaFlexibleInternal
}

}

// We always regenerate, no need to declare outputs.
/**
 * Gradle task that compiles a single JavaCC grammar.
 *
 * The grammar is compiled into the task's temporary directory, the registered
 * afterGenerate closures are applied to the generated files, and the result
 * is copied next to the grammar file (CharStream.java excluded).
 */
class JavaCCTask extends DefaultTask {
  /** The .jj grammar file to compile. */
  @InputFile
  File javaccFile

  /**
   * Apply closures to all generated files before they're copied back
   * to mainline code.
   */
  // A subtle bug here is that this makes it not an input... should be a list of replacements instead?
  @Internal
  List<Closure<FileTree>> afterGenerate = new ArrayList<>()

  /**
   * Lists the files this task declares as outputs, all located in the
   * grammar's directory and derived from the grammar's base name.
   */
  @OutputFiles
  List<File> getGeneratedSources() {
    // Return the list of generated files.
    def baseDir = javaccFile.parentFile
    def baseName = javaccFile.name.replace(".jj", "")

    return [
        project.file("${baseDir}/${baseName}.java"),
        project.file("${baseDir}/${baseName}Constants.java"),
        project.file("${baseDir}/${baseName}TokenManager.java"),
        project.file("${baseDir}/ParseException.java"),
        project.file("${baseDir}/Token.java"),
        project.file("${baseDir}/TokenMgrError.java")
    ]
  }

  JavaCCTask() {
    // The javacc tool classpath is declared on the root project.
    dependsOn(project.rootProject.configurations.javacc)
  }

  /**
   * Runs JavaCC on javaccFile, post-processes the output and copies it back
   * into the grammar's directory.
   *
   * @throws GradleException if the grammar file is missing, JavaCC exits with a
   *         non-zero code, or its output contains "Warning:".
   */
  @TaskAction
  def generate() {
    if (!javaccFile || !javaccFile.exists()) {
      throw new GradleException("Input file does not exist: ${javaccFile}")
    }

    // Run javacc generation into temporary folder so that we know all the generated files
    // and can post-process them easily.
    def tempDir = this.getTemporaryDir()
    tempDir.mkdirs()
    // Wipe leftovers from previous runs. The former "{ include: ... }" closure was a
    // labeled statement rather than a call to include(), so no filter was ever applied
    // and every file was deleted; spell that out since everything in tempDir is copied back.
    project.delete project.fileTree(tempDir)

    def targetDir = javaccFile.parentFile
    logger.lifecycle("Recompiling JavaCC: ${project.rootDir.relativePath(javaccFile)}")

    // Capture stdout and stderr together so they can be inspected and reported.
    def output = new ByteArrayOutputStream()
    def result = project.javaexec {
      classpath {
        project.rootProject.configurations.javacc
      }

      // The exit value is checked manually below to produce a better error message.
      ignoreExitValue = true
      standardOutput = output
      errorOutput = output

      // 'mainClass' replaces the deprecated 'main' property of JavaExecSpec.
      mainClass = "org.javacc.parser.Main"
      args += [
          "-OUTPUT_DIRECTORY=${tempDir}",
          javaccFile
      ]
    }

    // JavaCC output is only surfaced when something went wrong.
    if (result.exitValue != 0) {
      throw new GradleException("JavaCC failed to compile ${javaccFile}, here is the compilation output:\n${output}")
    }

    // Make sure we don't have warnings.
    if (output.toString(Charset.defaultCharset()).contains("Warning:")) {
      throw new GradleException("JavaCC emitted warnings for ${javaccFile}, here is the compilation output:\n${output}")
    }

    // Apply any custom modifications.
    def generatedFiles = project.fileTree(tempDir)

    afterGenerate.each { closure ->
      closure.call(generatedFiles)
    }

    // Copy back to mainline sources.
    project.copy {
      from tempDir
      into targetDir

      // We don't need CharStream interface as we redirect to our own.
      exclude "CharStream.java"
    }
  }
}
2 changes: 1 addition & 1 deletion zulia-analyzer/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ defaultTasks("build", "installDist")
dependencies {
api(project(":zulia-common"))

api("org.apache.lucene:lucene-analyzers-common:$luceneVersion")
api("org.apache.lucene:lucene-analysis-common:$luceneVersion")
api("info.debatty:java-lsh:0.12")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import io.zulia.client.command.base.SimpleCommand;
import io.zulia.client.pool.ZuliaConnection;
import io.zulia.client.result.BatchDeleteResult;
import io.zulia.client.result.QueryResult;
import io.zulia.client.result.SearchResult;
import io.zulia.message.ZuliaServiceOuterClass;
import io.zulia.message.ZuliaServiceOuterClass.DeleteResponse;

Expand All @@ -28,7 +28,7 @@ public BatchDelete addDelete(Delete delete) {
return this;
}

public BatchDelete deleteDocumentFromQueryResult(QueryResult queryResult) {
public BatchDelete deleteDocumentFromQueryResult(SearchResult queryResult) {

for (ScoredResult sr : queryResult.getResults()) {
Delete delete = new DeleteDocument(sr.getUniqueId(), sr.getIndexName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import io.zulia.client.command.base.SimpleCommand;
import io.zulia.client.pool.ZuliaConnection;
import io.zulia.client.result.BatchFetchResult;
import io.zulia.client.result.QueryResult;
import io.zulia.client.result.SearchResult;
import io.zulia.message.ZuliaQuery;
import io.zulia.message.ZuliaServiceGrpc.ZuliaServiceBlockingStub;
import io.zulia.message.ZuliaServiceOuterClass.BatchFetchRequest;
Expand Down Expand Up @@ -46,7 +46,7 @@ public BatchFetch addFetchDocumentsFromUniqueIds(Collection<String> uniqueIds, S
return this;
}

public BatchFetch addFetchDocumentsFromResults(QueryResult qr) {
public BatchFetch addFetchDocumentsFromResults(SearchResult qr) {
return addFetchDocumentsFromResults(qr.getResults());
}

Expand Down
Loading

0 comments on commit b56eb01

Please sign in to comment.