Skip to content

Commit

Permalink
Merge pull request apache#3 from apache/master
Browse files Browse the repository at this point in the history
pull from upstream finally
  • Loading branch information
desultir authored Aug 1, 2016
2 parents 4ed97f0 + d86c369 commit ce035ce
Show file tree
Hide file tree
Showing 752 changed files with 30,732 additions and 13,531 deletions.
39 changes: 32 additions & 7 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<project name="lucene-solr" default="-projecthelp" basedir=".">
<import file="lucene/common-build.xml"/>

<property name="jgit-version" value="4.2.0.201601211800-r"/>
<property name="jgit-version" value="4.4.1.201607150455-r"/>

<property name="tests.heap-dump-dir" location="heapdumps"/>

Expand Down Expand Up @@ -151,6 +151,7 @@
(~$/\$$Id\b/$) : 'svn keyword',
(~$/\$$Header\b/$) : 'svn keyword',
(~$/\$$Source\b/$) : 'svn keyword',
(~$/^\uFEFF/$) : 'UTF-8 byte order mark'
];
def baseDir = properties['validate.baseDir'];
Expand All @@ -165,10 +166,17 @@
}
def javadocsPattern = ~$/(?sm)^\Q/**\E(.*?)\Q*/\E/$;
def commentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$;
def lineSplitter = ~$/[\r\n]+/$;
def licenseMatcher = Defaults.createDefaultMatcher();
def validLoggerPattern = ~$/(?s)\b(private\s|static\s|final\s){3}+\s*Logger\s+\p{javaJavaIdentifierStart}+\s+=\s+\QLoggerFactory.getLogger(MethodHandles.lookup().lookupClass());\E/$;
def packagePattern = ~$/(?m)^\s*package\s+org\.apache.*;/$;
// Reports whether a matched comment body looks like a license header.
// Resets licenseMatcher, splits the matcher's first capture group with
// lineSplitter, and returns true as soon as licenseMatcher.match(...)
// accepts one of the resulting lines for the given ratDocument.
def isLicense = { matcher, ratDocument ->
  licenseMatcher.reset();
  def candidateLines = lineSplitter.split(matcher.group(1));
  return candidateLines.any{ line -> licenseMatcher.match(ratDocument, line) };
}
ant.fileScanner{
fileset(dir: baseDir){
extensions.each{
Expand Down Expand Up @@ -196,17 +204,32 @@
}
}
def javadocsMatcher = javadocsPattern.matcher(text);
def ratDocument = new FileDocument(f);
while (javadocsMatcher.find()) {
def ratDocument = new FileDocument(f);
licenseMatcher.reset();
if (lineSplitter.split(javadocsMatcher.group(1)).any{ licenseMatcher.match(ratDocument, it) }) {
if (isLicense(javadocsMatcher, ratDocument)) {
reportViolation(f, String.format(Locale.ENGLISH, 'javadoc-style license header [%s]',
ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
}
}
if (f.toString().endsWith('.java') && text.contains('org.slf4j.LoggerFactory')) {
if (!validLoggerPattern.matcher(text).find()) {
reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
if (f.toString().endsWith('.java')) {
if (text.contains('org.slf4j.LoggerFactory')) {
if (!validLoggerPattern.matcher(text).find()) {
reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
}
}
def packageMatcher = packagePattern.matcher(text);
if (packageMatcher.find()) {
def packageStartPos = packageMatcher.start();
def commentMatcher = commentPattern.matcher(text);
while (commentMatcher.find()) {
if (isLicense(commentMatcher, ratDocument)) {
if (commentMatcher.start() < packageStartPos) {
break; // This file is all good, so break loop: license header precedes package definition
} else {
reportViolation(f, 'package declaration precedes license header');
}
}
}
}
}
};
Expand Down Expand Up @@ -739,13 +762,15 @@ Test args: [${args}]</echo>
<!-- Invokes the -jenkins-base target through antcall, passing
     is.jenkins.build=true and tests.haltonfailure=false as parameters. -->
<target name="jenkins-hourly">
<antcall>
<param name="is.jenkins.build" value="true"/>
<param name="tests.haltonfailure" value="false"/>
<target name="-jenkins-base"/>
</antcall>
</target>

<target name="jenkins-nightly">
<antcall>
<param name="is.jenkins.build" value="true"/>
<param name="tests.haltonfailure" value="false"/>
<param name="tests.nightly" value="true"/>
<target name="-jenkins-base"/>
</antcall>
Expand Down
1 change: 0 additions & 1 deletion dev-tools/idea/.idea/copyright/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions dev-tools/maven/lucene/analysis/common/pom.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,18 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
7 changes: 7 additions & 0 deletions dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@
<url>${vc-browse-base-url};f=${module-directory}</url>
</scm>
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<!-- lucene-test-framework dependency must be declared before lucene-core -->
<!-- This dependency cannot be put into solr-parent, because local -->
Expand Down
5 changes: 1 addition & 4 deletions dev-tools/maven/solr/test-framework/pom.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,7 @@
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}</directory>
<excludes>
<exclude>**/*.java</exclude>
</excludes>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<plugins>
Expand Down
94 changes: 94 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ http://s.apache.org/luceneversions

======================= Lucene 7.0.0 =======================

API Changes

* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
Use setSplitOnWhitespace(true) to get the old behavior. (Steve Rowe)

* LUCENE-7369: Similarity.coord and BooleanQuery.disableCoord are removed.
(Adrien Grand)

* LUCENE-7368: Removed query normalization. (Adrien Grand)

* LUCENE-7355: AnalyzingQueryParser has been removed as its functionality has
been folded into the classic QueryParser. (Adrien Grand)

Bug Fixes

Improvements
Expand All @@ -15,10 +28,19 @@ Other

* LUCENE-6968: LSH Filter (Tommaso Teofili, Andy Hind, Cao Manh Dat)

* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)

======================= Lucene 6.2.0 =======================

API Changes

* ScoringWrapperSpans was removed since it had no purpose or effect as of Lucene 5.5.

New Features

* LUCENE-7381: Add point based DoubleRangeField and RangeFieldQuery for
indexing and querying on Ranges up to 4 dimensions (Nick Knize)

* LUCENE-7302: IndexWriter methods that change the index now return a
long "sequence number" indicating the effective equivalent
single-threaded execution order (Mike McCandless)
Expand All @@ -31,10 +53,38 @@ New Features
analyzer for the Ukrainian language (Andriy Rysin via Mike
McCandless)

* LUCENE-7373: Directory.renameFile, which did both renaming and fsync
of the directory metadata, has been deprecated; use the new separate
methods Directory.rename and Directory.syncMetaData instead (Robert Muir,
Uwe Schindler, Mike McCandless)

* LUCENE-7355: Added Analyzer#normalize(), which only applies normalization to
an input string. (Adrien Grand)

* LUCENE-7380: Add Polygon.fromGeoJSON for more easily creating
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
McCandless)

* SOLR-9279: Queries module: new ComparisonBoolFunction base class
(Doug Turnbull via David Smiley)

Bug Fixes

* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)

* LUCENE-7340: MemoryIndex.toString() could throw NPE; fixed. Renamed to toStringDebug().
(Daniel Collins, David Smiley)

* LUCENE-7382: Fix bug introduced by LUCENE-7355 that used the
wrong default AttributeFactory for new Tokenizers.
(Terry Smith, Uwe Schindler)

* LUCENE-7389: Fix FieldType.setDimensions(...) validation for the dimensionNumBytes
parameter. (Martijn van Groningen)

* LUCENE-7391: Fix performance regression in MemoryIndex's fields() introduced
in Lucene 6. (Steve Mason via David Smiley)

Improvements

* LUCENE-7323: Compound file writing now verifies the incoming
Expand Down Expand Up @@ -66,12 +116,48 @@ Improvements
and empty boolean queries now rewrite to MatchNoDocsQuery instead of
vice versa (Jim Ferenczi via Mike McCandless)

* LUCENE-7359: Add equals() and hashCode() to Explanation (Alan Woodward)

* LUCENE-7353: ScandinavianFoldingFilterFactory and
ScandinavianNormalizationFilterFactory now implement MultiTermAwareComponent.
(Adrien Grand)

* LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to
control whether to split on whitespace prior to text analysis. Default
behavior remains unchanged: split-on-whitespace=true. (Steve Rowe)

* LUCENE-7276: MatchNoDocsQuery now includes an optional reason for
why it was used (Jim Ferenczi via Mike McCandless)

* LUCENE-7355: AnalyzingQueryParser now only applies the subset of the analysis
chain that is about normalization for range/fuzzy/wildcard queries.
(Adrien Grand)

* LUCENE-7376: Add support for ToParentBlockJoinQuery to fast vector highlighter's
FieldQuery. (Martijn van Groningen)

* LUCENE-7385: Improve/fix assert messages in SpanScorer. (David Smiley)

* LUCENE-7393: Add ICUTokenizer option to parse Myanmar text as syllables instead of words,
because the ICU word-breaking algorithm has some issues. This allows for the previous
tokenization used before Lucene 5. (AM, Robert Muir)

Optimizations

* LUCENE-7330, LUCENE-7339: Speed up conjunction queries. (Adrien Grand)

* LUCENE-7356: SearchGroup tweaks. (Christine Poerschke)

* LUCENE-7351: Doc id compression for points. (Adrien Grand)

* LUCENE-7371: Point values are now better compressed using run-length
encoding. (Adrien Grand)

* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
(Adrien Grand)

* LUCENE-7396: Faster flush of points. (Adrien Grand, Mike McCandless)

Other

* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
Expand All @@ -81,7 +167,15 @@ Other

* LUCENE-7346: Update forbiddenapis to version 2.2.
(Uwe Schindler)

* LUCENE-7360: Explanation.toHtml() is deprecated. (Alan Woodward)

* LUCENE-7372: Factor out an org.apache.lucene.search.FilterWeight class.
(Christine Poerschke, Adrien Grand, David Smiley)

* LUCENE-7384: Removed ScoringWrapperSpans. And tweaked SpanWeight.buildSimWeight() to
reuse the existing Similarity instead of creating a new one. (David Smiley)

======================= Lucene 6.1.0 =======================

New Features
Expand Down
33 changes: 33 additions & 0 deletions lucene/MIGRATE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,36 @@ yielding better compression ratios. In case you would still like to compress on
top of the codec, you can do it on the application side by using the utility
classes from the java.util.zip package.

## Explanation.toHtml() removed (LUCENE-7360)

Clients wishing to render Explanations as HTML should implement their own
utilities for this.

## Similarity.coord and BooleanQuery.disableCoord removed (LUCENE-7369)

Coordination factors were a workaround for the fact that the ClassicSimilarity
does not have strong enough term frequency saturation. This causes disjunctions
to get better scores on documents that have many occurrences of a few query
terms than on documents that match most clauses, which is most of the time
undesirable. The new BM25Similarity does not suffer from this problem since it
has better saturation for the contribution of the term frequency so the coord
factors have been removed from scores. Things now work as if coords were always
disabled when constructing boolean queries.

## Weight.getValueForNormalization() and Weight.normalize() removed (LUCENE-7368)

Query normalization's goal was to make scores comparable across queries, which
was only implemented by the ClassicSimilarity. Since ClassicSimilarity is not
the default similarity anymore, this functionality has been removed. Boosts are
now propagated through Query#createWeight.

## AnalyzingQueryParser removed (LUCENE-7355)

The functionality of AnalyzingQueryParser has been folded into the classic
QueryParser, which now passes terms through Analyzer#normalize when generating
queries.

## CommonQueryParserConfiguration.setLowerCaseExpandedTerms removed (LUCENE-7355)

This option has been removed as expanded terms are now normalized through
Analyzer#normalize.
Original file line number Diff line number Diff line change
Expand Up @@ -143,5 +143,13 @@ protected TokenStreamComponents createComponents(String fieldName) {
}
return new TokenStreamComponents(source, new ArabicStemFilter(result));
}

/**
 * Wraps the given stream in the filter chain used for normalization:
 * {@code LowerCaseFilter} innermost, then {@code DecimalDigitFilter}, then
 * {@code ArabicNormalizationFilter} outermost.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @param in the token stream to wrap
 * @return the outermost filter of the chain
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
  final TokenStream lowerCaseStage = new LowerCaseFilter(in);
  final TokenStream decimalDigitStage = new DecimalDigitFilter(lowerCaseStage);
  return new ArabicNormalizationFilter(decimalDigitStage);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,11 @@ public TokenStreamComponents createComponents(String fieldName) {
result = new BulgarianStemFilter(result);
return new TokenStreamComponents(source, result);
}

/**
 * Wraps the given stream in the filter chain used for normalization:
 * {@code StandardFilter} innermost, then {@code LowerCaseFilter} outermost.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @param in the token stream to wrap
 * @return the outermost filter of the chain
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
  return new LowerCaseFilter(new StandardFilter(in));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,12 @@ protected TokenStreamComponents createComponents(String fieldName) {
result = new SetKeywordMarkerFilter(result, excltable);
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
}

/**
 * Wraps the given stream in the filter chain used for normalization:
 * {@code StandardFilter} innermost, then {@code LowerCaseFilter} outermost.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @param in the token stream to wrap
 * @return the outermost filter of the chain
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
  final TokenStream standardStage = new StandardFilter(in);
  return new LowerCaseFilter(standardStage);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,12 @@ protected TokenStreamComponents createComponents(String fieldName) {
result = new SnowballFilter(result, new CatalanStemmer());
return new TokenStreamComponents(source, result);
}

/**
 * Wraps the given stream in the filter chain used for normalization:
 * {@code StandardFilter} innermost, then {@code ElisionFilter} constructed
 * with {@code DEFAULT_ARTICLES}, then {@code LowerCaseFilter} outermost.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @param in the token stream to wrap
 * @return the outermost filter of the chain
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
  final TokenStream standardStage = new StandardFilter(in);
  final TokenStream elisionStage = new ElisionFilter(standardStage, DEFAULT_ARTICLES);
  return new LowerCaseFilter(elisionStage);
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.analysis.charfilter;

import java.io.IOException;
Expand Down
Loading

0 comments on commit ce035ce

Please sign in to comment.