From e3d0af9e54c3177d3dba40897722fbb83cbf0c84 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 31 Jan 2024 15:27:56 +0100 Subject: [PATCH] Modernize BWC testing with parameterized tests (#13046) This change modernizes the BWC tests to leverage RandomizedRunner's parameterized tests, which allow us to have more structured and hopefully more extensible BWC tests in the future. This change doesn't add any new tests but tries to make the ones we have more structured and able to support growth down the road. Basically, every index type gets its own test class that no longer needs to loop over all the indices in each test. Each test case is run with all specified versions. Several sanity checks are applied in the base class to make individual tests smaller and much easier to read. Co-authored-by: Michael McCandless Co-authored-by: Adrien Grand --- dev-tools/scripts/addBackcompatIndexes.py | 8 +- .../BackwardsCompatibilityTestBase.java | 253 ++ .../TestAncientIndicesCompatibility.java | 392 +++ .../TestBackwardsCompatibility.java | 2505 ----------------- .../TestBasicBackwardsCompatibility.java | 899 ++++++ .../TestBinaryBackwardsCompatibility.java | 87 + .../TestDVUpdateBackwardsCompatibility.java | 269 ++ .../TestEmptyIndexBackwardsCompatibility.java | 66 + .../TestGenerateBwcIndices.java | 125 + .../TestIndexSortBackwardsCompatibility.java | 230 ++ ...estIndexUpgradeBackwardsCompatibility.java | 255 ++ .../TestMoreTermsBackwardsCompatibility.java | 96 + 12 files changed, 2676 insertions(+), 2509 deletions(-) create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java delete mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBinaryBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestDVUpdateBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestEmptyIndexBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexUpgradeBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestMoreTermsBackwardsCompatibility.java diff --git a/dev-tools/scripts/addBackcompatIndexes.py b/dev-tools/scripts/addBackcompatIndexes.py index 3d05d85a52b7..0be87ff7a55d 100755 --- a/dev-tools/scripts/addBackcompatIndexes.py +++ b/dev-tools/scripts/addBackcompatIndexes.py @@ -71,7 +71,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp '-Ptests.useSecurityManager=false', '-p lucene/%s' % module, 'test', - '--tests TestBackwardsCompatibility.%s' % test, + '--tests TestGenerateBwcIndices.%s' % test, '-Dtests.bwcdir=%s' % temp_dir, '-Dtests.codec=default' ]) @@ -99,7 +99,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp def 
update_backcompat_tests(types, index_version, current_version): print(' adding new indexes %s to backcompat tests...' % types, end='', flush=True) module = 'lucene/backward-codecs' - filename = '%s/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java' % module + filename = '%s/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java' % module if not current_version.is_back_compat_with(index_version): matcher = re.compile(r'final String\[\] unsupportedNames = {|};') elif 'sorted' in types: @@ -165,7 +165,7 @@ def __call__(self, buffer, match, line): def check_backcompat_tests(): print(' checking backcompat tests...', end='', flush=True) - scriptutil.run('./gradlew -p lucene/backward-codecs test --tests TestBackwardsCompatibility') + scriptutil.run('./gradlew -p lucene/backward-codecs test --tests TestGenerateBwcIndices') print('ok') def download_from_cdn(version, remotename, localname): @@ -248,7 +248,7 @@ def main(): create_and_add_index(source, 'moreterms', c.version, current_version, c.temp_dir) create_and_add_index(source, 'dvupdates', c.version, current_version, c.temp_dir) create_and_add_index(source, 'emptyIndex', c.version, current_version, c.temp_dir) - print ('\nMANUAL UPDATE REQUIRED: edit TestBackwardsCompatibility to enable moreterms, dvupdates, and empty index testing') + print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing') print('\nAdding backwards compatibility tests') update_backcompat_tests(['cfs', 'nocfs'], c.version, current_version) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java new file mode 100644 index 000000000000..97592854f6bd --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.backward_index; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; +import org.junit.After; +import org.junit.Before; + +public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase { + + protected final Version version; + private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion(); + protected final String indexPattern; + protected static final Set<Version> BINARY_SUPPORTED_VERSIONS; + + static { + String[] oldVersions = + new String[] { + "8.0.0", "8.0.0", "8.1.0", "8.1.0", "8.1.1", "8.1.1", "8.2.0", "8.2.0", "8.3.0", "8.3.0", + "8.3.1", "8.3.1", "8.4.0", "8.4.0", "8.4.1", "8.4.1", "8.5.0", "8.5.0", "8.5.1", "8.5.1", + "8.5.2", "8.5.2", "8.6.0", "8.6.0", "8.6.1", "8.6.1", "8.6.2", "8.6.2", "8.6.3", "8.6.3", + "8.7.0", "8.7.0", "8.8.0", "8.8.0", "8.8.1", "8.8.1", "8.8.2", "8.8.2", "8.9.0", "8.9.0", + "8.10.0", "8.10.0", "8.10.1", "8.10.1", "8.11.0", "8.11.0", "8.11.1", "8.11.1", "8.11.2", + "8.11.2", "8.12.0", "9.0.0", "9.1.0", "9.2.0", "9.3.0", "9.4.0", "9.4.1", "9.4.2", + "9.5.0", "9.6.0", "9.7.0", "9.8.0", "9.9.0", "9.9.1", "9.9.2", "9.10.0" + }; + + Set<Version> binaryVersions = new HashSet<>(); + for (String version : oldVersions) { + try { + Version v = Version.parse(version); + assertTrue("Unsupported binary version: " + v, v.major >= Version.MIN_SUPPORTED_MAJOR - 1); + binaryVersions.add(v); + } catch (ParseException ex) { + throw new RuntimeException(ex); + } + } + List<Version> allCurrentVersions = getAllCurrentVersions(); + for (Version version : allCurrentVersions) { + // make sure we never miss a version. + assertTrue("Version: " + version + " missing", binaryVersions.remove(version)); + } + BINARY_SUPPORTED_VERSIONS = binaryVersions; + } + + /** + * This is a base constructor for parameterized BWC tests. The constructor arguments are provided + * by {@link com.carrotsearch.randomizedtesting.RandomizedRunner} during test execution. A {@link + * com.carrotsearch.randomizedtesting.annotations.ParametersFactory} specified in a subclass + * provides a list of lists of arguments for the tests, and RandomizedRunner will execute the test for + * each of the argument lists. 
+ * + * @param version the version this test should run for + * @param indexPattern an index pattern used to open an index; see {@link + * #createPattern(String, String)} + */ + protected BackwardsCompatibilityTestBase( + @Name("version") Version version, @Name("pattern") String indexPattern) { + this.version = version; + this.indexPattern = indexPattern; + } + + Directory directory; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + assertNull( + "Index name " + version + " should not exist", + TestAncientIndicesCompatibility.class.getResourceAsStream( + indexName(LATEST_PREVIOUS_MAJOR))); + if (supportsVersion(version) == false) { + assertNull( + "Index name " + version + " should not exist", + TestAncientIndicesCompatibility.class.getResourceAsStream(indexName(version))); + } + assumeTrue("This test doesn't support version: " + version, supportsVersion(version)); + if (version.equals(Version.LATEST)) { + directory = newDirectory(); + createIndex(directory); + } else { + Path dir = createTempDir(); + InputStream resource = + TestAncientIndicesCompatibility.class.getResourceAsStream(indexName(version)); + assertNotNull("Index name " + version + " not found: " + indexName(version), resource); + TestUtil.unzip(resource, dir); + directory = newFSDirectory(dir); + } + verifyUsesDefaultCodec(directory, indexName(version)); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + IOUtils.close(directory); + directory = null; + } + + private static Version getLatestPreviousMajorVersion() { + Version lastPrevMajorVersion = null; + for (Version v : getAllCurrentVersions()) { + if (v.major == Version.LATEST.major - 1 + && (lastPrevMajorVersion == null || v.onOrAfter(lastPrevMajorVersion))) { + lastPrevMajorVersion = v; + } + } + return lastPrevMajorVersion; + } + + /** + * Creates an index pattern of the form '$name.$version$suffix.zip' where version is filled in + * afterward via {@link String#formatted(Object...)} during the test runs. + * + * @param name name of the index + * @param suffix index suffix e.g. '-cfs' + */ + static String createPattern(String name, String suffix) { + return name + ".%1$s" + suffix + ".zip"; + } + + public static List<Version> getAllCurrentVersions() { + Pattern constantPattern = Pattern.compile("LUCENE_(\\d+)_(\\d+)_(\\d+)(_ALPHA|_BETA)?"); + List<Version> versions = new ArrayList<>(); + for (Field field : Version.class.getDeclaredFields()) { + if (Modifier.isStatic(field.getModifiers()) && field.getType() == Version.class) { + Matcher constant = constantPattern.matcher(field.getName()); + Version v; + try { + v = (Version) field.get(Version.class); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + if (constant.matches() == false) { + continue; + } + versions.add(v); + } + } + return versions; + } + + public static Iterable<Object[]> allVersion(String name, String... 
suffixes) { + List<Object> patterns = new ArrayList<>(); + for (String suffix : suffixes) { + patterns.add(createPattern(name, suffix)); + } + List<Object[]> versionAndPatterns = new ArrayList<>(); + List<Version> versionList = getAllCurrentVersions(); + for (Version v : versionList) { + if (v.equals(LATEST_PREVIOUS_MAJOR) + == false) { // the latest prev-major has not yet been released + for (Object p : patterns) { + versionAndPatterns.add(new Object[] {v, p}); + } + } + } + return versionAndPatterns; + } + + public String indexName(Version version) { + return String.format(Locale.ROOT, indexPattern, version); + } + + protected boolean supportsVersion(Version version) { + return true; + } + + protected abstract void createIndex(Directory directory) throws IOException; + + public final void createBWCIndex() throws IOException { + Path indexDir = getIndexDir().resolve(indexName(Version.LATEST)); + Files.deleteIfExists(indexDir); + try (Directory dir = newFSDirectory(indexDir)) { + createIndex(dir); + } + } + + private Path getIndexDir() { + String path = System.getProperty("tests.bwcdir"); + assumeTrue( + "backcompat creation tests must be run with -Dtests.bwcdir=/path/to/write/indexes", + path != null); + return Paths.get(path); + } + + void verifyUsesDefaultCodec(Directory dir, String name) throws IOException { + DirectoryReader r = DirectoryReader.open(dir); + for (LeafReaderContext context : r.leaves()) { + SegmentReader air = (SegmentReader) context.reader(); + Codec codec = air.getSegmentInfo().info.getCodec(); + assertTrue( + "codec used in " + + name + + " (" + + codec.getName() + + ") is not a default codec (does not begin with Lucene)", + codec.getName().startsWith("Lucene")); + } + r.close(); + } + + // encodes a long into a BytesRef as VLong so that we get varying number of bytes when we update + static BytesRef toBytes(long value) { + BytesRef bytes = new BytesRef(10); // negative longs may take 10 bytes + while ((value & ~0x7FL) != 0L) { + bytes.bytes[bytes.length++] = (byte) ((value & 0x7FL) | 0x80L); + value >>>= 7; + } + bytes.bytes[bytes.length++] = (byte) value; + return bytes; + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java new file mode 100644 index 000000000000..e7b48fee25a3 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java @@ -0,0 +1,392 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.backward_index; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.nio.file.Path; +import org.apache.lucene.index.CheckIndex; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.store.BaseDirectoryWrapper; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; + +@SuppressWarnings("deprecation") +public class TestAncientIndicesCompatibility extends LuceneTestCase { + + static final String[] unsupportedNames = { + "1.9.0-cfs", + "1.9.0-nocfs", + "2.0.0-cfs", + "2.0.0-nocfs", + "2.1.0-cfs", + "2.1.0-nocfs", + "2.2.0-cfs", + "2.2.0-nocfs", + "2.3.0-cfs", + "2.3.0-nocfs", + "2.4.0-cfs", + "2.4.0-nocfs", + "2.4.1-cfs", + "2.4.1-nocfs", + "2.9.0-cfs", + "2.9.0-nocfs", + "2.9.1-cfs", + "2.9.1-nocfs", + "2.9.2-cfs", + "2.9.2-nocfs", + "2.9.3-cfs", + "2.9.3-nocfs", + "2.9.4-cfs", + "2.9.4-nocfs", + "3.0.0-cfs", + "3.0.0-nocfs", + "3.0.1-cfs", + "3.0.1-nocfs", + "3.0.2-cfs", + "3.0.2-nocfs", + "3.0.3-cfs", + "3.0.3-nocfs", + "3.1.0-cfs", + "3.1.0-nocfs", + "3.2.0-cfs", + "3.2.0-nocfs", + "3.3.0-cfs", + "3.3.0-nocfs", + "3.4.0-cfs", + "3.4.0-nocfs", + "3.5.0-cfs", + "3.5.0-nocfs", + "3.6.0-cfs", + "3.6.0-nocfs", + "3.6.1-cfs", + "3.6.1-nocfs", + "3.6.2-cfs", + "3.6.2-nocfs", + "4.0.0-cfs", + "4.0.0-cfs", + "4.0.0-nocfs", + "4.0.0.1-cfs", + "4.0.0.1-nocfs", + "4.0.0.2-cfs", + "4.0.0.2-nocfs", + "4.1.0-cfs", + "4.1.0-nocfs", + "4.2.0-cfs", + "4.2.0-nocfs", + "4.2.1-cfs", + "4.2.1-nocfs", + "4.3.0-cfs", + "4.3.0-nocfs", + "4.3.1-cfs", + "4.3.1-nocfs", + "4.4.0-cfs", + "4.4.0-nocfs", + "4.5.0-cfs", + "4.5.0-nocfs", + "4.5.1-cfs", + "4.5.1-nocfs", + "4.6.0-cfs", + "4.6.0-nocfs", + "4.6.1-cfs", + "4.6.1-nocfs", + "4.7.0-cfs", + "4.7.0-nocfs", + "4.7.1-cfs", + "4.7.1-nocfs", + "4.7.2-cfs", + "4.7.2-nocfs", + "4.8.0-cfs", + "4.8.0-nocfs", + "4.8.1-cfs", + "4.8.1-nocfs", + "4.9.0-cfs", + "4.9.0-nocfs", + "4.9.1-cfs", + "4.9.1-nocfs", + "4.10.0-cfs", + "4.10.0-nocfs", + "4.10.1-cfs", + "4.10.1-nocfs", + "4.10.2-cfs", + "4.10.2-nocfs", + "4.10.3-cfs", + "4.10.3-nocfs", + "4.10.4-cfs", + "4.10.4-nocfs", + "5x-with-4x-segments-cfs", + "5x-with-4x-segments-nocfs", + "5.0.0.singlesegment-cfs", + "5.0.0.singlesegment-nocfs", + "5.0.0-cfs", + "5.0.0-nocfs", + "5.1.0-cfs", + "5.1.0-nocfs", + "5.2.0-cfs", + "5.2.0-nocfs", + "5.2.1-cfs", + "5.2.1-nocfs", + "5.3.0-cfs", + "5.3.0-nocfs", + "5.3.1-cfs", + "5.3.1-nocfs", + "5.3.2-cfs", + "5.3.2-nocfs", + "5.4.0-cfs", + "5.4.0-nocfs", + "5.4.1-cfs", + "5.4.1-nocfs", + "5.5.0-cfs", + "5.5.0-nocfs", + "5.5.1-cfs", + "5.5.1-nocfs", + "5.5.2-cfs", + "5.5.2-nocfs", + "5.5.3-cfs", + "5.5.3-nocfs", + "5.5.4-cfs", + "5.5.4-nocfs", + "5.5.5-cfs", + "5.5.5-nocfs", + "6.0.0-cfs", + "6.0.0-nocfs", + "6.0.1-cfs", + "6.0.1-nocfs", + "6.1.0-cfs", + "6.1.0-nocfs", + "6.2.0-cfs", + "6.2.0-nocfs", + "6.2.1-cfs", + "6.2.1-nocfs", + "6.3.0-cfs", + "6.3.0-nocfs", + "6.4.0-cfs", + "6.4.0-nocfs", + "6.4.1-cfs", + "6.4.1-nocfs", + "6.4.2-cfs", + "6.4.2-nocfs", + "6.5.0-cfs", 
+ "6.5.0-nocfs", + "6.5.1-cfs", + "6.5.1-nocfs", + "6.6.0-cfs", + "6.6.0-nocfs", + "6.6.1-cfs", + "6.6.1-nocfs", + "6.6.2-cfs", + "6.6.2-nocfs", + "6.6.3-cfs", + "6.6.3-nocfs", + "6.6.4-cfs", + "6.6.4-nocfs", + "6.6.5-cfs", + "6.6.5-nocfs", + "6.6.6-cfs", + "6.6.6-nocfs", + "7.0.0-cfs", + "7.0.0-nocfs", + "7.0.1-cfs", + "7.0.1-nocfs", + "7.1.0-cfs", + "7.1.0-nocfs", + "7.2.0-cfs", + "7.2.0-nocfs", + "7.2.1-cfs", + "7.2.1-nocfs", + "7.3.0-cfs", + "7.3.0-nocfs", + "7.3.1-cfs", + "7.3.1-nocfs", + "7.4.0-cfs", + "7.4.0-nocfs", + "7.5.0-cfs", + "7.5.0-nocfs", + "7.6.0-cfs", + "7.6.0-nocfs", + "7.7.0-cfs", + "7.7.0-nocfs", + "7.7.1-cfs", + "7.7.1-nocfs", + "7.7.2-cfs", + "7.7.2-nocfs", + "7.7.3-cfs", + "7.7.3-nocfs" + }; + + /** + * This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate + * on too old indexes! + */ + public void testUnsupportedOldIndexes() throws Exception { + for (int i = 0; i < unsupportedNames.length; i++) { + if (VERBOSE) { + System.out.println("TEST: index " + unsupportedNames[i]); + } + Path oldIndexDir = createTempDir(unsupportedNames[i]); + TestUtil.unzip( + getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir); + BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir); + // don't checkindex, these are intentionally not supported + dir.setCheckIndexOnClose(false); + + IndexReader reader = null; + IndexWriter writer = null; + try { + reader = DirectoryReader.open(dir); + fail("DirectoryReader.open should not pass for " + unsupportedNames[i]); + } catch (IndexFormatTooOldException e) { + if (e.getReason() != null) { + assertNull(e.getVersion()); + assertNull(e.getMinVersion()); + assertNull(e.getMaxVersion()); + assertEquals( + e.getMessage(), + new IndexFormatTooOldException(e.getResourceDescription(), e.getReason()) + .getMessage()); + } else { + assertNotNull(e.getVersion()); + assertNotNull(e.getMinVersion()); + assertNotNull(e.getMaxVersion()); + assertTrue(e.getMessage(), e.getMaxVersion() >= e.getMinVersion()); + assertTrue( + e.getMessage(), + e.getMaxVersion() < e.getVersion() || e.getVersion() < e.getMinVersion()); + assertEquals( + e.getMessage(), + new IndexFormatTooOldException( + e.getResourceDescription(), + e.getVersion(), + e.getMinVersion(), + e.getMaxVersion()) + .getMessage()); + } + // pass + if (VERBOSE) { + System.out.println("TEST: got expected exc:"); + e.printStackTrace(System.out); + } + } finally { + if (reader != null) reader.close(); + } + + try { + writer = + new IndexWriter( + dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false)); + fail("IndexWriter creation should not pass for " + unsupportedNames[i]); + } catch (IndexFormatTooOldException e) { + if (e.getReason() != null) { + assertNull(e.getVersion()); + assertNull(e.getMinVersion()); + assertNull(e.getMaxVersion()); + assertEquals( + e.getMessage(), + new IndexFormatTooOldException(e.getResourceDescription(), e.getReason()) + .getMessage()); + } else { + assertNotNull(e.getVersion()); + assertNotNull(e.getMinVersion()); + assertNotNull(e.getMaxVersion()); + assertTrue(e.getMessage(), e.getMaxVersion() >= e.getMinVersion()); + assertTrue( + e.getMessage(), + e.getMaxVersion() < e.getVersion() || e.getVersion() < e.getMinVersion()); + assertEquals( + e.getMessage(), + new IndexFormatTooOldException( + e.getResourceDescription(), + e.getVersion(), + e.getMinVersion(), + e.getMaxVersion()) + .getMessage()); + } + // pass + if (VERBOSE) { + System.out.println("TEST: got expected exc:"); + 
e.printStackTrace(System.out); + } + // Make sure exc message includes a path= + assertTrue("got exc message: " + e.getMessage(), e.getMessage().contains("path=\"")); + } finally { + // we should fail to open IW, and so it should be null when we get here. + // However, if the test fails (i.e., IW did not fail on open), we need + // to close IW. However, if merges are run, IW may throw + // IndexFormatTooOldException, and we don't want to mask the fail() + // above, so close without waiting for merges. + if (writer != null) { + try { + writer.commit(); + } finally { + writer.close(); + } + } + } + + ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + CheckIndex checker = new CheckIndex(dir); + checker.setInfoStream(new PrintStream(bos, false, UTF_8)); + CheckIndex.Status indexStatus = checker.checkIndex(); + if (unsupportedNames[i].startsWith("7.")) { + assertTrue(indexStatus.clean); + } else { + assertFalse(indexStatus.clean); + // CheckIndex doesn't enforce a minimum version, so we either get an + // IndexFormatTooOldException + // or an IllegalArgumentException saying that the codec doesn't exist. + boolean formatTooOld = + bos.toString(UTF_8).contains(IndexFormatTooOldException.class.getName()); + boolean missingCodec = bos.toString(UTF_8).contains("Could not load codec"); + assertTrue(formatTooOld || missingCodec); + } + checker.close(); + + dir.close(); + } + } + + // #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset + // of wikibigall unique terms) that shows the read-time exception of + // IntersectTermsEnum (used by WildcardQuery) + public void testWildcardQueryExceptions990() throws IOException { + Path path = createTempDir("12895"); + + String name = "index.12895.9.8.0.zip"; + InputStream resource = TestAncientIndicesCompatibility.class.getResourceAsStream(name); + assertNotNull("missing zip file to reproduce #12895", resource); + TestUtil.unzip(resource, path); + + try (Directory dir = newFSDirectory(path); + DirectoryReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = new IndexSearcher(reader); + + searcher.count(new WildcardQuery(new Term("field", "*qx*"))); + } + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java deleted file mode 100644 index 5a57d0a817f9..000000000000 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ /dev/null @@ -1,2505 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.backward_index; - -import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; -import static org.apache.lucene.util.Version.LUCENE_9_0_0; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.PrintStream; -import java.lang.reflect.Modifier; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.text.ParsePosition; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.TimeZone; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.BinaryDocValuesField; -import org.apache.lucene.document.BinaryPoint; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoubleDocValuesField; -import org.apache.lucene.document.DoublePoint; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.FloatDocValuesField; -import org.apache.lucene.document.FloatPoint; -import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.KnnFloatVectorField; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedDocValuesField; -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.CheckIndex; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.FloatVectorValues; -import org.apache.lucene.index.IndexCommit; -import org.apache.lucene.index.IndexFormatTooOldException; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexUpgrader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.LogByteSizeMergePolicy; -import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.MultiBits; -import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiTerms; -import org.apache.lucene.index.NoMergePolicy; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.SegmentCommitInfo; -import org.apache.lucene.index.SegmentInfos; -import org.apache.lucene.index.SegmentReader; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.StandardDirectoryReader; -import org.apache.lucene.index.StoredFields; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermVectors; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import 
org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.FieldExistsQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.KnnFloatVectorQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.store.ByteBuffersDirectory; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.NIOFSDirectory; -import org.apache.lucene.tests.analysis.MockAnalyzer; -import org.apache.lucene.tests.index.RandomIndexWriter; -import org.apache.lucene.tests.store.BaseDirectoryWrapper; -import org.apache.lucene.tests.util.LineFileDocs; -import org.apache.lucene.tests.util.LuceneTestCase; -import org.apache.lucene.tests.util.TestUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.Version; -import org.junit.AfterClass; -import org.junit.BeforeClass; - -/* - Verify we can read previous versions' indexes, do searches - against them, and add documents to them. -*/ -// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows -// machines occasionally -@SuppressWarnings("deprecation") -public class TestBackwardsCompatibility extends LuceneTestCase { - - // Backcompat index generation, described below, is mostly automated in: - // - // dev-tools/scripts/addBackcompatIndexes.py - // - // For usage information, see: - // - // http://wiki.apache.org/lucene-java/ReleaseTodo#Generate_Backcompat_Indexes - // - // ----- - // - // To generate backcompat indexes with the current default codec, run the following gradle - // command: - // gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default - // -Ptests.useSecurityManager=false --tests TestBackwardsCompatibility - // Also add testmethod with one of the index creation methods below, for example: - // -Ptestmethod=testCreateCFS - // - // Zip up the generated indexes: - // - // cd /path/to/store/indexes/index.cfs ; zip index.-cfs.zip * - // cd /path/to/store/indexes/index.nocfs ; zip index.-nocfs.zip * - // - // Then move those 2 zip files to your trunk checkout and add them - // to the oldNames array. 
- - private static final int DOCS_COUNT = 35; - private static final int DELETED_ID = 7; - - // change version to 9 when we update 9 backward indices to include KNN field - private static final int KNN_VECTOR_MIN_SUPPORTED_VERSION = LUCENE_9_0_0.major; - private static final String KNN_VECTOR_FIELD = "knn_field"; - private static final FieldType KNN_VECTOR_FIELD_TYPE = - KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE); - private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f}; - - public void testCreateCFS() throws IOException { - Path indexDir = getIndexDir().resolve("index.cfs"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createIndex(dir, true, false); - } - } - - public void testCreateNoCFS() throws IOException { - Path indexDir = getIndexDir().resolve("index.nocfs"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createIndex(dir, false, false); - } - } - - // These are only needed for the special upgrade test to verify - // that also single-segment indexes are correctly upgraded by IndexUpgrader. - // You don't need them to be build for non-4.0 (the test is happy with just one - // "old" segment format, version is unimportant: - - public void testCreateSingleSegmentCFS() throws IOException { - Path indexDir = getIndexDir().resolve("index.singlesegment-cfs"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createIndex(dir, true, true); - } - } - - public void testCreateSingleSegmentNoCFS() throws IOException { - Path indexDir = getIndexDir().resolve("index.singlesegment-nocfs"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createIndex(dir, false, true); - } - } - - public void testCreateIndexInternal() throws IOException { - try (Directory dir = newDirectory()) { - createIndex(dir, random().nextBoolean(), false); - searchIndex(dir, Version.LATEST.toString(), Version.MIN_SUPPORTED_MAJOR, Version.LATEST); - } - } - - private Path getIndexDir() { - String path = System.getProperty("tests.bwcdir"); - assumeTrue( - "backcompat creation tests must be run with -Dtests.bwcdir=/path/to/write/indexes", - path != null); - return Paths.get(path); - } - - public void testCreateMoreTermsIndex() throws Exception { - Path indexDir = getIndexDir().resolve("moreterms"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createMoreTermsIndex(dir); - } - } - - public void testCreateMoreTermsIndexInternal() throws Exception { - try (Directory dir = newDirectory()) { - createMoreTermsIndex(dir); - } - } - - private void createMoreTermsIndex(Directory dir) throws Exception { - LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); - mp.setNoCFSRatio(1.0); - mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); - MockAnalyzer analyzer = new MockAnalyzer(random()); - analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); - - IndexWriterConfig conf = - new IndexWriterConfig(analyzer) - .setMergePolicy(mp) - .setCodec(TestUtil.getDefaultCodec()) - .setUseCompoundFile(false); - IndexWriter writer = new IndexWriter(dir, conf); - LineFileDocs docs = new LineFileDocs(new Random(0)); - for (int i = 0; i < 50; i++) { - Document doc = TestUtil.cloneDocument(docs.nextDoc()); - doc.add( - new NumericDocValuesField( - "docid_intDV", doc.getField("docid_int").numericValue().longValue())); - doc.add( - new SortedDocValuesField("titleDV", new 
BytesRef(doc.getField("title").stringValue()))); - writer.addDocument(doc); - if (i % 10 == 0) { // commit every 10 documents - writer.commit(); - } - } - docs.close(); - writer.close(); - try (DirectoryReader reader = DirectoryReader.open(dir)) { - searchExampleIndex(reader); // make sure we can search it - } - } - - // gradlew test -Ptestmethod=testCreateSortedIndex -Ptests.codec=default - // -Ptests.useSecurityManager=false -Ptests.bwcdir=/tmp/sorted --tests TestBackwardsCompatibility - public void testCreateSortedIndex() throws Exception { - Path indexDir = getIndexDir().resolve("sorted"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createSortedIndex(dir); - } - } - - public void testCreateSortedIndexInternal() throws Exception { - // this runs without the -Ptests.bwcdir=/tmp/sorted to make sure we can actually index and - // search the created index - try (Directory dir = newDirectory()) { - createSortedIndex(dir); - } - } - - public void createSortedIndex(Directory dir) throws Exception { - LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); - mp.setNoCFSRatio(1.0); - mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); - MockAnalyzer analyzer = new MockAnalyzer(random()); - analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); - - // TODO: remove randomness - IndexWriterConfig conf = new IndexWriterConfig(analyzer); - conf.setMergePolicy(mp); - conf.setUseCompoundFile(false); - conf.setCodec(TestUtil.getDefaultCodec()); - conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true))); - IndexWriter writer = new IndexWriter(dir, conf); - LineFileDocs docs = new LineFileDocs(new Random(0)); - SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT); - parser.setTimeZone(TimeZone.getTimeZone("UTC")); - ParsePosition position = new ParsePosition(0); - for (int i = 0; i < 50; i++) { - Document doc = TestUtil.cloneDocument(docs.nextDoc()); - String dateString = doc.get("date"); - position.setIndex(0); - Date date = parser.parse(dateString, position); - if (position.getErrorIndex() != -1) { - throw new AssertionError("failed to parse \"" + dateString + "\" as date"); - } - if (position.getIndex() != dateString.length()) { - throw new AssertionError("failed to parse \"" + dateString + "\" as date"); - } - doc.add( - new NumericDocValuesField( - "docid_intDV", doc.getField("docid_int").numericValue().longValue())); - doc.add( - new SortedDocValuesField("titleDV", new BytesRef(doc.getField("title").stringValue()))); - doc.add(new NumericDocValuesField("dateDV", date.getTime())); - if (i % 10 == 0) { // commit every 10 documents - writer.commit(); - } - writer.addDocument(doc); - } - writer.forceMerge(1); - writer.close(); - - try (DirectoryReader reader = DirectoryReader.open(dir)) { - searchExampleIndex(reader); // make sure we can search it - } - } - - private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) - throws IOException { - writer.updateNumericDocValue(new Term("id", id), f, value); - writer.updateNumericDocValue(new Term("id", id), cf, value * 2); - } - - private void updateBinary(IndexWriter writer, String id, String f, String cf, long value) - throws IOException { - writer.updateBinaryDocValue(new Term("id", id), f, toBytes(value)); - writer.updateBinaryDocValue(new Term("id", id), cf, toBytes(value * 2)); - } - - // Creates an index with DocValues updates - public void testCreateIndexWithDocValuesUpdates() throws IOException { - 
Path indexDir = getIndexDir().resolve("dvupdates"); - Files.deleteIfExists(indexDir); - try (Directory dir = newFSDirectory(indexDir)) { - createIndexWithDocValuesUpdates(dir); - searchDocValuesUpdatesIndex(dir); - } - } - - public void testCreateIndexWithDocValuesUpdatesInternal() throws IOException { - try (Directory dir = newDirectory()) { - createIndexWithDocValuesUpdates(dir); - searchDocValuesUpdatesIndex(dir); - } - } - - private void createIndexWithDocValuesUpdates(Directory dir) throws IOException { - IndexWriterConfig conf = - new IndexWriterConfig(new MockAnalyzer(random())) - .setCodec(TestUtil.getDefaultCodec()) - .setUseCompoundFile(false) - .setMergePolicy(NoMergePolicy.INSTANCE); - IndexWriter writer = new IndexWriter(dir, conf); - // create an index w/ few doc-values fields, some with updates and some without - for (int i = 0; i < 30; i++) { - Document doc = new Document(); - doc.add(new StringField("id", "" + i, Field.Store.NO)); - doc.add(new NumericDocValuesField("ndv1", i)); - doc.add(new NumericDocValuesField("ndv1_c", i * 2)); - doc.add(new NumericDocValuesField("ndv2", i * 3)); - doc.add(new NumericDocValuesField("ndv2_c", i * 6)); - doc.add(new BinaryDocValuesField("bdv1", toBytes(i))); - doc.add(new BinaryDocValuesField("bdv1_c", toBytes(i * 2))); - doc.add(new BinaryDocValuesField("bdv2", toBytes(i * 3))); - doc.add(new BinaryDocValuesField("bdv2_c", toBytes(i * 6))); - writer.addDocument(doc); - if ((i + 1) % 10 == 0) { - writer.commit(); // flush every 10 docs - } - } - - // first segment: no updates - - // second segment: update two fields, same gen - updateNumeric(writer, "10", "ndv1", "ndv1_c", 100L); - updateBinary(writer, "11", "bdv1", "bdv1_c", 100L); - writer.commit(); - - // third segment: update few fields, different gens, few docs - updateNumeric(writer, "20", "ndv1", "ndv1_c", 100L); - updateBinary(writer, "21", "bdv1", "bdv1_c", 100L); - writer.commit(); - updateNumeric(writer, "22", "ndv1", "ndv1_c", 200L); // update the field again - writer.close(); - } - - public void testCreateEmptyIndex() throws Exception { - Path indexDir = getIndexDir().resolve("emptyIndex"); - Files.deleteIfExists(indexDir); - IndexWriterConfig conf = - new IndexWriterConfig(new MockAnalyzer(random())) - .setUseCompoundFile(false) - .setMergePolicy(NoMergePolicy.INSTANCE); - try (Directory dir = newFSDirectory(indexDir); - IndexWriter writer = new IndexWriter(dir, conf)) { - writer.flush(); - } - } - - static final String[] oldNames = { - "8.0.0-cfs", - "8.0.0-nocfs", - "8.1.0-cfs", - "8.1.0-nocfs", - "8.1.1-cfs", - "8.1.1-nocfs", - "8.2.0-cfs", - "8.2.0-nocfs", - "8.3.0-cfs", - "8.3.0-nocfs", - "8.3.1-cfs", - "8.3.1-nocfs", - "8.4.0-cfs", - "8.4.0-nocfs", - "8.4.1-cfs", - "8.4.1-nocfs", - "8.5.0-cfs", - "8.5.0-nocfs", - "8.5.1-cfs", - "8.5.1-nocfs", - "8.5.2-cfs", - "8.5.2-nocfs", - "8.6.0-cfs", - "8.6.0-nocfs", - "8.6.1-cfs", - "8.6.1-nocfs", - "8.6.2-cfs", - "8.6.2-nocfs", - "8.6.3-cfs", - "8.6.3-nocfs", - "8.7.0-cfs", - "8.7.0-nocfs", - "8.8.0-cfs", - "8.8.0-nocfs", - "8.8.1-cfs", - "8.8.1-nocfs", - "8.8.2-cfs", - "8.8.2-nocfs", - "8.9.0-cfs", - "8.9.0-nocfs", - "8.10.0-cfs", - "8.10.0-nocfs", - "8.10.1-cfs", - "8.10.1-nocfs", - "8.11.0-cfs", - "8.11.0-nocfs", - "8.11.1-cfs", - "8.11.1-nocfs", - "8.11.2-cfs", - "8.11.2-nocfs", - "9.0.0-cfs", - "9.0.0-nocfs", - "9.1.0-cfs", - "9.1.0-nocfs", - "9.2.0-cfs", - "9.2.0-nocfs", - "9.3.0-cfs", - "9.3.0-nocfs", - "9.4.0-cfs", - "9.4.0-nocfs", - "9.4.1-cfs", - "9.4.1-nocfs", - "9.4.2-cfs", - "9.4.2-nocfs", - "9.5.0-cfs", - 
"9.5.0-nocfs", - "9.6.0-cfs", - "9.6.0-nocfs", - "9.7.0-cfs", - "9.7.0-nocfs", - "9.8.0-cfs", - "9.8.0-nocfs", - "9.9.0-cfs", - "9.9.0-nocfs", - "9.9.1-cfs", - "9.9.1-nocfs", - "9.9.2-cfs", - "9.9.2-nocfs" - }; - - public static String[] getOldNames() { - return oldNames; - } - - static final String[] oldSortedNames = { - "sorted.8.0.0", - "sorted.8.1.0", - "sorted.8.1.1", - "sorted.8.10.0", - "sorted.8.2.0", - "sorted.8.3.0", - "sorted.8.3.1", - "sorted.8.4.0", - "sorted.8.4.1", - "sorted.8.5.0", - "sorted.8.5.1", - "sorted.8.5.2", - "sorted.8.6.0", - "sorted.8.6.1", - "sorted.8.6.2", - "sorted.8.6.3", - "sorted.8.7.0", - "sorted.8.8.0", - "sorted.8.8.1", - "sorted.8.8.2", - "sorted.8.9.0", - "sorted.8.10.1", - "sorted.8.11.0", - "sorted.8.11.1", - "sorted.8.11.2", - "sorted.9.0.0", - "sorted.9.1.0", - "sorted.9.2.0", - "sorted.9.3.0", - "sorted.9.4.0", - "sorted.9.4.1", - "sorted.9.4.2", - "sorted.9.5.0", - "sorted.9.6.0", - "sorted.9.7.0", - "sorted.9.8.0", - "sorted.9.9.0", - "sorted.9.9.1", - "sorted.9.9.2" - }; - - public static String[] getOldSortedNames() { - return oldSortedNames; - } - - static final String[] unsupportedNames = { - "1.9.0-cfs", - "1.9.0-nocfs", - "2.0.0-cfs", - "2.0.0-nocfs", - "2.1.0-cfs", - "2.1.0-nocfs", - "2.2.0-cfs", - "2.2.0-nocfs", - "2.3.0-cfs", - "2.3.0-nocfs", - "2.4.0-cfs", - "2.4.0-nocfs", - "2.4.1-cfs", - "2.4.1-nocfs", - "2.9.0-cfs", - "2.9.0-nocfs", - "2.9.1-cfs", - "2.9.1-nocfs", - "2.9.2-cfs", - "2.9.2-nocfs", - "2.9.3-cfs", - "2.9.3-nocfs", - "2.9.4-cfs", - "2.9.4-nocfs", - "3.0.0-cfs", - "3.0.0-nocfs", - "3.0.1-cfs", - "3.0.1-nocfs", - "3.0.2-cfs", - "3.0.2-nocfs", - "3.0.3-cfs", - "3.0.3-nocfs", - "3.1.0-cfs", - "3.1.0-nocfs", - "3.2.0-cfs", - "3.2.0-nocfs", - "3.3.0-cfs", - "3.3.0-nocfs", - "3.4.0-cfs", - "3.4.0-nocfs", - "3.5.0-cfs", - "3.5.0-nocfs", - "3.6.0-cfs", - "3.6.0-nocfs", - "3.6.1-cfs", - "3.6.1-nocfs", - "3.6.2-cfs", - "3.6.2-nocfs", - "4.0.0-cfs", - "4.0.0-cfs", - "4.0.0-nocfs", - "4.0.0.1-cfs", - "4.0.0.1-nocfs", - "4.0.0.2-cfs", - "4.0.0.2-nocfs", - "4.1.0-cfs", - "4.1.0-nocfs", - "4.2.0-cfs", - "4.2.0-nocfs", - "4.2.1-cfs", - "4.2.1-nocfs", - "4.3.0-cfs", - "4.3.0-nocfs", - "4.3.1-cfs", - "4.3.1-nocfs", - "4.4.0-cfs", - "4.4.0-nocfs", - "4.5.0-cfs", - "4.5.0-nocfs", - "4.5.1-cfs", - "4.5.1-nocfs", - "4.6.0-cfs", - "4.6.0-nocfs", - "4.6.1-cfs", - "4.6.1-nocfs", - "4.7.0-cfs", - "4.7.0-nocfs", - "4.7.1-cfs", - "4.7.1-nocfs", - "4.7.2-cfs", - "4.7.2-nocfs", - "4.8.0-cfs", - "4.8.0-nocfs", - "4.8.1-cfs", - "4.8.1-nocfs", - "4.9.0-cfs", - "4.9.0-nocfs", - "4.9.1-cfs", - "4.9.1-nocfs", - "4.10.0-cfs", - "4.10.0-nocfs", - "4.10.1-cfs", - "4.10.1-nocfs", - "4.10.2-cfs", - "4.10.2-nocfs", - "4.10.3-cfs", - "4.10.3-nocfs", - "4.10.4-cfs", - "4.10.4-nocfs", - "5x-with-4x-segments-cfs", - "5x-with-4x-segments-nocfs", - "5.0.0.singlesegment-cfs", - "5.0.0.singlesegment-nocfs", - "5.0.0-cfs", - "5.0.0-nocfs", - "5.1.0-cfs", - "5.1.0-nocfs", - "5.2.0-cfs", - "5.2.0-nocfs", - "5.2.1-cfs", - "5.2.1-nocfs", - "5.3.0-cfs", - "5.3.0-nocfs", - "5.3.1-cfs", - "5.3.1-nocfs", - "5.3.2-cfs", - "5.3.2-nocfs", - "5.4.0-cfs", - "5.4.0-nocfs", - "5.4.1-cfs", - "5.4.1-nocfs", - "5.5.0-cfs", - "5.5.0-nocfs", - "5.5.1-cfs", - "5.5.1-nocfs", - "5.5.2-cfs", - "5.5.2-nocfs", - "5.5.3-cfs", - "5.5.3-nocfs", - "5.5.4-cfs", - "5.5.4-nocfs", - "5.5.5-cfs", - "5.5.5-nocfs", - "6.0.0-cfs", - "6.0.0-nocfs", - "6.0.1-cfs", - "6.0.1-nocfs", - "6.1.0-cfs", - "6.1.0-nocfs", - "6.2.0-cfs", - "6.2.0-nocfs", - "6.2.1-cfs", - "6.2.1-nocfs", - "6.3.0-cfs", - "6.3.0-nocfs", - 
"6.4.0-cfs", - "6.4.0-nocfs", - "6.4.1-cfs", - "6.4.1-nocfs", - "6.4.2-cfs", - "6.4.2-nocfs", - "6.5.0-cfs", - "6.5.0-nocfs", - "6.5.1-cfs", - "6.5.1-nocfs", - "6.6.0-cfs", - "6.6.0-nocfs", - "6.6.1-cfs", - "6.6.1-nocfs", - "6.6.2-cfs", - "6.6.2-nocfs", - "6.6.3-cfs", - "6.6.3-nocfs", - "6.6.4-cfs", - "6.6.4-nocfs", - "6.6.5-cfs", - "6.6.5-nocfs", - "6.6.6-cfs", - "6.6.6-nocfs", - "7.0.0-cfs", - "7.0.0-nocfs", - "7.0.1-cfs", - "7.0.1-nocfs", - "7.1.0-cfs", - "7.1.0-nocfs", - "7.2.0-cfs", - "7.2.0-nocfs", - "7.2.1-cfs", - "7.2.1-nocfs", - "7.3.0-cfs", - "7.3.0-nocfs", - "7.3.1-cfs", - "7.3.1-nocfs", - "7.4.0-cfs", - "7.4.0-nocfs", - "7.5.0-cfs", - "7.5.0-nocfs", - "7.6.0-cfs", - "7.6.0-nocfs", - "7.7.0-cfs", - "7.7.0-nocfs", - "7.7.1-cfs", - "7.7.1-nocfs", - "7.7.2-cfs", - "7.7.2-nocfs", - "7.7.3-cfs", - "7.7.3-nocfs" - }; - - static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1; - - static final String[] binarySupportedNames; - - static { - ArrayList list = new ArrayList<>(); - for (String name : unsupportedNames) { - if (name.startsWith(MIN_BINARY_SUPPORTED_MAJOR + ".")) { - list.add(name); - } - } - binarySupportedNames = list.toArray(new String[0]); - } - - // TODO: on 6.0.0 release, gen the single segment indices and add here: - static final String[] oldSingleSegmentNames = {}; - - public static String[] getOldSingleSegmentNames() { - return oldSingleSegmentNames; - } - - static Map oldIndexDirs; - - /** Randomizes the use of some of hte constructor variations */ - private static IndexUpgrader newIndexUpgrader(Directory dir) { - final boolean streamType = random().nextBoolean(); - final int choice = TestUtil.nextInt(random(), 0, 2); - switch (choice) { - case 0: - return new IndexUpgrader(dir); - case 1: - return new IndexUpgrader(dir, streamType ? null : InfoStream.NO_OUTPUT, false); - case 2: - return new IndexUpgrader(dir, newIndexWriterConfig(null), false); - default: - fail("case statement didn't get updated when random bounds changed"); - } - return null; // never get here - } - - @BeforeClass - public static void beforeClass() throws Exception { - List names = new ArrayList<>(oldNames.length + oldSingleSegmentNames.length); - names.addAll(Arrays.asList(oldNames)); - names.addAll(Arrays.asList(oldSingleSegmentNames)); - oldIndexDirs = new HashMap<>(); - for (String name : names) { - Path dir = createTempDir(name); - InputStream resource = - TestBackwardsCompatibility.class.getResourceAsStream("index." 
+ name + ".zip"); - assertNotNull("Index name " + name + " not found", resource); - TestUtil.unzip(resource, dir); - oldIndexDirs.put(name, newFSDirectory(dir)); - } - } - - @AfterClass - public static void afterClass() throws Exception { - for (Directory d : oldIndexDirs.values()) { - d.close(); - } - oldIndexDirs = null; - } - - public void testAllVersionHaveCfsAndNocfs() { - // ensure all tested versions with cfs also have nocfs - String[] files = new String[oldNames.length]; - System.arraycopy(oldNames, 0, files, 0, oldNames.length); - Arrays.sort(files); - String prevFile = ""; - for (String file : files) { - if (prevFile.endsWith("-cfs")) { - String prefix = prevFile.replace("-cfs", ""); - assertEquals("Missing -nocfs for backcompat index " + prefix, prefix + "-nocfs", file); - } - } - } - - public void testAllVersionsTested() throws Exception { - Pattern constantPattern = Pattern.compile("LUCENE_(\\d+)_(\\d+)_(\\d+)(_ALPHA|_BETA)?"); - // find the unique versions according to Version.java - List expectedVersions = new ArrayList<>(); - for (java.lang.reflect.Field field : Version.class.getDeclaredFields()) { - if (Modifier.isStatic(field.getModifiers()) && field.getType() == Version.class) { - Version v = (Version) field.get(Version.class); - if (v.equals(Version.LATEST)) { - continue; - } - - Matcher constant = constantPattern.matcher(field.getName()); - if (constant.matches() == false) { - continue; - } - - expectedVersions.add(v.toString() + "-cfs"); - } - } - - // BEGIN TRUNK ONLY BLOCK - // on trunk, the last release of the prev major release is also untested - Version lastPrevMajorVersion = null; - for (java.lang.reflect.Field field : Version.class.getDeclaredFields()) { - if (Modifier.isStatic(field.getModifiers()) && field.getType() == Version.class) { - Version v = (Version) field.get(Version.class); - Matcher constant = constantPattern.matcher(field.getName()); - if (constant.matches() == false) continue; - if (v.major == Version.LATEST.major - 1 - && (lastPrevMajorVersion == null || v.onOrAfter(lastPrevMajorVersion))) { - lastPrevMajorVersion = v; - } - } - } - assertNotNull(lastPrevMajorVersion); - expectedVersions.remove(lastPrevMajorVersion.toString() + "-cfs"); - // END TRUNK ONLY BLOCK - - Collections.sort(expectedVersions); - - // find what versions we are testing - List testedVersions = new ArrayList<>(); - for (String testedVersion : oldNames) { - if (testedVersion.endsWith("-cfs") == false) { - continue; - } - testedVersions.add(testedVersion); - } - Collections.sort(testedVersions); - - int i = 0; - int j = 0; - List missingFiles = new ArrayList<>(); - List extraFiles = new ArrayList<>(); - while (i < expectedVersions.size() && j < testedVersions.size()) { - String expectedVersion = expectedVersions.get(i); - String testedVersion = testedVersions.get(j); - int compare = expectedVersion.compareTo(testedVersion); - if (compare == 0) { // equal, we can move on - ++i; - ++j; - } else if (compare < 0) { // didn't find test for version constant - missingFiles.add(expectedVersion); - ++i; - } else { // extra test file - extraFiles.add(testedVersion); - ++j; - } - } - while (i < expectedVersions.size()) { - missingFiles.add(expectedVersions.get(i)); - ++i; - } - while (j < testedVersions.size()) { - missingFiles.add(testedVersions.get(j)); - ++j; - } - - // we could be missing up to 1 file, which may be due to a release that is in progress - if (missingFiles.size() <= 1 && extraFiles.isEmpty()) { - // success - return; - } - - StringBuffer msg = new StringBuffer(); - if 
(missingFiles.size() > 1) { - msg.append("Missing backcompat test files:\n"); - for (String missingFile : missingFiles) { - msg.append(" " + missingFile + "\n"); - } - } - if (extraFiles.isEmpty() == false) { - msg.append("Extra backcompat test files:\n"); - for (String extraFile : extraFiles) { - msg.append(" " + extraFile + "\n"); - } - } - fail(msg.toString()); - } - - /** - * This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate - * on too old indexes! - */ - public void testUnsupportedOldIndexes() throws Exception { - for (int i = 0; i < unsupportedNames.length; i++) { - if (VERBOSE) { - System.out.println("TEST: index " + unsupportedNames[i]); - } - Path oldIndexDir = createTempDir(unsupportedNames[i]); - TestUtil.unzip( - getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir); - BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir); - // don't checkindex, these are intentionally not supported - dir.setCheckIndexOnClose(false); - - IndexReader reader = null; - IndexWriter writer = null; - try { - reader = DirectoryReader.open(dir); - fail("DirectoryReader.open should not pass for " + unsupportedNames[i]); - } catch (IndexFormatTooOldException e) { - if (e.getReason() != null) { - assertNull(e.getVersion()); - assertNull(e.getMinVersion()); - assertNull(e.getMaxVersion()); - assertEquals( - e.getMessage(), - new IndexFormatTooOldException(e.getResourceDescription(), e.getReason()) - .getMessage()); - } else { - assertNotNull(e.getVersion()); - assertNotNull(e.getMinVersion()); - assertNotNull(e.getMaxVersion()); - assertTrue(e.getMessage(), e.getMaxVersion() >= e.getMinVersion()); - assertTrue( - e.getMessage(), - e.getMaxVersion() < e.getVersion() || e.getVersion() < e.getMinVersion()); - assertEquals( - e.getMessage(), - new IndexFormatTooOldException( - e.getResourceDescription(), - e.getVersion(), - e.getMinVersion(), - e.getMaxVersion()) - .getMessage()); - } - // pass - if (VERBOSE) { - System.out.println("TEST: got expected exc:"); - e.printStackTrace(System.out); - } - } finally { - if (reader != null) reader.close(); - reader = null; - } - - try { - writer = - new IndexWriter( - dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false)); - fail("IndexWriter creation should not pass for " + unsupportedNames[i]); - } catch (IndexFormatTooOldException e) { - if (e.getReason() != null) { - assertNull(e.getVersion()); - assertNull(e.getMinVersion()); - assertNull(e.getMaxVersion()); - assertEquals( - e.getMessage(), - new IndexFormatTooOldException(e.getResourceDescription(), e.getReason()) - .getMessage()); - } else { - assertNotNull(e.getVersion()); - assertNotNull(e.getMinVersion()); - assertNotNull(e.getMaxVersion()); - assertTrue(e.getMessage(), e.getMaxVersion() >= e.getMinVersion()); - assertTrue( - e.getMessage(), - e.getMaxVersion() < e.getVersion() || e.getVersion() < e.getMinVersion()); - assertEquals( - e.getMessage(), - new IndexFormatTooOldException( - e.getResourceDescription(), - e.getVersion(), - e.getMinVersion(), - e.getMaxVersion()) - .getMessage()); - } - // pass - if (VERBOSE) { - System.out.println("TEST: got expected exc:"); - e.printStackTrace(System.out); - } - // Make sure exc message includes a path= - assertTrue("got exc message: " + e.getMessage(), e.getMessage().indexOf("path=\"") != -1); - } finally { - // we should fail to open IW, and so it should be null when we get here. - // However, if the test fails (i.e., IW did not fail on open), we need - // to close IW. 
However, if merges are run, IW may throw - // IndexFormatTooOldException, and we don't want to mask the fail() - // above, so close without waiting for merges. - if (writer != null) { - try { - writer.commit(); - } finally { - writer.close(); - } - } - } - - ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - CheckIndex checker = new CheckIndex(dir); - checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8)); - CheckIndex.Status indexStatus = checker.checkIndex(); - if (unsupportedNames[i].startsWith("7.")) { - assertTrue(indexStatus.clean); - } else { - assertFalse(indexStatus.clean); - assertTrue( - bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName())); - } - checker.close(); - - dir.close(); - } - } - - public void testFullyMergeOldIndex() throws Exception { - for (String name : oldNames) { - if (VERBOSE) { - System.out.println("\nTEST: index=" + name); - } - Directory dir = newDirectory(oldIndexDirs.get(name)); - - final SegmentInfos oldSegInfos = SegmentInfos.readLatestCommit(dir); - - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))); - w.forceMerge(1); - w.close(); - - final SegmentInfos segInfos = SegmentInfos.readLatestCommit(dir); - assertEquals( - oldSegInfos.getIndexCreatedVersionMajor(), segInfos.getIndexCreatedVersionMajor()); - assertEquals(Version.LATEST, segInfos.asList().get(0).info.getVersion()); - assertEquals( - oldSegInfos.asList().get(0).info.getMinVersion(), - segInfos.asList().get(0).info.getMinVersion()); - - dir.close(); - } - } - - public void testAddOldIndexes() throws IOException { - for (String name : oldNames) { - if (VERBOSE) { - System.out.println("\nTEST: old index " + name); - } - Directory oldDir = oldIndexDirs.get(name); - SegmentInfos infos = SegmentInfos.readLatestCommit(oldDir); - - Directory targetDir = newDirectory(); - if (infos.getCommitLuceneVersion().major != Version.LATEST.major) { - // both indexes are not compatible - Directory targetDir2 = newDirectory(); - IndexWriter w = - new IndexWriter(targetDir2, newIndexWriterConfig(new MockAnalyzer(random()))); - IllegalArgumentException e = - expectThrows(IllegalArgumentException.class, () -> w.addIndexes(oldDir)); - assertTrue( - e.getMessage(), - e.getMessage() - .startsWith( - "Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version.")); - w.close(); - targetDir2.close(); - - // for the next test, we simulate writing to an index that was created on the same major - // version - new SegmentInfos(infos.getIndexCreatedVersionMajor()).commit(targetDir); - } - - IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random()))); - w.addIndexes(oldDir); - w.close(); - - SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); - assertNull( - "none of the segments should have been upgraded", - si.asList().stream() - .filter( // depending on the MergePolicy we might see these segments merged away - sci -> - sci.getId() != null - && sci.info.getVersion().onOrAfter(Version.LUCENE_8_6_0) == false) - .findAny() - .orElse(null)); - if (VERBOSE) { - System.out.println("\nTEST: done adding indices; now close"); - } - - targetDir.close(); - } - } - - public void testAddOldIndexesReader() throws IOException { - for (String name : oldNames) { - Directory oldDir = oldIndexDirs.get(name); - SegmentInfos infos = SegmentInfos.readLatestCommit(oldDir); - DirectoryReader reader = DirectoryReader.open(oldDir); - - Directory targetDir = newDirectory(); - if 
(infos.getCommitLuceneVersion().major != Version.LATEST.major) { - Directory targetDir2 = newDirectory(); - IndexWriter w = - new IndexWriter(targetDir2, newIndexWriterConfig(new MockAnalyzer(random()))); - IllegalArgumentException e = - expectThrows( - IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader)); - assertEquals( - e.getMessage(), - "Cannot merge a segment that has been created with major version 8 into this index which has been created by major version 9"); - w.close(); - targetDir2.close(); - - // for the next test, we simulate writing to an index that was created on the same major - // version - new SegmentInfos(infos.getIndexCreatedVersionMajor()).commit(targetDir); - } - IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random()))); - TestUtil.addIndexesSlowly(w, reader); - w.close(); - reader.close(); - SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); - assertNull( - "all SCIs should have an id now", - si.asList().stream().filter(sci -> sci.getId() == null).findAny().orElse(null)); - targetDir.close(); - } - } - - public void testSearchOldIndex() throws Exception { - for (String name : oldNames) { - Version version = Version.parse(name.substring(0, name.indexOf('-'))); - searchIndex(oldIndexDirs.get(name), name, Version.MIN_SUPPORTED_MAJOR, version); - } - - if (TEST_NIGHTLY) { - for (String name : binarySupportedNames) { - Path oldIndexDir = createTempDir(name); - TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir); - try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) { - Version version = Version.parse(name.substring(0, name.indexOf('-'))); - searchIndex(dir, name, MIN_BINARY_SUPPORTED_MAJOR, version); - } - } - } - } - - public void testIndexOldIndexNoAdds() throws Exception { - for (String name : oldNames) { - Directory dir = newDirectory(oldIndexDirs.get(name)); - Version version = Version.parse(name.substring(0, name.indexOf('-'))); - changeIndexNoAdds(random(), dir, version); - dir.close(); - } - } - - public void testIndexOldIndex() throws Exception { - for (String name : oldNames) { - if (VERBOSE) { - System.out.println("TEST: oldName=" + name); - } - Directory dir = newDirectory(oldIndexDirs.get(name)); - Version v = Version.parse(name.substring(0, name.indexOf('-'))); - changeIndexWithAdds(random(), dir, v); - dir.close(); - } - } - - private void doTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) - throws IOException { - final int hitCount = hits.length; - assertEquals("wrong number of hits", expectedCount, hitCount); - StoredFields storedFields = reader.storedFields(); - TermVectors termVectors = reader.termVectors(); - for (int i = 0; i < hitCount; i++) { - storedFields.document(hits[i].doc); - termVectors.get(hits[i].doc); - } - } - - public void searchIndex( - Directory dir, String oldName, int minIndexMajorVersion, Version nameVersion) - throws IOException { - IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0); - IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null); - IndexSearcher searcher = newSearcher(reader); - - TestUtil.checkIndex(dir); - - final Bits liveDocs = MultiBits.getLiveDocs(reader); - assertNotNull(liveDocs); - - StoredFields storedFields = reader.storedFields(); - TermVectors termVectors = reader.termVectors(); - - for (int i = 0; i < DOCS_COUNT; i++) { - if (liveDocs.get(i)) { - Document d = storedFields.document(i); - List fields = d.getFields(); - boolean isProxDoc = 
d.getField("content3") == null; - if (isProxDoc) { - assertEquals(7, fields.size()); - IndexableField f = d.getField("id"); - assertEquals("" + i, f.stringValue()); - - f = d.getField("utf8"); - assertEquals( - "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); - - f = d.getField("autf8"); - assertEquals( - "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); - - f = d.getField("content2"); - assertEquals("here is more content with aaa aaa aaa", f.stringValue()); - - f = d.getField("fie\u2C77ld"); - assertEquals("field with non-ascii name", f.stringValue()); - } - - Fields tfvFields = termVectors.get(i); - assertNotNull("i=" + i, tfvFields); - Terms tfv = tfvFields.terms("utf8"); - assertNotNull("docID=" + i + " index=" + oldName, tfv); - } else { - assertEquals(DELETED_ID, i); - } - } - - // check docvalues fields - NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte"); - BinaryDocValues dvBytesDerefFixed = MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed"); - BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar"); - SortedDocValues dvBytesSortedFixed = - MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed"); - SortedDocValues dvBytesSortedVar = MultiDocValues.getSortedValues(reader, "dvBytesSortedVar"); - BinaryDocValues dvBytesStraightFixed = - MultiDocValues.getBinaryValues(reader, "dvBytesStraightFixed"); - BinaryDocValues dvBytesStraightVar = - MultiDocValues.getBinaryValues(reader, "dvBytesStraightVar"); - NumericDocValues dvDouble = MultiDocValues.getNumericValues(reader, "dvDouble"); - NumericDocValues dvFloat = MultiDocValues.getNumericValues(reader, "dvFloat"); - NumericDocValues dvInt = MultiDocValues.getNumericValues(reader, "dvInt"); - NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong"); - NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked"); - NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort"); - - SortedSetDocValues dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet"); - SortedNumericDocValues dvSortedNumeric = - MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric"); - - for (int i = 0; i < DOCS_COUNT; i++) { - int id = Integer.parseInt(storedFields.document(i).get("id")); - assertEquals(i, dvByte.nextDoc()); - assertEquals(id, dvByte.longValue()); - - byte[] bytes = - new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id}; - BytesRef expectedRef = new BytesRef(bytes); - - assertEquals(i, dvBytesDerefFixed.nextDoc()); - BytesRef term = dvBytesDerefFixed.binaryValue(); - assertEquals(expectedRef, term); - assertEquals(i, dvBytesDerefVar.nextDoc()); - term = dvBytesDerefVar.binaryValue(); - assertEquals(expectedRef, term); - assertEquals(i, dvBytesSortedFixed.nextDoc()); - term = dvBytesSortedFixed.lookupOrd(dvBytesSortedFixed.ordValue()); - assertEquals(expectedRef, term); - assertEquals(i, dvBytesSortedVar.nextDoc()); - term = dvBytesSortedVar.lookupOrd(dvBytesSortedVar.ordValue()); - assertEquals(expectedRef, term); - assertEquals(i, dvBytesStraightFixed.nextDoc()); - term = dvBytesStraightFixed.binaryValue(); - assertEquals(expectedRef, term); - assertEquals(i, dvBytesStraightVar.nextDoc()); - term = dvBytesStraightVar.binaryValue(); - assertEquals(expectedRef, term); - - assertEquals(i, dvDouble.nextDoc()); - assertEquals((double) id, Double.longBitsToDouble(dvDouble.longValue()), 0D); - assertEquals(i, 
dvFloat.nextDoc()); - assertEquals((float) id, Float.intBitsToFloat((int) dvFloat.longValue()), 0F); - assertEquals(i, dvInt.nextDoc()); - assertEquals(id, dvInt.longValue()); - assertEquals(i, dvLong.nextDoc()); - assertEquals(id, dvLong.longValue()); - assertEquals(i, dvPacked.nextDoc()); - assertEquals(id, dvPacked.longValue()); - assertEquals(i, dvShort.nextDoc()); - assertEquals(id, dvShort.longValue()); - - assertEquals(i, dvSortedSet.nextDoc()); - assertEquals(1, dvSortedSet.docValueCount()); - long ord = dvSortedSet.nextOrd(); - assertEquals(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.nextOrd()); - term = dvSortedSet.lookupOrd(ord); - assertEquals(expectedRef, term); - - assertEquals(i, dvSortedNumeric.nextDoc()); - assertEquals(1, dvSortedNumeric.docValueCount()); - assertEquals(id, dvSortedNumeric.nextValue()); - } - - ScoreDoc[] hits = - searcher.search(new TermQuery(new Term(new String("content"), "aaa")), 1000).scoreDocs; - - // First document should be #0 - Document d = storedFields.document(hits[0].doc); - assertEquals("didn't get the right document first", "0", d.get("id")); - - doTestHits(hits, 34, searcher.getIndexReader()); - - hits = searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), 1000).scoreDocs; - - doTestHits(hits, 34, searcher.getIndexReader()); - - hits = searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), 1000).scoreDocs; - - doTestHits(hits, 34, searcher.getIndexReader()); - - hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs; - assertEquals(34, hits.length); - hits = - searcher.search( - new TermQuery(new Term(new String("utf8"), "lu\uD834\uDD1Ece\uD834\uDD60ne")), 1000) - .scoreDocs; - assertEquals(34, hits.length); - hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs; - assertEquals(34, hits.length); - - doTestHits( - searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search( - IntPoint.newRangeQuery("intPoint2d", new int[] {0, 0}, new int[] {34, 68}), 1000) - .scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search(FloatPoint.newRangeQuery("floatPoint1d", 0f, 34f), 1000).scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search( - FloatPoint.newRangeQuery( - "floatPoint2d", new float[] {0f, 0f}, new float[] {34f, 68f}), - 1000) - .scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search(LongPoint.newRangeQuery("longPoint1d", 0, 34), 1000).scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search( - LongPoint.newRangeQuery("longPoint2d", new long[] {0, 0}, new long[] {34, 68}), - 1000) - .scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search(DoublePoint.newRangeQuery("doublePoint1d", 0.0, 34.0), 1000).scoreDocs, - 34, - searcher.getIndexReader()); - doTestHits( - searcher.search( - DoublePoint.newRangeQuery( - "doublePoint2d", new double[] {0.0, 0.0}, new double[] {34.0, 68.0}), - 1000) - .scoreDocs, - 34, - searcher.getIndexReader()); - - byte[] bytes1 = new byte[4]; - byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34}; - doTestHits( - searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000).scoreDocs, - 34, - searcher.getIndexReader()); - byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68}; - doTestHits( - searcher.search( - BinaryPoint.newRangeQuery( - "binaryPoint2d", new byte[][] {bytes1, bytes1}, new byte[][] {bytes2, 
bytes3}), - 1000) - .scoreDocs, - 34, - searcher.getIndexReader()); - - // test vector values and KNN search - if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { - // test vector values - int cnt = 0; - for (LeafReaderContext ctx : reader.leaves()) { - FloatVectorValues values = ctx.reader().getFloatVectorValues(KNN_VECTOR_FIELD); - if (values != null) { - assertEquals(KNN_VECTOR_FIELD_TYPE.vectorDimension(), values.dimension()); - for (int doc = values.nextDoc(); doc != NO_MORE_DOCS; doc = values.nextDoc()) { - float[] expectedVector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * cnt}; - assertArrayEquals( - "vectors do not match for doc=" + cnt, expectedVector, values.vectorValue(), 0); - cnt++; - } - } - } - assertEquals(DOCS_COUNT, cnt); - - // test KNN search - ScoreDoc[] scoreDocs = assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); - for (int i = 0; i < scoreDocs.length; i++) { - int id = Integer.parseInt(storedFields.document(scoreDocs[i].doc).get("id")); - int expectedId = i < DELETED_ID ? i : i + 1; - assertEquals(expectedId, id); - } - } - - reader.close(); - } - - private static ScoreDoc[] assertKNNSearch( - IndexSearcher searcher, - float[] queryVector, - int k, - int expectedHitsCount, - String expectedFirstDocId) - throws IOException { - ScoreDoc[] hits = - searcher.search(new KnnFloatVectorQuery(KNN_VECTOR_FIELD, queryVector, k), k).scoreDocs; - assertEquals("wrong number of hits", expectedHitsCount, hits.length); - Document d = searcher.storedFields().document(hits[0].doc); - assertEquals("wrong first document", expectedFirstDocId, d.get("id")); - return hits; - } - - public void changeIndexWithAdds(Random random, Directory dir, Version nameVersion) - throws IOException { - SegmentInfos infos = SegmentInfos.readLatestCommit(dir); - assertEquals(nameVersion, infos.getCommitLuceneVersion()); - assertEquals(nameVersion, infos.getMinSegmentLuceneVersion()); - - // open writer - IndexWriter writer = - new IndexWriter( - dir, - newIndexWriterConfig(new MockAnalyzer(random)) - .setOpenMode(OpenMode.APPEND) - .setMergePolicy(newLogMergePolicy())); - // add 10 docs - for (int i = 0; i < 10; i++) { - addDoc(writer, DOCS_COUNT + i); - } - - // make sure writer sees right total -- writer seems not to know about deletes in .del? 
- final int expected = 45; - assertEquals("wrong doc count", expected, writer.getDocStats().numDocs); - writer.close(); - - // make sure searching sees right # hits for term search - IndexReader reader = DirectoryReader.open(dir); - IndexSearcher searcher = newSearcher(reader); - ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; - Document d = searcher.getIndexReader().storedFields().document(hits[0].doc); - assertEquals("wrong first document", "0", d.get("id")); - doTestHits(hits, 44, searcher.getIndexReader()); - - if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { - // make sure KNN search sees all hits (graph may not be used if k is big) - assertKNNSearch(searcher, KNN_VECTOR, 1000, 44, "0"); - // make sure KNN search using HNSW graph sees newly added docs - assertKNNSearch( - searcher, - new float[] {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * 44}, - 10, - 10, - "44"); - } - reader.close(); - - // fully merge - writer = - new IndexWriter( - dir, - newIndexWriterConfig(new MockAnalyzer(random)) - .setOpenMode(OpenMode.APPEND) - .setMergePolicy(newLogMergePolicy())); - writer.forceMerge(1); - writer.close(); - - reader = DirectoryReader.open(dir); - searcher = newSearcher(reader); - // make sure searching sees right # hits fot term search - hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; - assertEquals("wrong number of hits", 44, hits.length); - d = searcher.storedFields().document(hits[0].doc); - doTestHits(hits, 44, searcher.getIndexReader()); - assertEquals("wrong first document", "0", d.get("id")); - - if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { - // make sure KNN search sees all hits - assertKNNSearch(searcher, KNN_VECTOR, 1000, 44, "0"); - // make sure KNN search using HNSW graph sees newly added docs - assertKNNSearch( - searcher, - new float[] {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * 44}, - 10, - 10, - "44"); - } - reader.close(); - } - - public void changeIndexNoAdds(Random random, Directory dir, Version nameVersion) - throws IOException { - // make sure searching sees right # hits for term search - DirectoryReader reader = DirectoryReader.open(dir); - IndexSearcher searcher = newSearcher(reader); - ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; - assertEquals("wrong number of hits", 34, hits.length); - Document d = searcher.storedFields().document(hits[0].doc); - assertEquals("wrong first document", "0", d.get("id")); - - if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { - // make sure KNN search sees all hits - assertKNNSearch(searcher, KNN_VECTOR, 1000, 34, "0"); - // make sure KNN search using HNSW graph retrieves correct results - assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); - } - reader.close(); - - // fully merge - IndexWriter writer = - new IndexWriter( - dir, newIndexWriterConfig(new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); - writer.forceMerge(1); - writer.close(); - - reader = DirectoryReader.open(dir); - searcher = newSearcher(reader); - // make sure searching sees right # hits fot term search - hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; - assertEquals("wrong number of hits", 34, hits.length); - doTestHits(hits, 34, searcher.getIndexReader()); - // make sure searching sees right # hits for KNN search - if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { - // make sure KNN search sees all hits - 
assertKNNSearch(searcher, KNN_VECTOR, 1000, 34, "0"); - // make sure KNN search using HNSW graph retrieves correct results - assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); - } - reader.close(); - } - - public void createIndex(Directory dir, boolean doCFS, boolean fullyMerged) throws IOException { - LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); - mp.setNoCFSRatio(doCFS ? 1.0 : 0.0); - mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); - // TODO: remove randomness - IndexWriterConfig conf = - new IndexWriterConfig(new MockAnalyzer(random())) - .setMaxBufferedDocs(10) - .setMergePolicy(NoMergePolicy.INSTANCE); - IndexWriter writer = new IndexWriter(dir, conf); - - for (int i = 0; i < DOCS_COUNT; i++) { - addDoc(writer, i); - } - assertEquals("wrong doc count", DOCS_COUNT, writer.getDocStats().maxDoc); - if (fullyMerged) { - writer.forceMerge(1); - } - writer.close(); - - if (!fullyMerged) { - // open fresh writer so we get no prx file in the added segment - mp = new LogByteSizeMergePolicy(); - mp.setNoCFSRatio(doCFS ? 1.0 : 0.0); - // TODO: remove randomness - conf = - new IndexWriterConfig(new MockAnalyzer(random())) - .setMaxBufferedDocs(10) - .setMergePolicy(NoMergePolicy.INSTANCE); - writer = new IndexWriter(dir, conf); - addNoProxDoc(writer); - writer.close(); - - conf = - new IndexWriterConfig(new MockAnalyzer(random())) - .setMaxBufferedDocs(10) - .setMergePolicy(NoMergePolicy.INSTANCE); - writer = new IndexWriter(dir, conf); - Term searchTerm = new Term("id", String.valueOf(DELETED_ID)); - writer.deleteDocuments(searchTerm); - writer.close(); - } - } - - private void addDoc(IndexWriter writer, int id) throws IOException { - Document doc = new Document(); - doc.add(new TextField("content", "aaa", Field.Store.NO)); - doc.add(new StringField("id", Integer.toString(id), Field.Store.YES)); - FieldType customType2 = new FieldType(TextField.TYPE_STORED); - customType2.setStoreTermVectors(true); - customType2.setStoreTermVectorPositions(true); - customType2.setStoreTermVectorOffsets(true); - doc.add( - new Field( - "autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); - doc.add( - new Field( - "utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); - doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2)); - doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2)); - - // add docvalues fields - doc.add(new NumericDocValuesField("dvByte", (byte) id)); - byte[] bytes = - new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id}; - BytesRef ref = new BytesRef(bytes); - doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref)); - doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref)); - doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref)); - doc.add(new SortedDocValuesField("dvBytesSortedVar", ref)); - doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref)); - doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref)); - doc.add(new DoubleDocValuesField("dvDouble", (double) id)); - doc.add(new FloatDocValuesField("dvFloat", (float) id)); - doc.add(new NumericDocValuesField("dvInt", id)); - doc.add(new NumericDocValuesField("dvLong", id)); - doc.add(new NumericDocValuesField("dvPacked", id)); - doc.add(new NumericDocValuesField("dvShort", (short) id)); - doc.add(new SortedSetDocValuesField("dvSortedSet", ref)); - doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id)); - - doc.add(new IntPoint("intPoint1d", id)); 
- doc.add(new IntPoint("intPoint2d", id, 2 * id)); - doc.add(new FloatPoint("floatPoint1d", (float) id)); - doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id)); - doc.add(new LongPoint("longPoint1d", id)); - doc.add(new LongPoint("longPoint2d", id, 2 * id)); - doc.add(new DoublePoint("doublePoint1d", (double) id)); - doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id)); - doc.add(new BinaryPoint("binaryPoint1d", bytes)); - doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes)); - - // a field with both offsets and term vectors for a cross-check - FieldType customType3 = new FieldType(TextField.TYPE_STORED); - customType3.setStoreTermVectors(true); - customType3.setStoreTermVectorPositions(true); - customType3.setStoreTermVectorOffsets(true); - customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); - doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3)); - // a field that omits only positions - FieldType customType4 = new FieldType(TextField.TYPE_STORED); - customType4.setStoreTermVectors(true); - customType4.setStoreTermVectorPositions(false); - customType4.setStoreTermVectorOffsets(true); - customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS); - doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4)); - - float[] vector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * id}; - doc.add(new KnnFloatVectorField(KNN_VECTOR_FIELD, vector, KNN_VECTOR_FIELD_TYPE)); - - // TODO: - // index different norms types via similarity (we use a random one currently?!) - // remove any analyzer randomness, explicitly add payloads for certain fields. - writer.addDocument(doc); - } - - private void addNoProxDoc(IndexWriter writer) throws IOException { - Document doc = new Document(); - FieldType customType = new FieldType(TextField.TYPE_STORED); - customType.setIndexOptions(IndexOptions.DOCS); - Field f = new Field("content3", "aaa", customType); - doc.add(f); - FieldType customType2 = new FieldType(); - customType2.setStored(true); - customType2.setIndexOptions(IndexOptions.DOCS); - f = new Field("content4", "aaa", customType2); - doc.add(f); - writer.addDocument(doc); - } - - private int countDocs(PostingsEnum docs) throws IOException { - int count = 0; - while ((docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - count++; - } - return count; - } - - // flex: test basics of TermsEnum api on non-flex index - public void testNextIntoWrongField() throws Exception { - for (String name : oldNames) { - Directory dir = oldIndexDirs.get(name); - IndexReader r = DirectoryReader.open(dir); - TermsEnum terms = MultiTerms.getTerms(r, "content").iterator(); - BytesRef t = terms.next(); - assertNotNull(t); - - // content field only has term aaa: - assertEquals("aaa", t.utf8ToString()); - assertNull(terms.next()); - - BytesRef aaaTerm = new BytesRef("aaa"); - - // should be found exactly - assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); - assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE))); - assertNull(terms.next()); - - // should hit end of field - assertEquals(TermsEnum.SeekStatus.END, terms.seekCeil(new BytesRef("bbb"))); - assertNull(terms.next()); - - // should seek to aaa - assertEquals(TermsEnum.SeekStatus.NOT_FOUND, terms.seekCeil(new BytesRef("a"))); - assertTrue(terms.term().bytesEquals(aaaTerm)); - assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE))); - assertNull(terms.next()); 
- - assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); - assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE))); - assertNull(terms.next()); - - r.close(); - } - } - - /** - * Test that we didn't forget to bump the current Constants.LUCENE_MAIN_VERSION. This is important - * so that we can determine which version of lucene wrote the segment. - */ - public void testOldVersions() throws Exception { - // first create a little index with the current code and get the version - Directory currentDir = newDirectory(); - RandomIndexWriter riw = new RandomIndexWriter(random(), currentDir); - riw.addDocument(new Document()); - riw.close(); - DirectoryReader ir = DirectoryReader.open(currentDir); - SegmentReader air = (SegmentReader) ir.leaves().get(0).reader(); - Version currentVersion = air.getSegmentInfo().info.getVersion(); - assertNotNull(currentVersion); // only 3.0 segments can have a null version - ir.close(); - currentDir.close(); - - // now check all the old indexes, their version should be < the current version - for (String name : oldNames) { - Directory dir = oldIndexDirs.get(name); - DirectoryReader r = DirectoryReader.open(dir); - for (LeafReaderContext context : r.leaves()) { - air = (SegmentReader) context.reader(); - Version oldVersion = air.getSegmentInfo().info.getVersion(); - assertNotNull(oldVersion); // only 3.0 segments can have a null version - assertTrue( - "current Version.LATEST is <= an old index: did you forget to bump it?!", - currentVersion.onOrAfter(oldVersion)); - } - r.close(); - } - } - - public void testIndexCreatedVersion() throws IOException { - for (String name : oldNames) { - Directory dir = oldIndexDirs.get(name); - SegmentInfos infos = SegmentInfos.readLatestCommit(dir); - // those indexes are created by a single version so we can - // compare the commit version with the created version - assertEquals(infos.getCommitLuceneVersion().major, infos.getIndexCreatedVersionMajor()); - } - } - - public void testSegmentCommitInfoId() throws IOException { - for (String name : oldNames) { - Directory dir = oldIndexDirs.get(name); - SegmentInfos infos = SegmentInfos.readLatestCommit(dir); - for (SegmentCommitInfo info : infos) { - if (info.info.getVersion().onOrAfter(Version.LUCENE_8_6_0)) { - assertNotNull(info.toString(), info.getId()); - } else { - assertNull(info.toString(), info.getId()); - } - } - } - } - - public void verifyUsesDefaultCodec(Directory dir, String name) throws IOException { - DirectoryReader r = DirectoryReader.open(dir); - for (LeafReaderContext context : r.leaves()) { - SegmentReader air = (SegmentReader) context.reader(); - Codec codec = air.getSegmentInfo().info.getCodec(); - assertTrue( - "codec used in " - + name - + " (" - + codec.getName() - + ") is not a default codec (does not begin with Lucene)", - codec.getName().startsWith("Lucene")); - } - r.close(); - } - - public void testAllIndexesUseDefaultCodec() throws Exception { - for (String name : oldNames) { - Directory dir = oldIndexDirs.get(name); - verifyUsesDefaultCodec(dir, name); - } - } - - private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException { - final SegmentInfos infos = SegmentInfos.readLatestCommit(dir); - if (VERBOSE) { - System.out.println("checkAllSegmentsUpgraded: " + infos); - } - for (SegmentCommitInfo si : infos) { - assertEquals(Version.LATEST, si.info.getVersion()); - assertNotNull(si.getId()); - } - assertEquals(Version.LATEST, infos.getCommitLuceneVersion()); - 
assertEquals(indexCreatedVersion, infos.getIndexCreatedVersionMajor()); - return infos.size(); - } - - private int getNumberOfSegments(Directory dir) throws IOException { - final SegmentInfos infos = SegmentInfos.readLatestCommit(dir); - return infos.size(); - } - - public void testUpgradeOldIndex() throws Exception { - List names = new ArrayList<>(oldNames.length + oldSingleSegmentNames.length); - names.addAll(Arrays.asList(oldNames)); - names.addAll(Arrays.asList(oldSingleSegmentNames)); - for (String name : names) { - if (VERBOSE) { - System.out.println("testUpgradeOldIndex: index=" + name); - } - Directory dir = newDirectory(oldIndexDirs.get(name)); - int indexCreatedVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor(); - - newIndexUpgrader(dir).upgrade(); - - checkAllSegmentsUpgraded(dir, indexCreatedVersion); - - dir.close(); - } - } - - public void testIndexUpgraderCommandLineArgs() throws Exception { - - PrintStream savedSystemOut = System.out; - System.setOut(new PrintStream(new ByteArrayOutputStream(), false, "UTF-8")); - try { - for (Map.Entry entry : oldIndexDirs.entrySet()) { - String name = entry.getKey(); - Directory origDir = entry.getValue(); - int indexCreatedVersion = - SegmentInfos.readLatestCommit(origDir).getIndexCreatedVersionMajor(); - Path dir = createTempDir(name); - try (FSDirectory fsDir = FSDirectory.open(dir)) { - // beware that ExtraFS might add extraXXX files - Set extraFiles = Set.of(fsDir.listAll()); - for (String file : origDir.listAll()) { - if (extraFiles.contains(file) == false) { - fsDir.copyFrom(origDir, file, file, IOContext.DEFAULT); - } - } - } - - String path = dir.toAbsolutePath().toString(); - - List args = new ArrayList<>(); - if (random().nextBoolean()) { - args.add("-verbose"); - } - if (random().nextBoolean()) { - args.add("-delete-prior-commits"); - } - if (random().nextBoolean()) { - // TODO: need to better randomize this, but ... - // - LuceneTestCase.FS_DIRECTORIES is private - // - newFSDirectory returns BaseDirectoryWrapper - // - BaseDirectoryWrapper doesn't expose delegate - Class dirImpl = NIOFSDirectory.class; - - args.add("-dir-impl"); - args.add(dirImpl.getName()); - } - args.add(path); - - IndexUpgrader.main(args.toArray(new String[0])); - - Directory upgradedDir = newFSDirectory(dir); - try { - checkAllSegmentsUpgraded(upgradedDir, indexCreatedVersion); - } finally { - upgradedDir.close(); - } - } - } finally { - System.setOut(savedSystemOut); - } - } - - public void testUpgradeOldSingleSegmentIndexWithAdditions() throws Exception { - for (String name : oldSingleSegmentNames) { - if (VERBOSE) { - System.out.println("testUpgradeOldSingleSegmentIndexWithAdditions: index=" + name); - } - Directory dir = newDirectory(oldIndexDirs.get(name)); - assertEquals("Original index must be single segment", 1, getNumberOfSegments(dir)); - int indexCreatedVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor(); - - // create a bunch of dummy segments - int id = 40; - Directory ramDir = new ByteBuffersDirectory(); - for (int i = 0; i < 3; i++) { - // only use Log- or TieredMergePolicy, to make document addition predictable and not - // suddenly merge: - MergePolicy mp = random().nextBoolean() ? 
newLogMergePolicy() : newTieredMergePolicy(); - IndexWriterConfig iwc = - new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp); - IndexWriter w = new IndexWriter(ramDir, iwc); - // add few more docs: - for (int j = 0; j < RANDOM_MULTIPLIER * random().nextInt(30); j++) { - addDoc(w, id++); - } - try { - w.commit(); - } finally { - w.close(); - } - } - - // add dummy segments (which are all in current - // version) to single segment index - MergePolicy mp = random().nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); - IndexWriterConfig iwc = new IndexWriterConfig(null).setMergePolicy(mp); - IndexWriter w = new IndexWriter(dir, iwc); - w.addIndexes(ramDir); - try { - w.commit(); - } finally { - w.close(); - } - - // determine count of segments in modified index - final int origSegCount = getNumberOfSegments(dir); - - // ensure there is only one commit - assertEquals(1, DirectoryReader.listCommits(dir).size()); - newIndexUpgrader(dir).upgrade(); - - final int segCount = checkAllSegmentsUpgraded(dir, indexCreatedVersion); - assertEquals( - "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged", - origSegCount, - segCount); - - dir.close(); - } - } - - public static final String emptyIndex = "empty.9.0.0.zip"; - - public void testUpgradeEmptyOldIndex() throws Exception { - Path oldIndexDir = createTempDir("emptyIndex"); - TestUtil.unzip(getDataInputStream(emptyIndex), oldIndexDir); - Directory dir = newFSDirectory(oldIndexDir); - - newIndexUpgrader(dir).upgrade(); - - checkAllSegmentsUpgraded(dir, 9); - - dir.close(); - } - - public static final String moreTermsIndex = "moreterms.9.0.0.zip"; - - public void testMoreTerms() throws Exception { - Path oldIndexDir = createTempDir("moreterms"); - TestUtil.unzip(getDataInputStream(moreTermsIndex), oldIndexDir); - Directory dir = newFSDirectory(oldIndexDir); - DirectoryReader reader = DirectoryReader.open(dir); - - verifyUsesDefaultCodec(dir, moreTermsIndex); - TestUtil.checkIndex(dir); - searchExampleIndex(reader); - - reader.close(); - dir.close(); - } - - public static final String dvUpdatesIndex = "dvupdates.9.0.0.zip"; - - private void assertNumericDocValues(LeafReader r, String f, String cf) throws IOException { - NumericDocValues ndvf = r.getNumericDocValues(f); - NumericDocValues ndvcf = r.getNumericDocValues(cf); - for (int i = 0; i < r.maxDoc(); i++) { - assertEquals(i, ndvcf.nextDoc()); - assertEquals(i, ndvf.nextDoc()); - assertEquals(ndvcf.longValue(), ndvf.longValue() * 2); - } - } - - private void assertBinaryDocValues(LeafReader r, String f, String cf) throws IOException { - BinaryDocValues bdvf = r.getBinaryDocValues(f); - BinaryDocValues bdvcf = r.getBinaryDocValues(cf); - for (int i = 0; i < r.maxDoc(); i++) { - assertEquals(i, bdvf.nextDoc()); - assertEquals(i, bdvcf.nextDoc()); - assertEquals(getValue(bdvcf), getValue(bdvf) * 2); - } - } - - private void verifyDocValues(Directory dir) throws IOException { - DirectoryReader reader = DirectoryReader.open(dir); - for (LeafReaderContext context : reader.leaves()) { - LeafReader r = context.reader(); - assertNumericDocValues(r, "ndv1", "ndv1_c"); - assertNumericDocValues(r, "ndv2", "ndv2_c"); - assertBinaryDocValues(r, "bdv1", "bdv1_c"); - assertBinaryDocValues(r, "bdv2", "bdv2_c"); - } - reader.close(); - } - - public void testDocValuesUpdates() throws Exception { - Path oldIndexDir = createTempDir("dvupdates"); - TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); - try (Directory dir = 
newFSDirectory(oldIndexDir)) { - searchDocValuesUpdatesIndex(dir); - } - } - - private void searchDocValuesUpdatesIndex(Directory dir) throws IOException { - verifyUsesDefaultCodec(dir, dvUpdatesIndex); - verifyDocValues(dir); - - // update fields and verify index - IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); - IndexWriter writer = new IndexWriter(dir, conf); - updateNumeric(writer, "1", "ndv1", "ndv1_c", 300L); - updateNumeric(writer, "1", "ndv2", "ndv2_c", 300L); - updateBinary(writer, "1", "bdv1", "bdv1_c", 300L); - updateBinary(writer, "1", "bdv2", "bdv2_c", 300L); - - writer.commit(); - verifyDocValues(dir); - - // merge all segments - writer.forceMerge(1); - writer.commit(); - verifyDocValues(dir); - - writer.close(); - } - - public void testDeletes() throws Exception { - Path oldIndexDir = createTempDir("dvupdates"); - TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); - Directory dir = newFSDirectory(oldIndexDir); - verifyUsesDefaultCodec(dir, dvUpdatesIndex); - - IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); - IndexWriter writer = new IndexWriter(dir, conf); - - int maxDoc = writer.getDocStats().maxDoc; - writer.deleteDocuments(new Term("id", "1")); - if (random().nextBoolean()) { - writer.commit(); - } - - writer.forceMerge(1); - writer.commit(); - assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); - - writer.close(); - dir.close(); - } - - public void testSoftDeletes() throws Exception { - Path oldIndexDir = createTempDir("dvupdates"); - TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); - Directory dir = newFSDirectory(oldIndexDir); - verifyUsesDefaultCodec(dir, dvUpdatesIndex); - IndexWriterConfig conf = - new IndexWriterConfig(new MockAnalyzer(random())).setSoftDeletesField("__soft_delete"); - IndexWriter writer = new IndexWriter(dir, conf); - int maxDoc = writer.getDocStats().maxDoc; - writer.updateDocValues(new Term("id", "1"), new NumericDocValuesField("__soft_delete", 1)); - - if (random().nextBoolean()) { - writer.commit(); - } - writer.forceMerge(1); - writer.commit(); - assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); - writer.close(); - dir.close(); - } - - public void testDocValuesUpdatesWithNewField() throws Exception { - Path oldIndexDir = createTempDir("dvupdates"); - TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); - Directory dir = newFSDirectory(oldIndexDir); - verifyUsesDefaultCodec(dir, dvUpdatesIndex); - - // update fields and verify index - IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); - IndexWriter writer = new IndexWriter(dir, conf); - // introduce a new field that we later update - writer.addDocument( - Arrays.asList( - new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO), - new NumericDocValuesField("new_numeric", 1), - new BinaryDocValuesField("new_binary", toBytes(1)))); - writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1); - writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1)); - - writer.commit(); - Runnable assertDV = - () -> { - boolean found = false; - try (DirectoryReader reader = DirectoryReader.open(dir)) { - for (LeafReaderContext ctx : reader.leaves()) { - LeafReader leafReader = ctx.reader(); - TermsEnum id = leafReader.terms("id").iterator(); - if (id.seekExact(new BytesRef("1"))) { - PostingsEnum postings = id.postings(null, PostingsEnum.NONE); - NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric"); - BinaryDocValues 
binaryDocValues = leafReader.getBinaryDocValues("new_binary");
-                 int doc;
-                 while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-                   found = true;
-                   assertTrue(binaryDocValues.advanceExact(doc));
-                   assertTrue(numericDocValues.advanceExact(doc));
-                   assertEquals(1, numericDocValues.longValue());
-                   assertEquals(toBytes(1), binaryDocValues.binaryValue());
-                 }
-               }
-             }
-           } catch (IOException e) {
-             throw new AssertionError(e);
-           }
-           assertTrue(found);
-         };
-     assertDV.run();
-     // merge all segments
-     writer.forceMerge(1);
-     writer.commit();
-     assertDV.run();
-     writer.close();
-     dir.close();
-   }
-
-   // LUCENE-5907
-   public void testUpgradeWithNRTReader() throws Exception {
-     for (String name : oldNames) {
-       Directory dir = newDirectory(oldIndexDirs.get(name));
-
-       IndexWriter writer =
-           new IndexWriter(
-               dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
-       writer.addDocument(new Document());
-       DirectoryReader r = DirectoryReader.open(writer);
-       writer.commit();
-       r.close();
-       writer.forceMerge(1);
-       writer.commit();
-       writer.rollback();
-       SegmentInfos.readLatestCommit(dir);
-       dir.close();
-     }
-   }
-
-   // LUCENE-5907
-   public void testUpgradeThenMultipleCommits() throws Exception {
-     for (String name : oldNames) {
-       Directory dir = newDirectory(oldIndexDirs.get(name));
-
-       IndexWriter writer =
-           new IndexWriter(
-               dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
-       writer.addDocument(new Document());
-       writer.commit();
-       writer.addDocument(new Document());
-       writer.commit();
-       writer.close();
-       dir.close();
-     }
-   }
-
-   public void testSortedIndex() throws Exception {
-     for (String name : oldSortedNames) {
-       Path path = createTempDir("sorted");
-       InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name + ".zip");
-       assertNotNull("Sorted index index " + name + " not found", resource);
-       TestUtil.unzip(resource, path);
-
-       Directory dir = newFSDirectory(path);
-       DirectoryReader reader = DirectoryReader.open(dir);
-
-       assertEquals(1, reader.leaves().size());
-       Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
-       assertNotNull(sort);
-       assertEquals("<long: \"dateDV\">!", sort.toString());
-
-       // This will confirm the docs are really sorted
-       TestUtil.checkIndex(dir);
-
-       searchExampleIndex(reader);
-
-       reader.close();
-       dir.close();
-     }
-   }
-
-   public void testSortedIndexAddDocBlocks() throws Exception {
-     for (String name : oldSortedNames) {
-       Path path = createTempDir("sorted");
-       InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name + ".zip");
-       assertNotNull("Sorted index index " + name + " not found", resource);
-       TestUtil.unzip(resource, path);
-
-       try (Directory dir = newFSDirectory(path)) {
-         final Sort sort;
-         try (DirectoryReader reader = DirectoryReader.open(dir)) {
-           assertEquals(1, reader.leaves().size());
-           sort = reader.leaves().get(0).reader().getMetaData().getSort();
-           assertNotNull(sort);
-           searchExampleIndex(reader);
-         }
-         // open writer
-         try (IndexWriter writer =
-             new IndexWriter(
-                 dir,
-                 newIndexWriterConfig(new MockAnalyzer(random()))
-                     .setOpenMode(OpenMode.APPEND)
-                     .setIndexSort(sort)
-                     .setMergePolicy(newLogMergePolicy()))) {
-           // add 10 docs
-           for (int i = 0; i < 10; i++) {
-             Document child = new Document();
-             child.add(new StringField("relation", "child", Field.Store.NO));
-             child.add(new StringField("bid", "" + i, Field.Store.NO));
-             child.add(new NumericDocValuesField("dateDV", i));
-             Document parent = new Document();
-             parent.add(new
StringField("relation", "parent", Field.Store.NO)); - parent.add(new StringField("bid", "" + i, Field.Store.NO)); - parent.add(new NumericDocValuesField("dateDV", i)); - writer.addDocuments(Arrays.asList(child, child, parent)); - if (random().nextBoolean()) { - writer.flush(); - } - } - if (random().nextBoolean()) { - writer.forceMerge(1); - } - writer.commit(); - try (IndexReader reader = DirectoryReader.open(dir)) { - IndexSearcher searcher = new IndexSearcher(reader); - for (int i = 0; i < 10; i++) { - TopDocs children = - searcher.search( - new BooleanQuery.Builder() - .add( - new TermQuery(new Term("relation", "child")), - BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST) - .build(), - 2); - TopDocs parents = - searcher.search( - new BooleanQuery.Builder() - .add( - new TermQuery(new Term("relation", "parent")), - BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST) - .build(), - 2); - assertEquals(2, children.totalHits.value); - assertEquals(1, parents.totalHits.value); - // make sure it's sorted - assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc); - assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc); - } - } - } - // This will confirm the docs are really sorted - TestUtil.checkIndex(dir); - } - } - } - - private void searchExampleIndex(DirectoryReader reader) throws IOException { - IndexSearcher searcher = newSearcher(reader); - - TopDocs topDocs = searcher.search(new FieldExistsQuery("titleTokenized"), 10); - assertEquals(50, topDocs.totalHits.value); - - topDocs = searcher.search(new FieldExistsQuery("titleDV"), 10); - assertEquals(50, topDocs.totalHits.value); - - topDocs = searcher.search(new TermQuery(new Term("body", "ja")), 10); - assertTrue(topDocs.totalHits.value > 0); - - topDocs = - searcher.search( - IntPoint.newRangeQuery("docid_int", 42, 44), - 10, - new Sort(new SortField("docid_intDV", SortField.Type.INT))); - assertEquals(3, topDocs.totalHits.value); - assertEquals(3, topDocs.scoreDocs.length); - assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]); - assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]); - assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]); - - topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5); - assertTrue(topDocs.totalHits.value > 0); - - topDocs = - searcher.search( - new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG))); - assertEquals(50, topDocs.totalHits.value); - assertEquals(5, topDocs.scoreDocs.length); - long firstDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]; - long lastDate = (Long) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]; - assertTrue(firstDate <= lastDate); - } - - static long getValue(BinaryDocValues bdv) throws IOException { - BytesRef term = bdv.binaryValue(); - int idx = term.offset; - byte b = term.bytes[idx++]; - long value = b & 0x7FL; - for (int shift = 7; (b & 0x80L) != 0; shift += 7) { - b = term.bytes[idx++]; - value |= (b & 0x7FL) << shift; - } - return value; - } - - // encodes a long into a BytesRef as VLong so that we get varying number of bytes when we update - static BytesRef toBytes(long value) { - BytesRef bytes = new BytesRef(10); // negative longs may take 10 bytes - while ((value & ~0x7FL) != 0L) { - bytes.bytes[bytes.length++] = (byte) ((value & 0x7FL) | 0x80L); - value >>>= 7; - } - bytes.bytes[bytes.length++] = (byte) value; - return bytes; - } - - public void testFailOpenOldIndex() 
throws IOException { - for (String name : oldNames) { - Directory directory = oldIndexDirs.get(name); - IndexCommit commit = DirectoryReader.listCommits(directory).get(0); - - final int createdMajor; - // No exception when opening with the allowed min version - try (IndexReader reader = - StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null)) { - createdMajor = reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(); - } - - if (createdMajor < Version.LATEST.major) { - IndexFormatTooOldException ex = - expectThrows( - IndexFormatTooOldException.class, - () -> StandardDirectoryReader.open(commit, Version.LATEST.major, null)); - assertTrue( - ex.getMessage() - .contains( - "only supports reading from version " + Version.LATEST.major + " upwards.")); - } - } - } - - // #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset - // of wikibigall unique terms) that shows the read-time exception of - // IntersectTermsEnum (used by WildcardQuery) - public void testWildcardQueryExceptions990() throws IOException { - Path path = createTempDir("12895"); - - String name = "index.12895.9.8.0.zip"; - InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name); - assertNotNull("missing zip file to reproduce #12895", resource); - TestUtil.unzip(resource, path); - - try (Directory dir = newFSDirectory(path); - DirectoryReader reader = DirectoryReader.open(dir)) { - IndexSearcher searcher = new IndexSearcher(reader); - - searcher.count(new WildcardQuery(new Term("field", "*qx*"))); - } - } - - @Nightly - public void testReadNMinusTwoCommit() throws IOException { - for (String name : binarySupportedNames) { - Path oldIndexDir = createTempDir(name); - TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir); - try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) { - IndexCommit commit = DirectoryReader.listCommits(dir).get(0); - StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close(); - } - } - } - - @Nightly - public void testReadNMinusTwoSegmentInfos() throws IOException { - for (String name : binarySupportedNames) { - Path oldIndexDir = createTempDir(name); - TestUtil.unzip(getDataInputStream("unsupported." 
+ name + ".zip"), oldIndexDir); - try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) { - expectThrows( - IndexFormatTooOldException.class, - () -> SegmentInfos.readLatestCommit(dir, Version.MIN_SUPPORTED_MAJOR)); - SegmentInfos.readLatestCommit(dir, MIN_BINARY_SUPPORTED_MAJOR); - } - } - } - - public void testOpenModeAndCreatedVersion() throws IOException { - for (String name : oldNames) { - Directory dir = newDirectory(oldIndexDirs.get(name)); - int majorVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor(); - if (majorVersion != Version.MIN_SUPPORTED_MAJOR && majorVersion != Version.LATEST.major) { - fail( - "expected one of: [" - + Version.MIN_SUPPORTED_MAJOR - + ", " - + Version.LATEST.major - + "] but got: " - + majorVersion); - } - for (OpenMode openMode : OpenMode.values()) { - Directory tmpDir = newDirectory(dir); - IndexWriter w = new IndexWriter(tmpDir, newIndexWriterConfig().setOpenMode(openMode)); - w.commit(); - w.close(); - switch (openMode) { - case CREATE: - assertEquals( - Version.LATEST.major, - SegmentInfos.readLatestCommit(tmpDir).getIndexCreatedVersionMajor()); - break; - case APPEND: - case CREATE_OR_APPEND: - default: - assertEquals( - majorVersion, SegmentInfos.readLatestCommit(tmpDir).getIndexCreatedVersionMajor()); - } - tmpDir.close(); - } - dir.close(); - } - } -} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java new file mode 100644 index 000000000000..2805de744ccc --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java @@ -0,0 +1,899 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.backward_index; + +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.apache.lucene.util.Version.LUCENE_9_0_0; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import java.io.IOException; +import java.util.List; +import java.util.Random; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.BinaryPoint; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.LogByteSizeMergePolicy; +import org.apache.lucene.index.MultiBits; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.MultiTerms; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.StandardDirectoryReader; +import org.apache.lucene.index.StoredFields; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermVectors; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.KnnFloatVectorQuery; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; + +public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestBase { + + static final String INDEX_NAME = "index"; + static final String SUFFIX_CFS = "-cfs"; + static 
final String SUFFIX_NO_CFS = "-nocfs";
+
+  private static final int DOCS_COUNT = 35;
+  private static final int DELETED_ID = 7;
+
+  private static final int KNN_VECTOR_MIN_SUPPORTED_VERSION = LUCENE_9_0_0.major;
+  private static final String KNN_VECTOR_FIELD = "knn_field";
+  private static final FieldType KNN_VECTOR_FIELD_TYPE =
+      KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
+  private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
+
+  static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1;
+
+  /**
+   * A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See
+   * {@link #testVersionsFactory()} for details on the values provided to the framework.
+   */
+  public TestBasicBackwardsCompatibility(Version version, String pattern) {
+    super(version, pattern);
+  }
+
+  /** Provides all current versions to the test framework for each of the index suffixes. */
+  @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
+  public static Iterable<Object[]> testVersionsFactory() throws IllegalAccessException {
+    return allVersion(INDEX_NAME, SUFFIX_CFS, SUFFIX_NO_CFS);
+  }
+
+  @Override
+  protected void createIndex(Directory directory) throws IOException {
+    if (indexPattern.equals(createPattern(INDEX_NAME, SUFFIX_CFS))) {
+      createIndex(directory, true, false);
+    } else {
+      createIndex(directory, false, false);
+    }
+  }
+
+  static void createIndex(Directory dir, boolean doCFS, boolean fullyMerged) throws IOException {
+    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+    mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
+    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+    // TODO: remove randomness
+    IndexWriterConfig conf =
+        new IndexWriterConfig(new MockAnalyzer(random()))
+            .setMaxBufferedDocs(10)
+            .setCodec(TestUtil.getDefaultCodec())
+            .setMergePolicy(NoMergePolicy.INSTANCE);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    for (int i = 0; i < DOCS_COUNT; i++) {
+      addDoc(writer, i);
+    }
+    assertEquals("wrong doc count", DOCS_COUNT, writer.getDocStats().maxDoc);
+    if (fullyMerged) {
+      writer.forceMerge(1);
+    }
+    writer.close();
+
+    if (!fullyMerged) {
+      // open fresh writer so we get no prx file in the added segment
+      mp = new LogByteSizeMergePolicy();
+      mp.setNoCFSRatio(doCFS ?
1.0 : 0.0); + // TODO: remove randomness + conf = + new IndexWriterConfig(new MockAnalyzer(random())) + .setMaxBufferedDocs(10) + .setCodec(TestUtil.getDefaultCodec()) + .setMergePolicy(NoMergePolicy.INSTANCE); + writer = new IndexWriter(dir, conf); + addNoProxDoc(writer); + writer.close(); + + conf = + new IndexWriterConfig(new MockAnalyzer(random())) + .setMaxBufferedDocs(10) + .setCodec(TestUtil.getDefaultCodec()) + .setMergePolicy(NoMergePolicy.INSTANCE); + writer = new IndexWriter(dir, conf); + Term searchTerm = new Term("id", String.valueOf(DELETED_ID)); + writer.deleteDocuments(searchTerm); + writer.close(); + } + } + + static void addDoc(IndexWriter writer, int id) throws IOException { + Document doc = new Document(); + doc.add(new TextField("content", "aaa", Field.Store.NO)); + doc.add(new StringField("id", Integer.toString(id), Field.Store.YES)); + FieldType customType2 = new FieldType(TextField.TYPE_STORED); + customType2.setStoreTermVectors(true); + customType2.setStoreTermVectorPositions(true); + customType2.setStoreTermVectorOffsets(true); + doc.add( + new Field( + "autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); + doc.add( + new Field( + "utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); + doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2)); + doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2)); + + // add docvalues fields + doc.add(new NumericDocValuesField("dvByte", (byte) id)); + byte[] bytes = + new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id}; + BytesRef ref = new BytesRef(bytes); + doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref)); + doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref)); + doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref)); + doc.add(new SortedDocValuesField("dvBytesSortedVar", ref)); + doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref)); + doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref)); + doc.add(new DoubleDocValuesField("dvDouble", id)); + doc.add(new FloatDocValuesField("dvFloat", (float) id)); + doc.add(new NumericDocValuesField("dvInt", id)); + doc.add(new NumericDocValuesField("dvLong", id)); + doc.add(new NumericDocValuesField("dvPacked", id)); + doc.add(new NumericDocValuesField("dvShort", (short) id)); + doc.add(new SortedSetDocValuesField("dvSortedSet", ref)); + doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id)); + + doc.add(new IntPoint("intPoint1d", id)); + doc.add(new IntPoint("intPoint2d", id, 2 * id)); + doc.add(new FloatPoint("floatPoint1d", (float) id)); + doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id)); + doc.add(new LongPoint("longPoint1d", id)); + doc.add(new LongPoint("longPoint2d", id, 2 * id)); + doc.add(new DoublePoint("doublePoint1d", id)); + doc.add(new DoublePoint("doublePoint2d", id, (double) 2 * id)); + doc.add(new BinaryPoint("binaryPoint1d", bytes)); + doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes)); + + // a field with both offsets and term vectors for a cross-check + FieldType customType3 = new FieldType(TextField.TYPE_STORED); + customType3.setStoreTermVectors(true); + customType3.setStoreTermVectorPositions(true); + customType3.setStoreTermVectorOffsets(true); + customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3)); + // a field that omits only 
positions + FieldType customType4 = new FieldType(TextField.TYPE_STORED); + customType4.setStoreTermVectors(true); + customType4.setStoreTermVectorPositions(false); + customType4.setStoreTermVectorOffsets(true); + customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4)); + + float[] vector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * id}; + doc.add(new KnnFloatVectorField(KNN_VECTOR_FIELD, vector, KNN_VECTOR_FIELD_TYPE)); + + // TODO: + // index different norms types via similarity (we use a random one currently?!) + // remove any analyzer randomness, explicitly add payloads for certain fields. + writer.addDocument(doc); + } + + static void addNoProxDoc(IndexWriter writer) throws IOException { + Document doc = new Document(); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setIndexOptions(IndexOptions.DOCS); + Field f = new Field("content3", "aaa", customType); + doc.add(f); + FieldType customType2 = new FieldType(); + customType2.setStored(true); + customType2.setIndexOptions(IndexOptions.DOCS); + f = new Field("content4", "aaa", customType2); + doc.add(f); + writer.addDocument(doc); + } + + public static void searchIndex( + Directory dir, String oldName, int minIndexMajorVersion, Version nameVersion) + throws IOException { + // QueryParser parser = new QueryParser("contents", new MockAnalyzer(random)); + // Query query = parser.parse("handle:1"); + IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0); + IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null); + IndexSearcher searcher = newSearcher(reader); + + TestUtil.checkIndex(dir); + + final Bits liveDocs = MultiBits.getLiveDocs(reader); + assertNotNull(liveDocs); + + StoredFields storedFields = reader.storedFields(); + TermVectors termVectors = reader.termVectors(); + + for (int i = 0; i < DOCS_COUNT; i++) { + if (liveDocs.get(i)) { + Document d = storedFields.document(i); + List fields = d.getFields(); + boolean isProxDoc = d.getField("content3") == null; + if (isProxDoc) { + assertEquals(7, fields.size()); + IndexableField f = d.getField("id"); + assertEquals("" + i, f.stringValue()); + + f = d.getField("utf8"); + assertEquals( + "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); + + f = d.getField("autf8"); + assertEquals( + "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); + + f = d.getField("content2"); + assertEquals("here is more content with aaa aaa aaa", f.stringValue()); + + f = d.getField("fie\u2C77ld"); + assertEquals("field with non-ascii name", f.stringValue()); + } + + Fields tfvFields = termVectors.get(i); + assertNotNull("i=" + i, tfvFields); + Terms tfv = tfvFields.terms("utf8"); + assertNotNull("docID=" + i + " index=" + oldName, tfv); + } else { + assertEquals(DELETED_ID, i); + } + } + + // check docvalues fields + NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte"); + BinaryDocValues dvBytesDerefFixed = MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed"); + BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar"); + SortedDocValues dvBytesSortedFixed = + MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed"); + SortedDocValues dvBytesSortedVar = MultiDocValues.getSortedValues(reader, "dvBytesSortedVar"); + BinaryDocValues dvBytesStraightFixed = + MultiDocValues.getBinaryValues(reader, "dvBytesStraightFixed"); + 
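      // note: the MultiDocValues.get*Values helpers used here return doc-values views that are
      // merged across all leaf readers, so the per-docID loop below can iterate global docIDs
      // directly on the composite reader instead of checking each segment separately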
BinaryDocValues dvBytesStraightVar = + MultiDocValues.getBinaryValues(reader, "dvBytesStraightVar"); + NumericDocValues dvDouble = MultiDocValues.getNumericValues(reader, "dvDouble"); + NumericDocValues dvFloat = MultiDocValues.getNumericValues(reader, "dvFloat"); + NumericDocValues dvInt = MultiDocValues.getNumericValues(reader, "dvInt"); + NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong"); + NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked"); + NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort"); + + SortedSetDocValues dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet"); + SortedNumericDocValues dvSortedNumeric = + MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric"); + + for (int i = 0; i < DOCS_COUNT; i++) { + int id = Integer.parseInt(storedFields.document(i).get("id")); + assertEquals(i, dvByte.nextDoc()); + assertEquals(id, dvByte.longValue()); + + byte[] bytes = + new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id}; + BytesRef expectedRef = new BytesRef(bytes); + + assertEquals(i, dvBytesDerefFixed.nextDoc()); + BytesRef term = dvBytesDerefFixed.binaryValue(); + assertEquals(expectedRef, term); + assertEquals(i, dvBytesDerefVar.nextDoc()); + term = dvBytesDerefVar.binaryValue(); + assertEquals(expectedRef, term); + assertEquals(i, dvBytesSortedFixed.nextDoc()); + term = dvBytesSortedFixed.lookupOrd(dvBytesSortedFixed.ordValue()); + assertEquals(expectedRef, term); + assertEquals(i, dvBytesSortedVar.nextDoc()); + term = dvBytesSortedVar.lookupOrd(dvBytesSortedVar.ordValue()); + assertEquals(expectedRef, term); + assertEquals(i, dvBytesStraightFixed.nextDoc()); + term = dvBytesStraightFixed.binaryValue(); + assertEquals(expectedRef, term); + assertEquals(i, dvBytesStraightVar.nextDoc()); + term = dvBytesStraightVar.binaryValue(); + assertEquals(expectedRef, term); + + assertEquals(i, dvDouble.nextDoc()); + assertEquals(id, Double.longBitsToDouble(dvDouble.longValue()), 0D); + assertEquals(i, dvFloat.nextDoc()); + assertEquals((float) id, Float.intBitsToFloat((int) dvFloat.longValue()), 0F); + assertEquals(i, dvInt.nextDoc()); + assertEquals(id, dvInt.longValue()); + assertEquals(i, dvLong.nextDoc()); + assertEquals(id, dvLong.longValue()); + assertEquals(i, dvPacked.nextDoc()); + assertEquals(id, dvPacked.longValue()); + assertEquals(i, dvShort.nextDoc()); + assertEquals(id, dvShort.longValue()); + + assertEquals(i, dvSortedSet.nextDoc()); + assertEquals(1, dvSortedSet.docValueCount()); + long ord = dvSortedSet.nextOrd(); + term = dvSortedSet.lookupOrd(ord); + assertEquals(expectedRef, term); + + assertEquals(i, dvSortedNumeric.nextDoc()); + assertEquals(1, dvSortedNumeric.docValueCount()); + assertEquals(id, dvSortedNumeric.nextValue()); + } + + ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; + + // First document should be #0 + Document d = storedFields.document(hits[0].doc); + assertEquals("didn't get the right document first", "0", d.get("id")); + + doTestHits(hits, 34, searcher.getIndexReader()); + + hits = searcher.search(new TermQuery(new Term("content5", "aaa")), 1000).scoreDocs; + + doTestHits(hits, 34, searcher.getIndexReader()); + + hits = searcher.search(new TermQuery(new Term("content6", "aaa")), 1000).scoreDocs; + + doTestHits(hits, 34, searcher.getIndexReader()); + + hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs; + assertEquals(34, hits.length); + 
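    // 34 = DOCS_COUNT (35) minus the single document (id == DELETED_ID) deleted when the index
    // was created, so every query below that matches all documents expects 34 live hits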
hits = + searcher.search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), 1000) + .scoreDocs; + assertEquals(34, hits.length); + hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs; + assertEquals(34, hits.length); + + doTestHits( + searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search( + IntPoint.newRangeQuery("intPoint2d", new int[] {0, 0}, new int[] {34, 68}), 1000) + .scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search(FloatPoint.newRangeQuery("floatPoint1d", 0f, 34f), 1000).scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search( + FloatPoint.newRangeQuery( + "floatPoint2d", new float[] {0f, 0f}, new float[] {34f, 68f}), + 1000) + .scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search(LongPoint.newRangeQuery("longPoint1d", 0, 34), 1000).scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search( + LongPoint.newRangeQuery("longPoint2d", new long[] {0, 0}, new long[] {34, 68}), + 1000) + .scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search(DoublePoint.newRangeQuery("doublePoint1d", 0.0, 34.0), 1000).scoreDocs, + 34, + searcher.getIndexReader()); + doTestHits( + searcher.search( + DoublePoint.newRangeQuery( + "doublePoint2d", new double[] {0.0, 0.0}, new double[] {34.0, 68.0}), + 1000) + .scoreDocs, + 34, + searcher.getIndexReader()); + + byte[] bytes1 = new byte[4]; + byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34}; + doTestHits( + searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000).scoreDocs, + 34, + searcher.getIndexReader()); + byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68}; + doTestHits( + searcher.search( + BinaryPoint.newRangeQuery( + "binaryPoint2d", new byte[][] {bytes1, bytes1}, new byte[][] {bytes2, bytes3}), + 1000) + .scoreDocs, + 34, + searcher.getIndexReader()); + + // test vector values and KNN search + if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { + // test vector values + int cnt = 0; + for (LeafReaderContext ctx : reader.leaves()) { + FloatVectorValues values = ctx.reader().getFloatVectorValues(KNN_VECTOR_FIELD); + if (values != null) { + assertEquals(KNN_VECTOR_FIELD_TYPE.vectorDimension(), values.dimension()); + for (int doc = values.nextDoc(); doc != NO_MORE_DOCS; doc = values.nextDoc()) { + float[] expectedVector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * cnt}; + assertArrayEquals( + "vectors do not match for doc=" + cnt, expectedVector, values.vectorValue(), 0); + cnt++; + } + } + } + assertEquals(DOCS_COUNT, cnt); + + // test KNN search + ScoreDoc[] scoreDocs = assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); + for (int i = 0; i < scoreDocs.length; i++) { + int id = Integer.parseInt(storedFields.document(scoreDocs[i].doc).get("id")); + int expectedId = i < DELETED_ID ? 
i : i + 1; + assertEquals(expectedId, id); + } + } + + reader.close(); + } + + private static void doTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) + throws IOException { + final int hitCount = hits.length; + assertEquals("wrong number of hits", expectedCount, hitCount); + StoredFields storedFields = reader.storedFields(); + TermVectors termVectors = reader.termVectors(); + for (ScoreDoc hit : hits) { + storedFields.document(hit.doc); + termVectors.get(hit.doc); + } + } + + private static ScoreDoc[] assertKNNSearch( + IndexSearcher searcher, + float[] queryVector, + int k, + int expectedHitsCount, + String expectedFirstDocId) + throws IOException { + ScoreDoc[] hits = + searcher.search(new KnnFloatVectorQuery(KNN_VECTOR_FIELD, queryVector, k), k).scoreDocs; + assertEquals("wrong number of hits", expectedHitsCount, hits.length); + Document d = searcher.storedFields().document(hits[0].doc); + assertEquals("wrong first document", expectedFirstDocId, d.get("id")); + return hits; + } + + public void changeIndexWithAdds(Random random, Directory dir, Version nameVersion) + throws IOException { + SegmentInfos infos = SegmentInfos.readLatestCommit(dir); + assertEquals(nameVersion, infos.getCommitLuceneVersion()); + assertEquals(nameVersion, infos.getMinSegmentLuceneVersion()); + + // open writer + IndexWriter writer = + new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random)) + .setOpenMode(IndexWriterConfig.OpenMode.APPEND) + .setMergePolicy(newLogMergePolicy())); + // add 10 docs + for (int i = 0; i < 10; i++) { + addDoc(writer, DOCS_COUNT + i); + } + + // make sure writer sees right total -- writer seems not to know about deletes in .del? + final int expected = 45; + assertEquals("wrong doc count", expected, writer.getDocStats().numDocs); + writer.close(); + + // make sure searching sees right # hits for term search + IndexReader reader = DirectoryReader.open(dir); + IndexSearcher searcher = newSearcher(reader); + ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; + Document d = searcher.getIndexReader().storedFields().document(hits[0].doc); + assertEquals("wrong first document", "0", d.get("id")); + doTestHits(hits, 44, searcher.getIndexReader()); + + if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { + // make sure KNN search sees all hits (graph may not be used if k is big) + assertKNNSearch(searcher, KNN_VECTOR, 1000, 44, "0"); + // make sure KNN search using HNSW graph sees newly added docs + assertKNNSearch( + searcher, + new float[] {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * 44}, + 10, + 10, + "44"); + } + reader.close(); + + // fully merge + writer = + new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random)) + .setOpenMode(IndexWriterConfig.OpenMode.APPEND) + .setMergePolicy(newLogMergePolicy())); + writer.forceMerge(1); + writer.close(); + + reader = DirectoryReader.open(dir); + searcher = newSearcher(reader); + // make sure searching sees right # hits fot term search + hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; + assertEquals("wrong number of hits", 44, hits.length); + d = searcher.storedFields().document(hits[0].doc); + doTestHits(hits, 44, searcher.getIndexReader()); + assertEquals("wrong first document", "0", d.get("id")); + + if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { + // make sure KNN search sees all hits + assertKNNSearch(searcher, KNN_VECTOR, 1000, 44, "0"); + // make sure KNN search using HNSW graph sees 
newly added docs + assertKNNSearch( + searcher, + new float[] {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * 44}, + 10, + 10, + "44"); + } + reader.close(); + } + + public void changeIndexNoAdds(Random random, Directory dir, Version nameVersion) + throws IOException { + // make sure searching sees right # hits for term search + DirectoryReader reader = DirectoryReader.open(dir); + IndexSearcher searcher = newSearcher(reader); + ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; + assertEquals("wrong number of hits", 34, hits.length); + Document d = searcher.storedFields().document(hits[0].doc); + assertEquals("wrong first document", "0", d.get("id")); + + if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { + // make sure KNN search sees all hits + assertKNNSearch(searcher, KNN_VECTOR, 1000, 34, "0"); + // make sure KNN search using HNSW graph retrieves correct results + assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); + } + reader.close(); + + // fully merge + IndexWriter writer = + new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random)) + .setOpenMode(IndexWriterConfig.OpenMode.APPEND)); + writer.forceMerge(1); + writer.close(); + + reader = DirectoryReader.open(dir); + searcher = newSearcher(reader); + // make sure searching sees right # hits fot term search + hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs; + assertEquals("wrong number of hits", 34, hits.length); + doTestHits(hits, 34, searcher.getIndexReader()); + // make sure searching sees right # hits for KNN search + if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) { + // make sure KNN search sees all hits + assertKNNSearch(searcher, KNN_VECTOR, 1000, 34, "0"); + // make sure KNN search using HNSW graph retrieves correct results + assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); + } + reader.close(); + } + + // flex: test basics of TermsEnum api on non-flex index + public void testNextIntoWrongField() throws Exception { + IndexReader r = DirectoryReader.open(directory); + TermsEnum terms = MultiTerms.getTerms(r, "content").iterator(); + BytesRef t = terms.next(); + assertNotNull(t); + + // content field only has term aaa: + assertEquals("aaa", t.utf8ToString()); + assertNull(terms.next()); + + BytesRef aaaTerm = new BytesRef("aaa"); + + // should be found exactly + assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); + assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE))); + assertNull(terms.next()); + + // should hit end of field + assertEquals(TermsEnum.SeekStatus.END, terms.seekCeil(new BytesRef("bbb"))); + assertNull(terms.next()); + + // should seek to aaa + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, terms.seekCeil(new BytesRef("a"))); + assertTrue(terms.term().bytesEquals(aaaTerm)); + assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE))); + assertNull(terms.next()); + + assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); + assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE))); + assertNull(terms.next()); + + r.close(); + } + + /** + * Test that we didn't forget to bump the current Constants.LUCENE_MAIN_VERSION. This is important + * so that we can determine which version of lucene wrote the segment. 
+ */ + public void testOldVersions() throws Exception { + // first create a little index with the current code and get the version + Directory currentDir = newDirectory(); + RandomIndexWriter riw = new RandomIndexWriter(random(), currentDir); + riw.addDocument(new Document()); + riw.close(); + DirectoryReader ir = DirectoryReader.open(currentDir); + SegmentReader air = (SegmentReader) ir.leaves().get(0).reader(); + Version currentVersion = air.getSegmentInfo().info.getVersion(); + assertNotNull(currentVersion); // only 3.0 segments can have a null version + ir.close(); + currentDir.close(); + + // now check all the old indexes, their version should be < the current version + DirectoryReader r = DirectoryReader.open(directory); + for (LeafReaderContext context : r.leaves()) { + air = (SegmentReader) context.reader(); + Version oldVersion = air.getSegmentInfo().info.getVersion(); + assertNotNull(oldVersion); // only 3.0 segments can have a null version + assertTrue( + "current Version.LATEST is <= an old index: did you forget to bump it?!", + currentVersion.onOrAfter(oldVersion)); + } + r.close(); + } + + public void testIndexCreatedVersion() throws IOException { + SegmentInfos infos = SegmentInfos.readLatestCommit(directory); + // those indexes are created by a single version so we can + // compare the commit version with the created version + assertEquals(infos.getCommitLuceneVersion().major, infos.getIndexCreatedVersionMajor()); + } + + public void testSegmentCommitInfoId() throws IOException { + Directory dir = this.directory; + SegmentInfos infos = SegmentInfos.readLatestCommit(dir); + for (SegmentCommitInfo info : infos) { + if (info.info.getVersion().onOrAfter(Version.fromBits(8, 6, 0))) { + assertNotNull(info.toString(), info.getId()); + } else { + assertNull(info.toString(), info.getId()); + } + } + } + + private int countDocs(PostingsEnum docs) throws IOException { + int count = 0; + while ((docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + count++; + } + return count; + } + + public void testIndexOldIndexNoAdds() throws Exception { + try (Directory dir = newDirectory(directory)) { + changeIndexNoAdds(random(), dir, version); + } + } + + public void testIndexOldIndex() throws Exception { + try (Directory dir = newDirectory(directory)) { + changeIndexWithAdds(random(), dir, version); + } + } + + public void testSearchOldIndex() throws Exception { + searchIndex(directory, indexPattern, Version.MIN_SUPPORTED_MAJOR, version); + } + + public void testFullyMergeOldIndex() throws Exception { + try (Directory dir = newDirectory(this.directory)) { + final SegmentInfos oldSegInfos = SegmentInfos.readLatestCommit(dir); + + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))); + w.forceMerge(1); + w.close(); + + final SegmentInfos segInfos = SegmentInfos.readLatestCommit(dir); + assertEquals( + oldSegInfos.getIndexCreatedVersionMajor(), segInfos.getIndexCreatedVersionMajor()); + assertEquals(Version.LATEST, segInfos.asList().get(0).info.getVersion()); + assertEquals( + oldSegInfos.asList().get(0).info.getMinVersion(), + segInfos.asList().get(0).info.getMinVersion()); + } + } + + public void testAddOldIndexes() throws IOException { + SegmentInfos infos = SegmentInfos.readLatestCommit(directory); + + Directory targetDir = newDirectory(); + if (infos.getCommitLuceneVersion().major != Version.LATEST.major) { + // both indexes are not compatible + Directory targetDir2 = newDirectory(); + IndexWriter w = new IndexWriter(targetDir2, newIndexWriterConfig(new 
MockAnalyzer(random()))); + IllegalArgumentException e = + expectThrows(IllegalArgumentException.class, () -> w.addIndexes(directory)); + assertTrue( + e.getMessage(), + e.getMessage() + .startsWith( + "Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version.")); + w.close(); + targetDir2.close(); + + // for the next test, we simulate writing to an index that was created on the same major + // version + new SegmentInfos(infos.getIndexCreatedVersionMajor()).commit(targetDir); + } + + IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random()))); + w.addIndexes(directory); + w.close(); + + SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); + assertNull( + "none of the segments should have been upgraded", + si.asList().stream() + .filter( // depending on the MergePolicy we might see these segments merged away + sci -> + sci.getId() != null + && sci.info.getVersion().onOrAfter(Version.fromBits(8, 6, 0)) == false) + .findAny() + .orElse(null)); + if (VERBOSE) { + System.out.println("\nTEST: done adding indices; now close"); + } + + targetDir.close(); + } + + public void testAddOldIndexesReader() throws IOException { + SegmentInfos infos = SegmentInfos.readLatestCommit(directory); + DirectoryReader reader = DirectoryReader.open(directory); + + Directory targetDir = newDirectory(); + if (infos.getCommitLuceneVersion().major != Version.LATEST.major) { + Directory targetDir2 = newDirectory(); + IndexWriter w = new IndexWriter(targetDir2, newIndexWriterConfig(new MockAnalyzer(random()))); + IllegalArgumentException e = + expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader)); + assertEquals( + e.getMessage(), + "Cannot merge a segment that has been created with major version 8 into this index which has been created by major version 9"); + w.close(); + targetDir2.close(); + + // for the next test, we simulate writing to an index that was created on the same major + // version + new SegmentInfos(infos.getIndexCreatedVersionMajor()).commit(targetDir); + } + IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random()))); + TestUtil.addIndexesSlowly(w, reader); + w.close(); + reader.close(); + SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); + assertNull( + "all SCIs should have an id now", + si.asList().stream().filter(sci -> sci.getId() == null).findAny().orElse(null)); + targetDir.close(); + } + + public void testFailOpenOldIndex() throws IOException { + assumeFalse("doesn't work on current index", version.major == Version.LATEST.major); + IndexCommit commit = DirectoryReader.listCommits(directory).get(0); + IndexFormatTooOldException ex = + expectThrows( + IndexFormatTooOldException.class, + () -> StandardDirectoryReader.open(commit, Version.LATEST.major, null)); + assertTrue( + ex.getMessage() + .contains("only supports reading from version " + Version.LATEST.major + " upwards.")); + // now open with allowed min version + StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close(); + } + + public void testOpenModeAndCreatedVersion() throws IOException { + Directory dir = newDirectory(directory); + int majorVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor(); + if (majorVersion != Version.MIN_SUPPORTED_MAJOR && majorVersion != Version.LATEST.major) { + fail( + "expected one of: [" + + Version.MIN_SUPPORTED_MAJOR + + ", " + + Version.LATEST.major + + "] but got: " + + majorVersion); + } + for 
(IndexWriterConfig.OpenMode openMode : IndexWriterConfig.OpenMode.values()) { + Directory tmpDir = newDirectory(dir); + IndexWriter w = new IndexWriter(tmpDir, newIndexWriterConfig().setOpenMode(openMode)); + w.commit(); + w.close(); + switch (openMode) { + case CREATE: + assertEquals( + Version.LATEST.major, + SegmentInfos.readLatestCommit(tmpDir).getIndexCreatedVersionMajor()); + break; + case APPEND: + case CREATE_OR_APPEND: + default: + assertEquals( + majorVersion, SegmentInfos.readLatestCommit(tmpDir).getIndexCreatedVersionMajor()); + } + tmpDir.close(); + } + dir.close(); + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBinaryBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBinaryBackwardsCompatibility.java new file mode 100644 index 000000000000..82594e775c78 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBinaryBackwardsCompatibility.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.backward_index; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.StandardDirectoryReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.store.BaseDirectoryWrapper; +import org.apache.lucene.util.Version; + +public class TestBinaryBackwardsCompatibility extends BackwardsCompatibilityTestBase { + + static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1; + static final String INDEX_NAME = "unsupported"; + static final String SUFFIX_CFS = "-cfs"; + static final String SUFFIX_NO_CFS = "-nocfs"; + + public TestBinaryBackwardsCompatibility(Version version, String pattern) { + super(version, pattern); + } + + @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s") + public static Iterable testVersionsFactory() { + List params = new ArrayList<>(); + for (Version version : BINARY_SUPPORTED_VERSIONS) { + params.add(new Object[] {version, createPattern(INDEX_NAME, SUFFIX_CFS)}); + params.add(new Object[] {version, createPattern(INDEX_NAME, SUFFIX_NO_CFS)}); + } + return params; + } + + @Override + void verifyUsesDefaultCodec(Directory dir, String name) throws IOException { + // don't this will fail since the indices are not supported + } + + @Override + protected void createIndex(Directory directory) throws IOException { + fail("not supported"); + } + + @Nightly + public void testReadNMinusTwoCommit() throws IOException { + + try (BaseDirectoryWrapper dir = newDirectory(directory)) { + IndexCommit commit = DirectoryReader.listCommits(dir).get(0); + StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close(); + } + } + + @Nightly + public void testReadNMinusTwoSegmentInfos() throws IOException { + try (BaseDirectoryWrapper dir = newDirectory(directory)) { + expectThrows( + IndexFormatTooOldException.class, + () -> SegmentInfos.readLatestCommit(dir, Version.MIN_SUPPORTED_MAJOR)); + SegmentInfos.readLatestCommit(dir, MIN_BINARY_SUPPORTED_MAJOR); + } + } + + @Nightly + public void testSearchOldIndex() throws Exception { + TestBasicBackwardsCompatibility.searchIndex( + directory, indexPattern, MIN_BINARY_SUPPORTED_MAJOR, version); + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestDVUpdateBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestDVUpdateBackwardsCompatibility.java new file mode 100644 index 000000000000..332daa621ed5 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestDVUpdateBackwardsCompatibility.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_index; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; + +public class TestDVUpdateBackwardsCompatibility extends BackwardsCompatibilityTestBase { + + static final String INDEX_NAME = "dvupdates"; + static final String SUFFIX = ""; + + public TestDVUpdateBackwardsCompatibility(Version version, String pattern) { + super(version, pattern); + } + + /** Provides the initial release of the previous major to the test-framework */ + @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s") + public static Iterable testVersionsFactory() { + List params = new ArrayList<>(); + // TODO - WHY ONLY on the first major version? 
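    // the dvupdates index only exists for the initial release of the previous major
    // (TestGenerateBwcIndices#testCreateIndexWithDocValuesUpdates only creates it when
    // Version.LATEST is an x.0.0 release)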
+ params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)}); + return params; + } + + @Override + protected void createIndex(Directory directory) throws IOException { + IndexWriterConfig conf = + new IndexWriterConfig(new MockAnalyzer(random())) + .setCodec(TestUtil.getDefaultCodec()) + .setUseCompoundFile(false) + .setMergePolicy(NoMergePolicy.INSTANCE); + IndexWriter writer = new IndexWriter(directory, conf); + // create an index w/ few doc-values fields, some with updates and some without + for (int i = 0; i < 30; i++) { + Document doc = new Document(); + doc.add(new StringField("id", "" + i, Field.Store.NO)); + doc.add(new NumericDocValuesField("ndv1", i)); + doc.add(new NumericDocValuesField("ndv1_c", i * 2)); + doc.add(new NumericDocValuesField("ndv2", i * 3)); + doc.add(new NumericDocValuesField("ndv2_c", i * 6)); + doc.add(new BinaryDocValuesField("bdv1", toBytes(i))); + doc.add(new BinaryDocValuesField("bdv1_c", toBytes(i * 2))); + doc.add(new BinaryDocValuesField("bdv2", toBytes(i * 3))); + doc.add(new BinaryDocValuesField("bdv2_c", toBytes(i * 6))); + writer.addDocument(doc); + if ((i + 1) % 10 == 0) { + writer.commit(); // flush every 10 docs + } + } + + // first segment: no updates + + // second segment: update two fields, same gen + updateNumeric(writer, "10", "ndv1", "ndv1_c", 100L); + updateBinary(writer, "11", "bdv1", "bdv1_c", 100L); + writer.commit(); + + // third segment: update few fields, different gens, few docs + updateNumeric(writer, "20", "ndv1", "ndv1_c", 100L); + updateBinary(writer, "21", "bdv1", "bdv1_c", 100L); + writer.commit(); + updateNumeric(writer, "22", "ndv1", "ndv1_c", 200L); // update the field again + writer.close(); + } + + public void testDocValuesUpdates() throws Exception { + searchDocValuesUpdatesIndex(directory); + } + + public void testDeletes() throws Exception { + IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(directory, conf); + + int maxDoc = writer.getDocStats().maxDoc; + writer.deleteDocuments(new Term("id", "1")); + if (random().nextBoolean()) { + writer.commit(); + } + + writer.forceMerge(1); + writer.commit(); + assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); + + writer.close(); + } + + public void testSoftDeletes() throws Exception { + IndexWriterConfig conf = + new IndexWriterConfig(new MockAnalyzer(random())).setSoftDeletesField("__soft_delete"); + IndexWriter writer = new IndexWriter(directory, conf); + int maxDoc = writer.getDocStats().maxDoc; + writer.updateDocValues(new Term("id", "1"), new NumericDocValuesField("__soft_delete", 1)); + + if (random().nextBoolean()) { + writer.commit(); + } + writer.forceMerge(1); + writer.commit(); + assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); + writer.close(); + } + + public void testDocValuesUpdatesWithNewField() throws Exception { + // update fields and verify index + IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(directory, conf); + // introduce a new field that we later update + writer.addDocument( + Arrays.asList( + new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO), + new NumericDocValuesField("new_numeric", 1), + new BinaryDocValuesField("new_binary", toBytes(1)))); + writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1); + writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1)); + + writer.commit(); + Runnable assertDV = + () -> { + boolean found = false; + try 
(DirectoryReader reader = DirectoryReader.open(directory)) { + for (LeafReaderContext ctx : reader.leaves()) { + LeafReader leafReader = ctx.reader(); + TermsEnum id = leafReader.terms("id").iterator(); + if (id.seekExact(new BytesRef("1"))) { + PostingsEnum postings = id.postings(null, PostingsEnum.NONE); + NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric"); + BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("new_binary"); + int doc; + while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + found = true; + assertTrue(binaryDocValues.advanceExact(doc)); + assertTrue(numericDocValues.advanceExact(doc)); + assertEquals(1, numericDocValues.longValue()); + assertEquals(toBytes(1), binaryDocValues.binaryValue()); + } + } + } + } catch (IOException e) { + throw new AssertionError(e); + } + assertTrue(found); + }; + assertDV.run(); + // merge all segments + writer.forceMerge(1); + writer.commit(); + assertDV.run(); + writer.close(); + } + + private void assertNumericDocValues(LeafReader r, String f, String cf) throws IOException { + NumericDocValues ndvf = r.getNumericDocValues(f); + NumericDocValues ndvcf = r.getNumericDocValues(cf); + for (int i = 0; i < r.maxDoc(); i++) { + assertEquals(i, ndvcf.nextDoc()); + assertEquals(i, ndvf.nextDoc()); + assertEquals(ndvcf.longValue(), ndvf.longValue() * 2); + } + } + + private void assertBinaryDocValues(LeafReader r, String f, String cf) throws IOException { + BinaryDocValues bdvf = r.getBinaryDocValues(f); + BinaryDocValues bdvcf = r.getBinaryDocValues(cf); + for (int i = 0; i < r.maxDoc(); i++) { + assertEquals(i, bdvf.nextDoc()); + assertEquals(i, bdvcf.nextDoc()); + assertEquals(getValue(bdvcf), getValue(bdvf) * 2); + } + } + + static long getValue(BinaryDocValues bdv) throws IOException { + BytesRef term = bdv.binaryValue(); + int idx = term.offset; + byte b = term.bytes[idx++]; + long value = b & 0x7FL; + for (int shift = 7; (b & 0x80L) != 0; shift += 7) { + b = term.bytes[idx++]; + value |= (b & 0x7FL) << shift; + } + return value; + } + + private void verifyDocValues(Directory dir) throws IOException { + DirectoryReader reader = DirectoryReader.open(dir); + for (LeafReaderContext context : reader.leaves()) { + LeafReader r = context.reader(); + assertNumericDocValues(r, "ndv1", "ndv1_c"); + assertNumericDocValues(r, "ndv2", "ndv2_c"); + assertBinaryDocValues(r, "bdv1", "bdv1_c"); + assertBinaryDocValues(r, "bdv2", "bdv2_c"); + } + reader.close(); + } + + private void searchDocValuesUpdatesIndex(Directory dir) throws IOException { + verifyUsesDefaultCodec(dir, indexName(version)); + verifyDocValues(dir); + + // update fields and verify index + IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + updateNumeric(writer, "1", "ndv1", "ndv1_c", 300L); + updateNumeric(writer, "1", "ndv2", "ndv2_c", 300L); + updateBinary(writer, "1", "bdv1", "bdv1_c", 300L); + updateBinary(writer, "1", "bdv2", "bdv2_c", 300L); + + writer.commit(); + verifyDocValues(dir); + + // merge all segments + writer.forceMerge(1); + writer.commit(); + verifyDocValues(dir); + + writer.close(); + } + + private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) + throws IOException { + writer.updateNumericDocValue(new Term("id", id), f, value); + writer.updateNumericDocValue(new Term("id", id), cf, value * 2); + } + + private void updateBinary(IndexWriter writer, String id, String f, String cf, long value) + throws 
IOException { + writer.updateBinaryDocValue(new Term("id", id), f, toBytes(value)); + writer.updateBinaryDocValue(new Term("id", id), cf, toBytes(value * 2)); + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestEmptyIndexBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestEmptyIndexBackwardsCompatibility.java new file mode 100644 index 000000000000..40fcd4c59bf9 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestEmptyIndexBackwardsCompatibility.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_index; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.Version; + +public class TestEmptyIndexBackwardsCompatibility extends BackwardsCompatibilityTestBase { + static final String INDEX_NAME = "empty"; + static final String SUFFIX = ""; + + public TestEmptyIndexBackwardsCompatibility(Version version, String pattern) { + super(version, pattern); + } + + @Override + protected void createIndex(Directory directory) throws IOException { + IndexWriterConfig conf = + new IndexWriterConfig(new MockAnalyzer(random())) + .setUseCompoundFile(false) + .setCodec(TestUtil.getDefaultCodec()) + .setMergePolicy(NoMergePolicy.INSTANCE); + try (IndexWriter writer = new IndexWriter(directory, conf)) { + writer.flush(); + } + } + + /** Provides the initial release of the previous major to the test-framework */ + @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s") + public static Iterable testVersionsFactory() { + List params = new ArrayList<>(); + // TODO - WHY ONLY on the first major version? 
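    // as with the dvupdates index, the empty bwc index is only generated for the initial
    // x.0.0 release of a major (see TestGenerateBwcIndices#testCreateEmptyIndex)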
+ params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)}); + return params; + } + + public void testUpgradeEmptyOldIndex() throws Exception { + try (Directory dir = newDirectory(directory)) { + TestIndexUpgradeBackwardsCompatibility.newIndexUpgrader(dir).upgrade(); + TestIndexUpgradeBackwardsCompatibility.checkAllSegmentsUpgraded(dir, 9); + } + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java new file mode 100644 index 000000000000..0cd9f37d5c32 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_index; + +import static org.apache.lucene.backward_index.BackwardsCompatibilityTestBase.createPattern; + +import java.io.IOException; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.Version; + +public class TestGenerateBwcIndices extends LuceneTestCase { + + // Backcompat index generation, described below, is mostly automated in: + // + // dev-tools/scripts/addBackcompatIndexes.py + // + // For usage information, see: + // + // http://wiki.apache.org/lucene-java/ReleaseTodo#Generate_Backcompat_Indexes + // + // ----- + // + // To generate backcompat indexes with the current default codec, run the following gradle + // command: + // gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default + // -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices + // + // Also add testmethod with one of the index creation methods below, for example: + // -Ptestmethod=testCreateCFS + // + // Zip up the generated indexes: + // + // cd /path/to/store/indexes/index.cfs ; zip index.-cfs.zip * + // cd /path/to/store/indexes/index.nocfs ; zip index.-nocfs.zip * + // + // Then move those 2 zip files to your trunk checkout and add them + // to the oldNames array. 
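  // Illustrative sketch (hypothetical names): each testCreateXXX method below instantiates the
  // matching parameterized test class directly with Version.LATEST and calls createBWCIndex(),
  // which produces the index in the location configured via -Ptests.bwcdir (see the
  // instructions above). A hypothetical new index flavour "foo" would be wired up the same
  // way, assuming a TestFooBackwardsCompatibility class exposing INDEX_NAME and SUFFIX:
  //
  //   public void testCreateFooIndex() throws IOException {
  //     TestFooBackwardsCompatibility fooTest =
  //         new TestFooBackwardsCompatibility(
  //             Version.LATEST,
  //             createPattern(
  //                 TestFooBackwardsCompatibility.INDEX_NAME,
  //                 TestFooBackwardsCompatibility.SUFFIX));
  //     fooTest.createBWCIndex();
  //   }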
+ + public void testCreateCFS() throws IOException { + TestBasicBackwardsCompatibility basicTest = + new TestBasicBackwardsCompatibility( + Version.LATEST, + createPattern( + TestBasicBackwardsCompatibility.INDEX_NAME, + TestBasicBackwardsCompatibility.SUFFIX_CFS)); + basicTest.createBWCIndex(); + } + + public void testCreateNoCFS() throws IOException { + TestBasicBackwardsCompatibility basicTest = + new TestBasicBackwardsCompatibility( + Version.LATEST, + createPattern( + TestBasicBackwardsCompatibility.INDEX_NAME, + TestBasicBackwardsCompatibility.SUFFIX_NO_CFS)); + basicTest.createBWCIndex(); + } + + public void testCreateSortedIndex() throws IOException { + TestIndexSortBackwardsCompatibility sortedTest = + new TestIndexSortBackwardsCompatibility( + Version.LATEST, + createPattern( + TestIndexSortBackwardsCompatibility.INDEX_NAME, + TestIndexSortBackwardsCompatibility.SUFFIX)); + sortedTest.createBWCIndex(); + } + + private boolean isInitialMajorVersionRelease() { + return Version.LATEST.equals(Version.fromBits(Version.LATEST.major, 0, 0)); + } + + public void testCreateMoreTermsIndex() throws IOException { + if (isInitialMajorVersionRelease()) { + // TODO - WHY ONLY on the first major version? + TestMoreTermsBackwardsCompatibility moreTermsTest = + new TestMoreTermsBackwardsCompatibility( + Version.LATEST, + createPattern( + TestMoreTermsBackwardsCompatibility.INDEX_NAME, + TestMoreTermsBackwardsCompatibility.SUFFIX)); + moreTermsTest.createBWCIndex(); + } + } + + public void testCreateIndexWithDocValuesUpdates() throws IOException { + if (isInitialMajorVersionRelease()) { + // TODO - WHY ONLY on the first major version? + TestDVUpdateBackwardsCompatibility dvUpdatesTest = + new TestDVUpdateBackwardsCompatibility( + Version.LATEST, + createPattern( + TestDVUpdateBackwardsCompatibility.INDEX_NAME, + TestDVUpdateBackwardsCompatibility.SUFFIX)); + dvUpdatesTest.createBWCIndex(); + } + } + + public void testCreateEmptyIndex() throws IOException { + if (isInitialMajorVersionRelease()) { + // TODO - WHY ONLY on the first major version? + TestEmptyIndexBackwardsCompatibility emptyIndex = + new TestEmptyIndexBackwardsCompatibility( + Version.LATEST, + createPattern( + TestEmptyIndexBackwardsCompatibility.INDEX_NAME, + TestEmptyIndexBackwardsCompatibility.SUFFIX)); + emptyIndex.createBWCIndex(); + } + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java new file mode 100644 index 000000000000..4c33aee65e08 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_index; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import java.io.IOException; +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.Locale; +import java.util.Random; +import java.util.TimeZone; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LogByteSizeMergePolicy; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.LineFileDocs; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; + +public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityTestBase { + + static final String INDEX_NAME = "sorted"; + static final String SUFFIX = ""; + + public TestIndexSortBackwardsCompatibility(Version version, String pattern) { + super(version, pattern); + } + + /** Provides all sorted versions to the test-framework */ + @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s") + public static Iterable testVersionsFactory() throws IllegalAccessException { + return allVersion(INDEX_NAME, SUFFIX); + } + + public void testSortedIndexAddDocBlocks() throws Exception { + final Sort sort; + try (DirectoryReader reader = DirectoryReader.open(directory)) { + assertEquals(1, reader.leaves().size()); + sort = reader.leaves().get(0).reader().getMetaData().getSort(); + assertNotNull(sort); + searchExampleIndex(reader); + } + IndexWriterConfig indexWriterConfig = + newIndexWriterConfig(new MockAnalyzer(random())) + .setOpenMode(IndexWriterConfig.OpenMode.APPEND) + .setIndexSort(sort) + .setMergePolicy(newLogMergePolicy()); + // open writer + try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) { + // add 10 docs + for (int i = 0; i < 10; i++) { + Document child = new Document(); + child.add(new StringField("relation", "child", Field.Store.NO)); + child.add(new StringField("bid", "" + i, Field.Store.NO)); + child.add(new NumericDocValuesField("dateDV", i)); + Document parent = new Document(); + parent.add(new StringField("relation", "parent", Field.Store.NO)); + parent.add(new StringField("bid", "" + i, Field.Store.NO)); + parent.add(new NumericDocValuesField("dateDV", i)); + writer.addDocuments(Arrays.asList(child, child, parent)); + if (random().nextBoolean()) { + writer.flush(); + } + } + if 
(random().nextBoolean()) { + writer.forceMerge(1); + } + writer.commit(); + try (IndexReader reader = DirectoryReader.open(directory)) { + IndexSearcher searcher = new IndexSearcher(reader); + for (int i = 0; i < 10; i++) { + TopDocs children = + searcher.search( + new BooleanQuery.Builder() + .add(new TermQuery(new Term("relation", "child")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST) + .build(), + 2); + TopDocs parents = + searcher.search( + new BooleanQuery.Builder() + .add(new TermQuery(new Term("relation", "parent")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST) + .build(), + 2); + assertEquals(2, children.totalHits.value); + assertEquals(1, parents.totalHits.value); + // make sure it's sorted + assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc); + assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc); + } + } + } + // This will confirm the docs are really sorted + TestUtil.checkIndex(directory); + } + + public void testSortedIndex() throws Exception { + try (DirectoryReader reader = DirectoryReader.open(directory)) { + assertEquals(1, reader.leaves().size()); + Sort sort = reader.leaves().get(0).reader().getMetaData().getSort(); + assertNotNull(sort); + assertEquals("!", sort.toString()); + // This will confirm the docs are really sorted + TestUtil.checkIndex(directory); + searchExampleIndex(reader); + } + } + + @Override + protected void createIndex(Directory directory) throws IOException { + LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); + mp.setNoCFSRatio(1.0); + mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); + MockAnalyzer analyzer = new MockAnalyzer(random()); + analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); + + // TODO: remove randomness + IndexWriterConfig conf = new IndexWriterConfig(analyzer); + conf.setMergePolicy(mp); + conf.setUseCompoundFile(false); + conf.setCodec(TestUtil.getDefaultCodec()); + conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true))); + IndexWriter writer = new IndexWriter(directory, conf); + LineFileDocs docs = new LineFileDocs(new Random(0)); + SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT); + parser.setTimeZone(TimeZone.getTimeZone("UTC")); + ParsePosition position = new ParsePosition(0); + for (int i = 0; i < 50; i++) { + Document doc = TestUtil.cloneDocument(docs.nextDoc()); + String dateString = doc.get("date"); + position.setIndex(0); + Date date = parser.parse(dateString, position); + if (position.getErrorIndex() != -1) { + throw new AssertionError("failed to parse \"" + dateString + "\" as date"); + } + if (position.getIndex() != dateString.length()) { + throw new AssertionError("failed to parse \"" + dateString + "\" as date"); + } + doc.add( + new NumericDocValuesField( + "docid_intDV", doc.getField("docid_int").numericValue().longValue())); + doc.add( + new SortedDocValuesField("titleDV", new BytesRef(doc.getField("title").stringValue()))); + doc.add(new NumericDocValuesField("dateDV", date.getTime())); + if (i % 10 == 0) { // commit every 10 documents + writer.commit(); + } + writer.addDocument(doc); + } + writer.forceMerge(1); + writer.close(); + + try (DirectoryReader reader = DirectoryReader.open(directory)) { + searchExampleIndex(reader); // make sure we can search it + } + } + + public static void searchExampleIndex(DirectoryReader reader) throws IOException { + IndexSearcher searcher = 
newSearcher(reader); + + TopDocs topDocs = searcher.search(new FieldExistsQuery("titleTokenized"), 10); + assertEquals(50, topDocs.totalHits.value); + + topDocs = searcher.search(new FieldExistsQuery("titleDV"), 10); + assertEquals(50, topDocs.totalHits.value); + + topDocs = searcher.search(new TermQuery(new Term("body", "ja")), 10); + assertTrue(topDocs.totalHits.value > 0); + + topDocs = + searcher.search( + IntPoint.newRangeQuery("docid_int", 42, 44), + 10, + new Sort(new SortField("docid_intDV", SortField.Type.INT))); + assertEquals(3, topDocs.totalHits.value); + assertEquals(3, topDocs.scoreDocs.length); + assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]); + assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]); + assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]); + + topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5); + assertTrue(topDocs.totalHits.value > 0); + topDocs = + searcher.search( + new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG))); + assertEquals(50, topDocs.totalHits.value); + assertEquals(5, topDocs.scoreDocs.length); + long firstDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]; + long lastDate = (Long) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]; + assertTrue(firstDate <= lastDate); + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexUpgradeBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexUpgradeBackwardsCompatibility.java new file mode 100644 index 000000000000..10099a5b5b59 --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexUpgradeBackwardsCompatibility.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.lucene.backward_index;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexUpgrader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.Version;
+
+public class TestIndexUpgradeBackwardsCompatibility extends BackwardsCompatibilityTestBase {
+
+  /**
+   * A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See
+   * {@link #testVersionsFactory()} for details on the values provided to the framework.
+   */
+  public TestIndexUpgradeBackwardsCompatibility(Version version, String pattern) {
+    super(version, pattern);
+  }
+
+  /** Provides all supported versions to the test framework for each of the index suffixes. */
+  @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
+  public static Iterable<Object[]> testVersionsFactory() throws IllegalAccessException {
+    Iterable<Object[]> allSupportedVersions =
+        allVersion(
+            TestBasicBackwardsCompatibility.INDEX_NAME,
+            TestBasicBackwardsCompatibility.SUFFIX_CFS,
+            TestBasicBackwardsCompatibility.SUFFIX_NO_CFS);
+    return allSupportedVersions;
+  }
+
+  /** Randomizes the use of some of the constructor variations */
+  static IndexUpgrader newIndexUpgrader(Directory dir) {
+    final boolean streamType = random().nextBoolean();
+    final int choice = TestUtil.nextInt(random(), 0, 2);
+    switch (choice) {
+      case 0:
+        return new IndexUpgrader(dir);
+      case 1:
+        return new IndexUpgrader(dir, streamType ? null : InfoStream.NO_OUTPUT, false);
+      case 2:
+        return new IndexUpgrader(dir, newIndexWriterConfig(null), false);
+      default:
+        fail("case statement didn't get updated when random bounds changed");
+    }
+    return null; // never get here
+  }
+
+  public void testUpgradeOldIndex() throws Exception {
+    int indexCreatedVersion =
+        SegmentInfos.readLatestCommit(directory).getIndexCreatedVersionMajor();
+    newIndexUpgrader(directory).upgrade();
+    checkAllSegmentsUpgraded(directory, indexCreatedVersion);
+  }
+
+  @Override
+  protected void createIndex(Directory directory) throws IOException {
+    if (indexPattern.equals(
+        createPattern(
+            TestBasicBackwardsCompatibility.INDEX_NAME,
+            TestBasicBackwardsCompatibility.SUFFIX_CFS))) {
+      TestBasicBackwardsCompatibility.createIndex(directory, true, false);
+    } else {
+      TestBasicBackwardsCompatibility.createIndex(directory, false, false);
+    }
+  }
+
+  public void testUpgradeOldSingleSegmentIndexWithAdditions() throws Exception {
+    // TODO we used to have single segment indices but we stopped creating them at some point
+    // either delete the test or recreate the indices
+    assumeTrue("Original index must be single segment", 1 == getNumberOfSegments(directory));
+    int indexCreatedVersion =
+        SegmentInfos.readLatestCommit(directory).getIndexCreatedVersionMajor();
+
+    // create a bunch of dummy segments
+    int id = 40;
+    Directory ramDir = new ByteBuffersDirectory();
+    for (int i = 0; i < 3; i++) {
+      // only use Log- or TieredMergePolicy, to make document addition predictable and not
+      // suddenly merge:
+      MergePolicy mp = random().nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+      IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp);
+      IndexWriter w = new IndexWriter(ramDir, iwc);
+      // add a few more docs:
+      for (int j = 0; j < RANDOM_MULTIPLIER * random().nextInt(30); j++) {
+        TestBasicBackwardsCompatibility.addDoc(w, id++);
+      }
+      try {
+        w.commit();
+      } finally {
+        w.close();
+      }
+    }
+
+    // add dummy segments (which are all in current
+    // version) to single segment index
+    MergePolicy mp = random().nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+    IndexWriterConfig iwc = new IndexWriterConfig(null).setMergePolicy(mp);
+    IndexWriter w = new IndexWriter(directory, iwc);
+    w.addIndexes(ramDir);
+    try (w) {
+      w.commit();
+    }
+
+    // determine count of segments in modified index
+    final int origSegCount = getNumberOfSegments(directory);
+
+    // ensure there is only one commit
+    assertEquals(1, DirectoryReader.listCommits(directory).size());
+    newIndexUpgrader(directory).upgrade();
+
+    final int segCount = checkAllSegmentsUpgraded(directory, indexCreatedVersion);
+    assertEquals(
+        "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
+        origSegCount,
+        segCount);
+  }
+
+  // LUCENE-5907
+  public void testUpgradeWithNRTReader() throws Exception {
+    IndexWriter writer =
+        new IndexWriter(
+            directory,
+            newIndexWriterConfig(new MockAnalyzer(random()))
+                .setOpenMode(IndexWriterConfig.OpenMode.APPEND));
+    writer.addDocument(new Document());
+    DirectoryReader r = DirectoryReader.open(writer);
+    writer.commit();
+    r.close();
+    writer.forceMerge(1);
+    writer.commit();
+    writer.rollback();
+    SegmentInfos.readLatestCommit(directory);
+  }
+
+  // LUCENE-5907
+  public void testUpgradeThenMultipleCommits() throws Exception {
+    IndexWriter writer =
+        new IndexWriter(
+            directory,
+            newIndexWriterConfig(new MockAnalyzer(random()))
+                .setOpenMode(IndexWriterConfig.OpenMode.APPEND));
+    writer.addDocument(new Document());
+    writer.commit();
+    writer.addDocument(new Document());
+    writer.commit();
+    writer.close();
+  }
+
+  public void testIndexUpgraderCommandLineArgs() throws Exception {
+    PrintStream savedSystemOut = System.out;
+    System.setOut(new PrintStream(new ByteArrayOutputStream(), false, UTF_8));
+    try {
+      String name = indexName(this.version);
+      Directory origDir = directory;
+      int indexCreatedVersion =
+          SegmentInfos.readLatestCommit(origDir).getIndexCreatedVersionMajor();
+      Path dir = createTempDir(name);
+      try (FSDirectory fsDir = FSDirectory.open(dir)) {
+        // beware that ExtraFS might add extraXXX files
+        Set<String> extraFiles = Set.of(fsDir.listAll());
+        for (String file : origDir.listAll()) {
+          if (extraFiles.contains(file) == false) {
+            fsDir.copyFrom(origDir, file, file, IOContext.DEFAULT);
+          }
+        }
+      }
+
+      String path = dir.toAbsolutePath().toString();
+
+      List<String> args = new ArrayList<>();
+      if (random().nextBoolean()) {
+        args.add("-verbose");
+      }
+      if (random().nextBoolean()) {
+        args.add("-delete-prior-commits");
+      }
+      if (random().nextBoolean()) {
+        // TODO: need to better randomize this, but ...
+        // - LuceneTestCase.FS_DIRECTORIES is private
+        // - newFSDirectory returns BaseDirectoryWrapper
+        // - BaseDirectoryWrapper doesn't expose delegate
+        Class<? extends FSDirectory> dirImpl = NIOFSDirectory.class;
+
+        args.add("-dir-impl");
+        args.add(dirImpl.getName());
+      }
+      args.add(path);
+
+      IndexUpgrader.main(args.toArray(new String[0]));
+
+      try (Directory upgradedDir = newFSDirectory(dir)) {
+        checkAllSegmentsUpgraded(upgradedDir, indexCreatedVersion);
+      }
+
+    } finally {
+      System.setOut(savedSystemOut);
+    }
+  }
+
+  static int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException {
+    final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+    if (VERBOSE) {
+      System.out.println("checkAllSegmentsUpgraded: " + infos);
+    }
+    for (SegmentCommitInfo si : infos) {
+      assertEquals(Version.LATEST, si.info.getVersion());
+      assertNotNull(si.getId());
+    }
+    assertEquals(Version.LATEST, infos.getCommitLuceneVersion());
+    assertEquals(indexCreatedVersion, infos.getIndexCreatedVersionMajor());
+    return infos.size();
+  }
+
+  static int getNumberOfSegments(Directory dir) throws IOException {
+    final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+    return infos.size();
+  }
+}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestMoreTermsBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestMoreTermsBackwardsCompatibility.java
new file mode 100644
index 000000000000..6bacb49dd652
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestMoreTermsBackwardsCompatibility.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.backward_index;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.util.LineFileDocs;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
+
+public class TestMoreTermsBackwardsCompatibility extends BackwardsCompatibilityTestBase {
+
+  static final String INDEX_NAME = "moreterms";
+
+  static final String SUFFIX = "";
+
+  public TestMoreTermsBackwardsCompatibility(Version version, String pattern) {
+    super(version, pattern);
+  }
+
+  @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
+  public static Iterable<Object[]> testVersionsFactory() {
+    List<Object[]> params = new ArrayList<>();
+    params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)});
+    return params;
+  }
+
+  @Override
+  protected void createIndex(Directory directory) throws IOException {
+    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+    mp.setNoCFSRatio(1.0);
+    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+    IndexWriterConfig conf =
+        new IndexWriterConfig(analyzer)
+            .setMergePolicy(mp)
+            .setCodec(TestUtil.getDefaultCodec())
+            .setUseCompoundFile(false);
+    IndexWriter writer = new IndexWriter(directory, conf);
+    LineFileDocs docs = new LineFileDocs(new Random(0));
+    for (int i = 0; i < 50; i++) {
+      Document doc = TestUtil.cloneDocument(docs.nextDoc());
+      doc.add(
+          new NumericDocValuesField(
+              "docid_intDV", doc.getField("docid_int").numericValue().longValue()));
+      doc.add(
+          new SortedDocValuesField("titleDV", new BytesRef(doc.getField("title").stringValue())));
+      writer.addDocument(doc);
+      if (i % 10 == 0) { // commit every 10 documents
+        writer.commit();
+      }
+    }
+    docs.close();
+    writer.close();
+    try (DirectoryReader reader = DirectoryReader.open(directory)) {
+      TestIndexSortBackwardsCompatibility.searchExampleIndex(reader); // make sure we can search it
+    }
+  }
+
+  public void testMoreTerms() throws Exception {
+    try (DirectoryReader reader = DirectoryReader.open(directory)) {
+
+      TestUtil.checkIndex(directory);
+      TestIndexSortBackwardsCompatibility.searchExampleIndex(reader);
+    }
+  }
+}