From 8e99dd4374dd9ba67d9a633e5f6719713585f7b4 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Thu, 17 Mar 2022 09:21:49 -0700 Subject: [PATCH] Add back in older codecs for bwc Signed-off-by: John Mazanec --- .../index/codec/KNN80Codec/KNN80Codec.java | 115 ++++++++++++++++ .../index/codec/KNN84Codec/KNN84Codec.java | 119 ++++++++++++++++ .../index/codec/KNN86Codec/KNN86Codec.java | 129 ++++++++++++++++++ .../services/org.apache.lucene.codecs.Codec | 3 + 4 files changed, 366 insertions(+) create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80Codec.java create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN84Codec/KNN84Codec.java create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN86Codec/KNN86Codec.java diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80Codec.java new file mode 100644 index 0000000000..59655762e4 --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80Codec.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN80Codec; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; + +/** + * Codec named {@code KNN80Codec}. Doc values for every field use KNN80DocValuesFormat + * 
and compound files use KNN80CompoundFormat; all other formats come from the Lucene80 delegate codec. + * + */ +public final class KNN80Codec extends Codec { + + private static final Logger logger = LogManager.getLogger(KNN80Codec.class); + private final DocValuesFormat docValuesFormat; + private final DocValuesFormat perFieldDocValuesFormat; + private final CompoundFormat compoundFormat; + private Codec lucene80Codec; + + public static final String KNN_80 = "KNN80Codec"; + public static final String LUCENE_80 = "Lucene80"; // name handed to Codec.forName to look up the delegate codec + + public KNN80Codec() { + super(KNN_80); + this.docValuesFormat = new KNN80DocValuesFormat(); + this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return docValuesFormat; + } + }; + this.compoundFormat = new KNN80CompoundFormat(); + } + + /* + * Returns the delegate codec, resolving it through Codec.forName(LUCENE_80) on first use and caching it in lucene80Codec. + */ + public Codec getDelegatee() { + if (lucene80Codec == null) + lucene80Codec = Codec.forName(LUCENE_80); + return lucene80Codec; + } + + @Override + public DocValuesFormat docValuesFormat() { + return this.perFieldDocValuesFormat; + } + + /* + * The methods below forward to the delegate codec one by one. Extending FilterCodec would have avoided + * the boilerplate, but it brought SPI related issues while loading the Codec in the tests, so each + * format accessor is overridden by hand instead. 
+ */ + + @Override + public PostingsFormat postingsFormat() { + return getDelegatee().postingsFormat(); + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return getDelegatee().storedFieldsFormat(); + } + + @Override + public TermVectorsFormat termVectorsFormat() { + return getDelegatee().termVectorsFormat(); + } + + @Override + public FieldInfosFormat fieldInfosFormat() { + return getDelegatee().fieldInfosFormat(); + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return getDelegatee().segmentInfoFormat(); + } + + @Override + public NormsFormat normsFormat() { + return getDelegatee().normsFormat(); + } + + @Override + public LiveDocsFormat liveDocsFormat() { + return getDelegatee().liveDocsFormat(); + } + + @Override + public CompoundFormat compoundFormat() { + return this.compoundFormat; + } + + @Override + public PointsFormat pointsFormat() { + return getDelegatee().pointsFormat(); + } +} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN84Codec/KNN84Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN84Codec/KNN84Codec.java new file mode 100644 index 0000000000..a50f396a48 --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/codec/KNN84Codec/KNN84Codec.java @@ -0,0 +1,119 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN84Codec; + +import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; +import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PointsFormat; +import 
org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; + +/** + * Codec named {@code KNN84Codec}. Doc values for every field use KNN80DocValuesFormat and compound files + * use KNN80CompoundFormat; all other formats come from the Lucene84 delegate codec. + * + */ +public final class KNN84Codec extends Codec { + + private static final Logger logger = LogManager.getLogger(KNN84Codec.class); + private final DocValuesFormat docValuesFormat; + private final DocValuesFormat perFieldDocValuesFormat; + private final CompoundFormat compoundFormat; + private Codec lucene84Codec; + + public static final String KNN_84 = "KNN84Codec"; + public static final String LUCENE_84 = "Lucene84"; // name handed to Codec.forName to look up the delegate codec + + public KNN84Codec() { + super(KNN_84); + // Note that DocValuesFormat can use old Codec's DocValuesFormat. For instance Lucene84 uses Lucene80 + // DocValuesFormat. Refer to defaultDVFormat in LuceneXXCodec.java to find out which version it uses + this.docValuesFormat = new KNN80DocValuesFormat(); + this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return docValuesFormat; + } + }; + this.compoundFormat = new KNN80CompoundFormat(); + } + + /* + * Returns the delegate codec, resolving it through Codec.forName(LUCENE_84) on first use and caching it in lucene84Codec. + */ + public Codec getDelegatee() { + if (lucene84Codec == null) + lucene84Codec = Codec.forName(LUCENE_84); + return lucene84Codec; + } + + @Override + public DocValuesFormat docValuesFormat() { + return this.perFieldDocValuesFormat; + } + + /* + * The methods below forward to the delegate codec one by one. Extending FilterCodec would have avoided + * the boilerplate, but it brought SPI related issues while loading the Codec in the tests, so each + * format accessor is overridden by hand instead. 
+ */ + + @Override + public PostingsFormat postingsFormat() { + return getDelegatee().postingsFormat(); + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return getDelegatee().storedFieldsFormat(); + } + + @Override + public TermVectorsFormat termVectorsFormat() { + return getDelegatee().termVectorsFormat(); + } + + @Override + public FieldInfosFormat fieldInfosFormat() { + return getDelegatee().fieldInfosFormat(); + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return getDelegatee().segmentInfoFormat(); + } + + @Override + public NormsFormat normsFormat() { + return getDelegatee().normsFormat(); + } + + @Override + public LiveDocsFormat liveDocsFormat() { + return getDelegatee().liveDocsFormat(); + } + + @Override + public CompoundFormat compoundFormat() { + return this.compoundFormat; + } + + @Override + public PointsFormat pointsFormat() { + return getDelegatee().pointsFormat(); + } +} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN86Codec/KNN86Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN86Codec/KNN86Codec.java new file mode 100644 index 0000000000..c5cefaf164 --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/codec/KNN86Codec/KNN86Codec.java @@ -0,0 +1,129 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN86Codec; + +import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; +import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import 
org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; + +/** + * Codec named {@code KNN86Codec}. Doc values use KNN80DocValuesFormat, compound files use KNN80CompoundFormat, + * and postings use the format given to setPostingsFormat when one is set; the rest comes from the Lucene86 delegate codec. + * + */ +public final class KNN86Codec extends Codec { + + private static final Logger logger = LogManager.getLogger(KNN86Codec.class); + private final DocValuesFormat docValuesFormat; + private final DocValuesFormat perFieldDocValuesFormat; + private final CompoundFormat compoundFormat; + private Codec lucene86Codec; + private PostingsFormat postingsFormat = null; + + public static final String KNN_86 = "KNN86Codec"; + public static final String LUCENE_86 = "Lucene86"; // name handed to Codec.forName to look up the delegate codec + + public KNN86Codec() { + super(KNN_86); + // Note that DocValuesFormat can use old Codec's DocValuesFormat. For instance Lucene84 uses Lucene80 + // DocValuesFormat. Refer to defaultDVFormat in LuceneXXCodec.java to find out which version it uses + this.docValuesFormat = new KNN80DocValuesFormat(); + this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return docValuesFormat; + } + }; + this.compoundFormat = new KNN80CompoundFormat(); + } + + /* + * Returns the delegate codec, resolving it through Codec.forName(LUCENE_86) on first use and caching it in lucene86Codec. + */ + public Codec getDelegatee() { + if (lucene86Codec == null) + lucene86Codec = Codec.forName(LUCENE_86); + return lucene86Codec; + } + + @Override + public DocValuesFormat docValuesFormat() { + return this.perFieldDocValuesFormat; + } + + /* + * The methods below forward to the delegate codec one by one. Extending FilterCodec would have avoided + * the boilerplate, but it brought SPI related issues while loading the Codec in the tests, so each format + * accessor is overridden by hand. postingsFormat() returns the format stored by setPostingsFormat when it is non-null. 
+ */ + + + public void setPostingsFormat(PostingsFormat postingsFormat) { + this.postingsFormat = postingsFormat; + } + + @Override + public PostingsFormat postingsFormat() { + if (this.postingsFormat == null) { + return getDelegatee().postingsFormat(); + } + return this.postingsFormat; + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return getDelegatee().storedFieldsFormat(); + } + + @Override + public TermVectorsFormat termVectorsFormat() { + return getDelegatee().termVectorsFormat(); + } + + @Override + public FieldInfosFormat fieldInfosFormat() { + return getDelegatee().fieldInfosFormat(); + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return getDelegatee().segmentInfoFormat(); + } + + @Override + public NormsFormat normsFormat() { + return getDelegatee().normsFormat(); + } + + @Override + public LiveDocsFormat liveDocsFormat() { + return getDelegatee().liveDocsFormat(); + } + + @Override + public CompoundFormat compoundFormat() { + return this.compoundFormat; + } + + @Override + public PointsFormat pointsFormat() { + return getDelegatee().pointsFormat(); + } +} diff --git a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index f4cac59cbf..8e64afa086 100644 --- a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -1 +1,4 @@ +org.opensearch.knn.index.codec.KNN80Codec.KNN80Codec +org.opensearch.knn.index.codec.KNN84Codec.KNN84Codec +org.opensearch.knn.index.codec.KNN86Codec.KNN86Codec org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec \ No newline at end of file