-
Notifications
You must be signed in to change notification settings - Fork 127
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: John Mazanec <[email protected]>
- Loading branch information
1 parent
c5ba1dc
commit 11085ec
Showing
4 changed files
with
383 additions
and
0 deletions.
There are no files selected for viewing
121 changes: 121 additions & 0 deletions
121
src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80Codec.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN80Codec; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.lucene.codecs.Codec; | ||
import org.apache.lucene.codecs.CompoundFormat; | ||
import org.apache.lucene.codecs.DocValuesFormat; | ||
import org.apache.lucene.codecs.FieldInfosFormat; | ||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.LiveDocsFormat; | ||
import org.apache.lucene.codecs.NormsFormat; | ||
import org.apache.lucene.codecs.PointsFormat; | ||
import org.apache.lucene.codecs.PostingsFormat; | ||
import org.apache.lucene.codecs.SegmentInfoFormat; | ||
import org.apache.lucene.codecs.StoredFieldsFormat; | ||
import org.apache.lucene.codecs.TermVectorsFormat; | ||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; | ||
|
||
/** | ||
* Extends the Codec to support a new file format for KNN index | ||
* based on the mappings. | ||
* | ||
*/ | ||
public final class KNN80Codec extends Codec { | ||
|
||
private static final Logger logger = LogManager.getLogger(KNN80Codec.class); | ||
private final DocValuesFormat docValuesFormat; | ||
private final DocValuesFormat perFieldDocValuesFormat; | ||
private final CompoundFormat compoundFormat; | ||
private Codec lucene80Codec; | ||
|
||
public static final String KNN_80 = "KNN80Codec"; | ||
public static final String LUCENE_80 = "Lucene80"; // Lucene Codec to be used | ||
|
||
public KNN80Codec() { | ||
super(KNN_80); | ||
this.docValuesFormat = new KNN80DocValuesFormat(); | ||
this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() { | ||
@Override | ||
public DocValuesFormat getDocValuesFormatForField(String field) { | ||
return docValuesFormat; | ||
} | ||
}; | ||
this.compoundFormat = new KNN80CompoundFormat(); | ||
} | ||
|
||
/* | ||
* This function returns the Lucene80 Codec. | ||
*/ | ||
public Codec getDelegatee() { | ||
if (lucene80Codec == null) | ||
lucene80Codec = Codec.forName(LUCENE_80); | ||
return lucene80Codec; | ||
} | ||
|
||
@Override | ||
public DocValuesFormat docValuesFormat() { | ||
return this.perFieldDocValuesFormat; | ||
} | ||
|
||
/* | ||
* For all the below functions, we could have extended FilterCodec, but this brings | ||
* SPI related issues while loading Codec in the tests. So fall back to traditional | ||
* approach of manually overriding. | ||
*/ | ||
|
||
@Override | ||
public PostingsFormat postingsFormat() { | ||
return getDelegatee().postingsFormat(); | ||
} | ||
|
||
@Override | ||
public StoredFieldsFormat storedFieldsFormat() { | ||
return getDelegatee().storedFieldsFormat(); | ||
} | ||
|
||
@Override | ||
public TermVectorsFormat termVectorsFormat() { | ||
return getDelegatee().termVectorsFormat(); | ||
} | ||
|
||
@Override | ||
public FieldInfosFormat fieldInfosFormat() { | ||
return getDelegatee().fieldInfosFormat(); | ||
} | ||
|
||
@Override | ||
public SegmentInfoFormat segmentInfoFormat() { | ||
return getDelegatee().segmentInfoFormat(); | ||
} | ||
|
||
@Override | ||
public NormsFormat normsFormat() { | ||
return getDelegatee().normsFormat(); | ||
} | ||
|
||
@Override | ||
public LiveDocsFormat liveDocsFormat() { | ||
return getDelegatee().liveDocsFormat(); | ||
} | ||
|
||
@Override | ||
public CompoundFormat compoundFormat() { | ||
return this.compoundFormat; | ||
} | ||
|
||
@Override | ||
public PointsFormat pointsFormat() { | ||
return getDelegatee().pointsFormat(); | ||
} | ||
|
||
@Override | ||
public final KnnVectorsFormat knnVectorsFormat() { | ||
return KnnVectorsFormat.EMPTY; | ||
} | ||
} |
125 changes: 125 additions & 0 deletions
125
src/main/java/org/opensearch/knn/index/codec/KNN84Codec/KNN84Codec.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN84Codec; | ||
|
||
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; | ||
import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.lucene.codecs.Codec; | ||
import org.apache.lucene.codecs.CompoundFormat; | ||
import org.apache.lucene.codecs.DocValuesFormat; | ||
import org.apache.lucene.codecs.FieldInfosFormat; | ||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.LiveDocsFormat; | ||
import org.apache.lucene.codecs.NormsFormat; | ||
import org.apache.lucene.codecs.PointsFormat; | ||
import org.apache.lucene.codecs.PostingsFormat; | ||
import org.apache.lucene.codecs.SegmentInfoFormat; | ||
import org.apache.lucene.codecs.StoredFieldsFormat; | ||
import org.apache.lucene.codecs.TermVectorsFormat; | ||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; | ||
|
||
/** | ||
* Extends the Codec to support a new file format for KNN index | ||
* based on the mappings. | ||
* | ||
*/ | ||
public final class KNN84Codec extends Codec { | ||
|
||
private static final Logger logger = LogManager.getLogger(KNN84Codec.class); | ||
private final DocValuesFormat docValuesFormat; | ||
private final DocValuesFormat perFieldDocValuesFormat; | ||
private final CompoundFormat compoundFormat; | ||
private Codec lucene84Codec; | ||
|
||
public static final String KNN_84 = "KNN84Codec"; | ||
public static final String LUCENE_84 = "Lucene84"; // Lucene Codec to be used | ||
|
||
public KNN84Codec() { | ||
super(KNN_84); | ||
// Note that DocValuesFormat can use old Codec's DocValuesFormat. For instance Lucene84 uses Lucene80 | ||
// DocValuesFormat. Refer to defaultDVFormat in LuceneXXCodec.java to find out which version it uses | ||
this.docValuesFormat = new KNN80DocValuesFormat(); | ||
this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() { | ||
@Override | ||
public DocValuesFormat getDocValuesFormatForField(String field) { | ||
return docValuesFormat; | ||
} | ||
}; | ||
this.compoundFormat = new KNN80CompoundFormat(); | ||
} | ||
|
||
/* | ||
* This function returns the Lucene84 Codec. | ||
*/ | ||
public Codec getDelegatee() { | ||
if (lucene84Codec == null) | ||
lucene84Codec = Codec.forName(LUCENE_84); | ||
return lucene84Codec; | ||
} | ||
|
||
@Override | ||
public DocValuesFormat docValuesFormat() { | ||
return this.perFieldDocValuesFormat; | ||
} | ||
|
||
/* | ||
* For all the below functions, we could have extended FilterCodec, but this brings | ||
* SPI related issues while loading Codec in the tests. So fall back to traditional | ||
* approach of manually overriding. | ||
*/ | ||
|
||
@Override | ||
public PostingsFormat postingsFormat() { | ||
return getDelegatee().postingsFormat(); | ||
} | ||
|
||
@Override | ||
public StoredFieldsFormat storedFieldsFormat() { | ||
return getDelegatee().storedFieldsFormat(); | ||
} | ||
|
||
@Override | ||
public TermVectorsFormat termVectorsFormat() { | ||
return getDelegatee().termVectorsFormat(); | ||
} | ||
|
||
@Override | ||
public FieldInfosFormat fieldInfosFormat() { | ||
return getDelegatee().fieldInfosFormat(); | ||
} | ||
|
||
@Override | ||
public SegmentInfoFormat segmentInfoFormat() { | ||
return getDelegatee().segmentInfoFormat(); | ||
} | ||
|
||
@Override | ||
public NormsFormat normsFormat() { | ||
return getDelegatee().normsFormat(); | ||
} | ||
|
||
@Override | ||
public LiveDocsFormat liveDocsFormat() { | ||
return getDelegatee().liveDocsFormat(); | ||
} | ||
|
||
@Override | ||
public CompoundFormat compoundFormat() { | ||
return this.compoundFormat; | ||
} | ||
|
||
@Override | ||
public PointsFormat pointsFormat() { | ||
return getDelegatee().pointsFormat(); | ||
} | ||
|
||
@Override | ||
public final KnnVectorsFormat knnVectorsFormat() { | ||
return KnnVectorsFormat.EMPTY; | ||
} | ||
} |
134 changes: 134 additions & 0 deletions
134
src/main/java/org/opensearch/knn/index/codec/KNN86Codec/KNN86Codec.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN86Codec; | ||
|
||
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; | ||
import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.lucene.codecs.Codec; | ||
import org.apache.lucene.codecs.CompoundFormat; | ||
import org.apache.lucene.codecs.DocValuesFormat; | ||
import org.apache.lucene.codecs.FieldInfosFormat; | ||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.LiveDocsFormat; | ||
import org.apache.lucene.codecs.NormsFormat; | ||
import org.apache.lucene.codecs.PointsFormat; | ||
import org.apache.lucene.codecs.PostingsFormat; | ||
import org.apache.lucene.codecs.SegmentInfoFormat; | ||
import org.apache.lucene.codecs.StoredFieldsFormat; | ||
import org.apache.lucene.codecs.TermVectorsFormat; | ||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; | ||
|
||
/** | ||
* Extends the Codec to support a new file format for KNN index | ||
* based on the mappings. | ||
* | ||
*/ | ||
public final class KNN86Codec extends Codec { | ||
|
||
private static final Logger logger = LogManager.getLogger(KNN86Codec.class); | ||
private final DocValuesFormat docValuesFormat; | ||
private final DocValuesFormat perFieldDocValuesFormat; | ||
private final CompoundFormat compoundFormat; | ||
private Codec lucene86Codec; | ||
private PostingsFormat postingsFormat = null; | ||
|
||
public static final String KNN_86 = "KNN86Codec"; | ||
public static final String LUCENE_86 = "Lucene86"; // Lucene Codec to be used | ||
|
||
public KNN86Codec() { | ||
super(KNN_86); | ||
// Note that DocValuesFormat can use old Codec's DocValuesFormat. For instance Lucene84 uses Lucene80 | ||
// DocValuesFormat. Refer to defaultDVFormat in LuceneXXCodec.java to find out which version it uses | ||
this.docValuesFormat = new KNN80DocValuesFormat(); | ||
this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() { | ||
@Override | ||
public DocValuesFormat getDocValuesFormatForField(String field) { | ||
return docValuesFormat; | ||
} | ||
}; | ||
this.compoundFormat = new KNN80CompoundFormat(); | ||
} | ||
|
||
/* | ||
* This function returns the Lucene84 Codec. | ||
*/ | ||
public Codec getDelegatee() { | ||
if (lucene86Codec == null) | ||
lucene86Codec = Codec.forName(LUCENE_86); | ||
return lucene86Codec; | ||
} | ||
|
||
@Override | ||
public DocValuesFormat docValuesFormat() { | ||
return this.perFieldDocValuesFormat; | ||
} | ||
|
||
/* | ||
* For all the below functions, we could have extended FilterCodec, but this brings | ||
* SPI related issues while loading Codec in the tests. So fall back to traditional | ||
* approach of manually overriding. | ||
*/ | ||
|
||
|
||
public void setPostingsFormat(PostingsFormat postingsFormat) { | ||
this.postingsFormat = postingsFormat; | ||
} | ||
|
||
@Override | ||
public PostingsFormat postingsFormat() { | ||
if (this.postingsFormat == null) { | ||
return getDelegatee().postingsFormat(); | ||
} | ||
return this.postingsFormat; | ||
} | ||
|
||
@Override | ||
public StoredFieldsFormat storedFieldsFormat() { | ||
return getDelegatee().storedFieldsFormat(); | ||
} | ||
|
||
@Override | ||
public TermVectorsFormat termVectorsFormat() { | ||
return getDelegatee().termVectorsFormat(); | ||
} | ||
|
||
@Override | ||
public FieldInfosFormat fieldInfosFormat() { | ||
return getDelegatee().fieldInfosFormat(); | ||
} | ||
|
||
@Override | ||
public SegmentInfoFormat segmentInfoFormat() { | ||
return getDelegatee().segmentInfoFormat(); | ||
} | ||
|
||
@Override | ||
public NormsFormat normsFormat() { | ||
return getDelegatee().normsFormat(); | ||
} | ||
|
||
@Override | ||
public LiveDocsFormat liveDocsFormat() { | ||
return getDelegatee().liveDocsFormat(); | ||
} | ||
|
||
@Override | ||
public CompoundFormat compoundFormat() { | ||
return this.compoundFormat; | ||
} | ||
|
||
@Override | ||
public PointsFormat pointsFormat() { | ||
return getDelegatee().pointsFormat(); | ||
} | ||
|
||
@Override | ||
public final KnnVectorsFormat knnVectorsFormat() { | ||
return KnnVectorsFormat.EMPTY; | ||
} | ||
} |
3 changes: 3 additions & 0 deletions
3
src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
org.opensearch.knn.index.codec.KNN80Codec.KNN80Codec | ||
org.opensearch.knn.index.codec.KNN84Codec.KNN84Codec | ||
org.opensearch.knn.index.codec.KNN86Codec.KNN86Codec | ||
org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec |