From 00dde82994fa67ccd5c95c62a653c70021d50e8c Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Mon, 30 Mar 2020 12:47:52 -0400 Subject: [PATCH] Field type refactor (#30) Field type refactor. Fix for dates sorting with string fields --- .../analysis/ZuliaPerFieldAnalyzer.java | 12 +----- .../io/zulia/server/field/FieldTypeUtil.java | 9 ++++ .../server/index/ShardDocumentIndexer.java | 24 +++++------ .../io/zulia/server/search/QueryCombiner.java | 40 +++++++++++------- .../zulia/server/test/node/StartStopTest.java | 42 ++++++++++++++----- 5 files changed, 80 insertions(+), 47 deletions(-) diff --git a/zulia-analyzer/src/main/java/io/zulia/server/analysis/ZuliaPerFieldAnalyzer.java b/zulia-analyzer/src/main/java/io/zulia/server/analysis/ZuliaPerFieldAnalyzer.java index b75f1a9b..5fb4ed2e 100644 --- a/zulia-analyzer/src/main/java/io/zulia/server/analysis/ZuliaPerFieldAnalyzer.java +++ b/zulia-analyzer/src/main/java/io/zulia/server/analysis/ZuliaPerFieldAnalyzer.java @@ -6,6 +6,7 @@ import io.zulia.server.analysis.filter.BritishUSFilter; import io.zulia.server.analysis.filter.CaseProtectedWordsFilter; import io.zulia.server.config.ServerIndexConfig; +import io.zulia.server.field.FieldTypeUtil; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; @@ -79,16 +80,7 @@ public void refresh() { else if (ZuliaIndex.FieldConfig.FieldType.BOOL.equals(fieldType)) { a = new BooleanAnalyzer(); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_INT.equals(fieldType)) { - a = new WhitespaceAnalyzer(); - } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_LONG.equals(fieldType)) { - a = new WhitespaceAnalyzer(); - } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_FLOAT.equals(fieldType)) { - a = new WhitespaceAnalyzer(); - } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_DOUBLE.equals(fieldType)) { + else if (FieldTypeUtil.isNumericOrDateFieldType(fieldType)) { a = new WhitespaceAnalyzer(); } else { diff --git a/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java b/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java index 762757b2..c22d981e 100644 --- a/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java +++ b/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java @@ -1,5 +1,6 @@ package io.zulia.server.field; +import io.zulia.message.ZuliaIndex; import io.zulia.message.ZuliaIndex.FieldConfig; public class FieldTypeUtil { @@ -28,4 +29,12 @@ public static boolean isNumericOrDateFieldType(FieldConfig.FieldType fieldType) return isNumericIntFieldType(fieldType) || isNumericLongFieldType(fieldType) || isNumericFloatFieldType(fieldType) || isNumericDoubleFieldType( fieldType) || isDateFieldType(fieldType); } + + public static boolean isBooleanFieldType(FieldConfig.FieldType fieldType) { + return FieldConfig.FieldType.BOOL.equals(fieldType); + } + + public static boolean isStringFieldType(FieldConfig.FieldType fieldType) { + return ZuliaIndex.FieldConfig.FieldType.STRING.equals(fieldType); + } } diff --git a/zulia-server/src/main/java/io/zulia/server/index/ShardDocumentIndexer.java b/zulia-server/src/main/java/io/zulia/server/index/ShardDocumentIndexer.java index 5be1d4a2..5a268ab4 100644 --- a/zulia-server/src/main/java/io/zulia/server/index/ShardDocumentIndexer.java +++ b/zulia-server/src/main/java/io/zulia/server/index/ShardDocumentIndexer.java @@ -121,25 +121,25 @@ private void handleIndexingForStoredField(Document luceneDocument, String stored String indexedFieldName = indexAs.getIndexFieldName(); luceneDocument.add(new StringField(ZuliaConstants.FIELDS_LIST_FIELD, indexedFieldName, Field.Store.NO)); - if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_INT.equals(fieldType)) { + if (FieldTypeUtil.isNumericIntFieldType(fieldType)) { IntFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_LONG.equals(fieldType)) { + else if (FieldTypeUtil.isNumericLongFieldType(fieldType)) { LongFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_FLOAT.equals(fieldType)) { + else if (FieldTypeUtil.isNumericFloatFieldType(fieldType)) { FloatFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_DOUBLE.equals(fieldType)) { + else if (FieldTypeUtil.isNumericDoubleFieldType(fieldType)) { DoubleFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } - else if (ZuliaIndex.FieldConfig.FieldType.DATE.equals(fieldType)) { + else if (FieldTypeUtil.isDateFieldType(fieldType)) { DateFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } - else if (ZuliaIndex.FieldConfig.FieldType.BOOL.equals(fieldType)) { + else if (FieldTypeUtil.isBooleanFieldType(fieldType)) { BooleanFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } - else if (ZuliaIndex.FieldConfig.FieldType.STRING.equals(fieldType)) { + else if (FieldTypeUtil.isStringFieldType(fieldType)) { StringFieldIndexer.INSTANCE.index(luceneDocument, storedFieldName, o, indexedFieldName); } else { @@ -157,7 +157,7 @@ private void handleSortForStoredField(Document d, String storedFieldName, ZuliaI if (FieldTypeUtil.isNumericOrDateFieldType(fieldType)) { ZuliaUtil.handleLists(o, obj -> { - if (ZuliaIndex.FieldConfig.FieldType.DATE.equals(fieldType)) { + if (FieldTypeUtil.isDateFieldType(fieldType)) { if (obj instanceof Date) { Date date = (Date) obj; @@ -175,16 +175,16 @@ private void handleSortForStoredField(Document d, String storedFieldName, ZuliaI Number number = (Number) obj; SortedNumericDocValuesField docValue = null; - if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_INT.equals(fieldType)) { + if (FieldTypeUtil.isNumericIntFieldType(fieldType)) { docValue = new SortedNumericDocValuesField(sortFieldName, number.intValue()); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_LONG.equals(fieldType)) { + else if (FieldTypeUtil.isNumericLongFieldType(fieldType)) { docValue = new SortedNumericDocValuesField(sortFieldName, number.longValue()); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_FLOAT.equals(fieldType)) { + else if (FieldTypeUtil.isNumericFloatFieldType(fieldType)) { docValue = new SortedNumericDocValuesField(sortFieldName, NumericUtils.floatToSortableInt(number.floatValue())); } - else if (ZuliaIndex.FieldConfig.FieldType.NUMERIC_DOUBLE.equals(fieldType)) { + else if (FieldTypeUtil.isNumericDoubleFieldType(fieldType)) { docValue = new SortedNumericDocValuesField(sortFieldName, NumericUtils.doubleToSortableLong(number.doubleValue())); } else { diff --git a/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java b/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java index 947852d1..078b31d8 100644 --- a/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java +++ b/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java @@ -3,6 +3,7 @@ import io.zulia.ZuliaConstants; import io.zulia.message.ZuliaBase.Term; import io.zulia.message.ZuliaIndex.FieldConfig; +import io.zulia.message.ZuliaQuery; import io.zulia.message.ZuliaQuery.AnalysisRequest; import io.zulia.message.ZuliaQuery.AnalysisResult; import io.zulia.message.ZuliaQuery.CountRequest; @@ -21,6 +22,7 @@ import io.zulia.message.ZuliaServiceOuterClass.QueryRequest; import io.zulia.message.ZuliaServiceOuterClass.QueryResponse; import io.zulia.server.analysis.frequency.TermFreq; +import io.zulia.server.field.FieldTypeUtil; import io.zulia.server.index.ZuliaIndex; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -411,37 +413,45 @@ public QueryResponse getQueryResponse() throws Exception { } } else { - if (FieldConfig.FieldType.NUMERIC_INT.equals(sortType)) { - Integer a = sortValues1.getSortValue(sortValueIndex).getIntegerValue(); - Integer b = sortValues2.getSortValue(sortValueIndex).getIntegerValue(); + ZuliaQuery.SortValue sortValue1 = sortValues1.getSortValue(sortValueIndex); + ZuliaQuery.SortValue sortValue2 = sortValues2.getSortValue(sortValueIndex); + + if (FieldTypeUtil.isNumericIntFieldType(sortType)) { + Integer a = sortValue1.getExists() ? sortValue1.getIntegerValue() : null; + Integer b = sortValue2.getExists() ? sortValue2.getIntegerValue() : null; compare = Comparator.nullsLast(Integer::compareTo).compare(a, b); } - else if (FieldConfig.FieldType.NUMERIC_LONG.equals(sortType) || FieldConfig.FieldType.DATE.equals(sortType)) { - Long a = sortValues1.getSortValue(sortValueIndex).getLongValue(); - Long b = sortValues2.getSortValue(sortValueIndex).getLongValue(); + else if (FieldTypeUtil.isNumericLongFieldType(sortType)) { + Long a = sortValue1.getExists() ? sortValue1.getLongValue() : null; + Long b = sortValue2.getExists() ? sortValue2.getLongValue() : null; + + compare = Comparator.nullsLast(Long::compareTo).compare(a, b); + } + else if (FieldTypeUtil.isDateFieldType(sortType)) { + Long a = sortValue1.getExists() ? sortValue1.getDateValue() : null; + Long b = sortValue2.getExists() ? sortValue2.getDateValue() : null; compare = Comparator.nullsLast(Long::compareTo).compare(a, b); } - else if (FieldConfig.FieldType.NUMERIC_FLOAT.equals(sortType)) { + else if (FieldTypeUtil.isNumericFloatFieldType(sortType)) { - Float a = sortValues1.getSortValue(sortValueIndex).getFloatValue(); - Float b = sortValues2.getSortValue(sortValueIndex).getFloatValue(); + Float a = sortValue1.getExists() ? sortValue1.getFloatValue() : null; + Float b = sortValue2.getExists() ? sortValue2.getFloatValue() : null; compare = Comparator.nullsLast(Float::compareTo).compare(a, b); } - else if (FieldConfig.FieldType.NUMERIC_DOUBLE.equals(sortType)) { + else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { - Double a = sortValues1.getSortValue(sortValueIndex).getDoubleValue(); - Double b = sortValues2.getSortValue(sortValueIndex).getDoubleValue(); + Double a = sortValue1.getExists() ? sortValue1.getDoubleValue() : null; + Double b = sortValue2.getExists() ? sortValue2.getDoubleValue() : null; compare = Comparator.nullsLast(Double::compareTo).compare(a, b); } else { - String a = sortValues1.getSortValue(sortValueIndex).getStringValue(); - String b = sortValues2.getSortValue(sortValueIndex).getStringValue(); + String a = sortValue1.getExists() ? sortValue1.getStringValue() : null; + String b = sortValue2.getExists() ? sortValue2.getStringValue() : null; - //compare = Comparator.nullsLast(String::compareTo).compare(a, b); compare = Comparator.nullsLast(BytesRef::compareTo).compare(new BytesRef(a), new BytesRef(b)); } diff --git a/zulia-server/src/test/java/io/zulia/server/test/node/StartStopTest.java b/zulia-server/src/test/java/io/zulia/server/test/node/StartStopTest.java index 12640a42..ee878a21 100644 --- a/zulia-server/src/test/java/io/zulia/server/test/node/StartStopTest.java +++ b/zulia-server/src/test/java/io/zulia/server/test/node/StartStopTest.java @@ -61,13 +61,13 @@ public static void initAll() throws Exception { ClientIndexConfig indexConfig = new ClientIndexConfig(); indexConfig.addDefaultSearchField("title"); - indexConfig.addFieldConfig(FieldConfigBuilder.create("title", FieldType.STRING).indexAs(DefaultAnalyzers.STANDARD)); - indexConfig.addFieldConfig(FieldConfigBuilder.create("issn", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("title", FieldType.STRING).indexAs(DefaultAnalyzers.STANDARD).sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("issn", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet().sort()); indexConfig.addFieldConfig(FieldConfigBuilder.create("eissn", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD)); indexConfig.addFieldConfig(FieldConfigBuilder.create("uid", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD)); - indexConfig.addFieldConfig(FieldConfigBuilder.create("an", FieldType.NUMERIC_INT).index()); - indexConfig.addFieldConfig(FieldConfigBuilder.create("country", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet()); - indexConfig.addFieldConfig(FieldConfigBuilder.create("date", FieldType.DATE).index().facetAs(DateHandling.DATE_YYYY_MM_DD)); + indexConfig.addFieldConfig(FieldConfigBuilder.create("an", FieldType.NUMERIC_INT).index().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("country", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("date", FieldType.DATE).index().facetAs(DateHandling.DATE_YYYY_MM_DD).sort()); indexConfig.addFieldConfig(FieldConfigBuilder.create("testList", FieldType.STRING).index()); indexConfig.setIndexName(FACET_TEST_INDEX); indexConfig.setNumberOfShards(1); @@ -130,7 +130,7 @@ else if (half) { // 2/5 of input mongoDocument.put("date", d); } else { // 1/2 of input - Date d = Date.from(LocalDate.of(2013, 8, 4).atStartOfDay(ZoneId.of("UTC")).toInstant()); + Date d = Date.from(LocalDate.of(2012, 8, 4).atStartOfDay(ZoneId.of("UTC")).toInstant()); mongoDocument.put("date", d); } @@ -219,14 +219,14 @@ public void lengthTest() throws Exception { public void reindex() throws Exception { ClientIndexConfig indexConfig = new ClientIndexConfig(); indexConfig.addDefaultSearchField("title"); - indexConfig.addFieldConfig(FieldConfigBuilder.create("title", FieldType.STRING).indexAs(DefaultAnalyzers.STANDARD)); - indexConfig.addFieldConfig(FieldConfigBuilder.create("issn", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("title", FieldType.STRING).indexAs(DefaultAnalyzers.STANDARD).sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("issn", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet().sort()); indexConfig.addFieldConfig(FieldConfigBuilder.create("eissn", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet()); indexConfig.addFieldConfig(FieldConfigBuilder.create("uid", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD)); indexConfig.addFieldConfig(FieldConfigBuilder.create("an", FieldType.NUMERIC_INT).index().displayName("Accession Number")); indexConfig.addFieldConfig(FieldConfigBuilder.create("country", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet()); indexConfig.addFieldConfig( - FieldConfigBuilder.create("date", FieldType.DATE).index().facetAs(DateHandling.DATE_YYYY_MM_DD).description("The very special data")); + FieldConfigBuilder.create("date", FieldType.DATE).index().facetAs(DateHandling.DATE_YYYY_MM_DD).description("The very special data").sort()); indexConfig.addFieldConfig(FieldConfigBuilder.create("testList", FieldType.STRING).index()); indexConfig.setIndexName(FACET_TEST_INDEX); indexConfig.setNumberOfShards(1); @@ -319,7 +319,7 @@ public void confirm() throws Exception { } { - Query q = new Query(FACET_TEST_INDEX, "title:userguide", 10).addDrillDown("date", "2013-08-04"); + Query q = new Query(FACET_TEST_INDEX, "title:userguide", 10).addDrillDown("date", "2012-08-04"); QueryResult qr = zuliaWorkPool.query(q); @@ -381,6 +381,28 @@ public void confirm() throws Exception { } + { + Query q = new Query(FACET_TEST_INDEX, null, 1).addFieldSort("date", ASCENDING).addFieldSort("issn", ASCENDING); + + QueryResult qr = zuliaWorkPool.query(q); + + Document firstDateDocument = qr.getFirstDocument(); + + q = new Query(FACET_TEST_INDEX, null, 1).addFieldSort("date", DESCENDING).addFieldSort("issn", DESCENDING); + + qr = zuliaWorkPool.query(q); + + Document lastDateDocument = qr.getFirstDocument(); + + Date firstDate = firstDateDocument.getDate("date"); + Date lastDate = lastDateDocument.getDate("date"); + + System.out.println(firstDate); + System.out.println(lastDate); + + Assertions.assertTrue(firstDate.compareTo(lastDate) < 0, "First date: " + firstDate + " lastDate: " + lastDate); + } + } @Test