Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

semantic_text field mapping #107519

Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
db9a383
Remove code remnants from semantic_text previous iterations in featur…
carlosdelest Apr 16, 2024
78f8880
Remove unnecessary interface
carlosdelest Apr 16, 2024
7c42501
Remove SemanticTextModelSettings
carlosdelest Apr 16, 2024
faef7e2
Fix test
carlosdelest Apr 16, 2024
3359c9a
Add semantic_text field mapper specifics
carlosdelest Apr 16, 2024
d4fdeb7
Add mapping specific tests
carlosdelest Apr 16, 2024
59bb7ae
Revert changes to inference service
carlosdelest Apr 16, 2024
3b8c49e
Fix redundant path in semantic text field merging (#107507)
jimczi Apr 16, 2024
3859fad
Merge remote-tracking branch 'origin/main' into carlosdelest/semantic…
carlosdelest Apr 16, 2024
eacd6ad
Remove logging dependency as it's unused
carlosdelest Apr 16, 2024
92cdfa8
Remove unneeded unchecked warning
carlosdelest Apr 16, 2024
914ce1c
PR feedback
carlosdelest Apr 16, 2024
db36d14
PR feedback
carlosdelest Apr 16, 2024
b2ab726
Using integration test distribution for YAML tests
carlosdelest Apr 17, 2024
b307a76
PR review
carlosdelest Apr 17, 2024
d565c55
Improve detection of illegal values and testing
carlosdelest Apr 18, 2024
b3246a6
PR review - moving back methods to field mapper to be reused in later…
carlosdelest Apr 18, 2024
6a32de2
PR review - add validations for ModelSettings
carlosdelest Apr 18, 2024
c2ab40f
Add tests for InferenceFieldMapper integration in MappingLookup
carlosdelest Apr 22, 2024
11d6b3c
Change ParseField for Strings to improve reading
carlosdelest Apr 22, 2024
f8dfd9c
Ensure parser closing
carlosdelest Apr 22, 2024
251ddb5
Merge remote-tracking branch 'origin/main' into carlosdelest/semantic…
carlosdelest Apr 29, 2024
0417268
PR feedback on tests
carlosdelest Apr 29, 2024
b382b3b
Merge branch 'main' into carlosdelest/semantic-text-field-mapping-spe…
elasticmachine Apr 30, 2024
60f5e72
toSemanticTextFieldChunks() method belongs to SemanticTextField - fixing
carlosdelest Apr 30, 2024
fad7ab6
Merge branch 'main' into carlosdelest/semantic-text-field-mapping-spe…
elasticmachine Apr 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,10 @@ private static void failIfMatchesRoutingPath(DocumentParserContext context, Stri
*/
private static void parseCopyFields(DocumentParserContext context, List<String> copyToFields) throws IOException {
for (String field : copyToFields) {
if (context.mappingLookup().getMapper(field) instanceof InferenceFieldMapper) {
// ignore copy_to that targets inference fields, values are already extracted in the coordinating node to perform inference.
continue;
}
javanna marked this conversation as resolved.
Show resolved Hide resolved
// In case of a hierarchy of nested documents, we need to figure out
// which document the field should go to
LuceneDocument targetDoc = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ public static final class Conflicts {
private final String mapperName;
private final List<String> conflicts = new ArrayList<>();

Conflicts(String mapperName) {
public Conflicts(String mapperName) {
this.mapperName = mapperName;
}

Expand All @@ -1211,7 +1211,7 @@ void addConflict(String parameter, String existing, String toMerge) {
conflicts.add("Cannot update parameter [" + parameter + "] from [" + existing + "] to [" + toMerge + "]");
}

void check() {
public void check() {
if (conflicts.isEmpty()) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public static MapperMergeContext from(MapperBuilderContext mapperBuilderContext,
* @param name the name of the child context
* @return a new {@link MapperMergeContext} with this context as its parent
*/
MapperMergeContext createChildContext(String name, ObjectMapper.Dynamic dynamic) {
public MapperMergeContext createChildContext(String name, ObjectMapper.Dynamic dynamic) {
return createChildContext(mapperBuilderContext.createChildContext(name, dynamic));
}

Expand All @@ -69,7 +69,7 @@ MapperMergeContext createChildContext(MapperBuilderContext childContext) {
return new MapperMergeContext(childContext, newFieldsBudget);
}

MapperBuilderContext getMapperBuilderContext() {
public MapperBuilderContext getMapperBuilderContext() {
return mapperBuilderContext;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public CompressedXContent toCompressedXContent() {
/**
* Returns the root object for the current mapping
*/
RootObjectMapper getRoot() {
public RootObjectMapper getRoot() {
javanna marked this conversation as resolved.
Show resolved Hide resolved
return root;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,16 @@ protected Parameter<?>[] getParameters() {
return new Parameter<?>[] { elementType, dims, indexed, similarity, indexOptions, meta };
}

public Builder similarity(VectorSimilarity vectorSimilarity) {
similarity.setValue(vectorSimilarity);
return this;
}

public Builder dimensions(int dimensions) {
this.dims.setValue(dimensions);
return this;
}

@Override
public DenseVectorFieldMapper build(MapperBuilderContext context) {
return new DenseVectorFieldMapper(
Expand Down Expand Up @@ -754,7 +764,7 @@ public static ElementType fromString(String name) {
ElementType.FLOAT
);

enum VectorSimilarity {
public enum VectorSimilarity {
L2_NORM {
@Override
float score(float similarity, ElementType elementType, int dim) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test here that ensures the InferenceFieldMapper logic is correct? Generally this is done by making a small test only plugin that provides a mapper that satisfies this InferenceFieldMapper interface.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I've added them in a new test class - MappingLookupInferenceFieldMapperTests. LMKWYT!


import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -106,6 +107,12 @@ public void testCopyToFieldsParsing() throws Exception {

fieldMapper = mapperService.documentMapper().mappers().getMapper("new_field");
assertThat(fieldMapper.typeName(), equalTo("long"));

MappingLookup mappingLookup = mapperService.mappingLookup();
assertThat(mappingLookup.sourcePaths("another_field"), equalTo(Set.of("copy_test", "int_to_str_test", "another_field")));
assertThat(mappingLookup.sourcePaths("new_field"), equalTo(Set.of("new_field", "int_to_str_test")));
assertThat(mappingLookup.sourcePaths("copy_test"), equalTo(Set.of("copy_test", "cyclic_test")));
assertThat(mappingLookup.sourcePaths("cyclic_test"), equalTo(Set.of("cyclic_test", "copy_test")));
}

public void testCopyToFieldsInnerObjectParsing() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ public void testSourcePathFields() throws IOException {
final Set<String> fieldsUsingSourcePath = new HashSet<>();
((FieldMapper) mapper).sourcePathUsedBy().forEachRemaining(mapper1 -> fieldsUsingSourcePath.add(mapper1.name()));
assertThat(fieldsUsingSourcePath, equalTo(Set.of("field.subfield1", "field.subfield2")));

assertThat(mapperService.mappingLookup().sourcePaths("field.subfield1"), equalTo(Set.of("field")));
assertThat(mapperService.mappingLookup().sourcePaths("field.subfield2"), equalTo(Set.of("field")));
}

public void testUnknownLegacyFieldsUnderKnownRootField() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ public static MetadataRolloverService getMetadataRolloverService(
AllocationService allocationService = mock(AllocationService.class);
when(allocationService.reroute(any(ClusterState.class), any(String.class), any())).then(i -> i.getArguments()[0]);
when(allocationService.getShardRoutingRoleStrategy()).thenReturn(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY);
MappingLookup mappingLookup = null;
MappingLookup mappingLookup = MappingLookup.EMPTY;
if (dataStream != null) {
RootObjectMapper.Builder root = new RootObjectMapper.Builder("_doc", ObjectMapper.Defaults.SUBOBJECTS);
root.add(
Expand Down Expand Up @@ -718,6 +718,7 @@ public static IndicesService mockIndicesServices(MappingLookup mappingLookup) th
when(documentMapper.mapping()).thenReturn(mapping);
when(documentMapper.mappers()).thenReturn(MappingLookup.EMPTY);
when(documentMapper.mappingSource()).thenReturn(mapping.toCompressedXContent());
when(documentMapper.mappers()).thenReturn(mappingLookup);
RoutingFieldMapper routingFieldMapper = mock(RoutingFieldMapper.class);
when(routingFieldMapper.required()).thenReturn(false);
when(documentMapper.routingFieldMapper()).thenReturn(routingFieldMapper);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1030,7 +1030,7 @@ public final void testMinimalIsInvalidInRoutingPath() throws IOException {
}
}

private String minimalIsInvalidRoutingPathErrorMessage(Mapper mapper) {
protected String minimalIsInvalidRoutingPathErrorMessage(Mapper mapper) {
if (mapper instanceof FieldMapper fieldMapper && fieldMapper.fieldType().isDimension() == false) {
return "All fields that match routing_path must be configured with [time_series_dimension: true] "
+ "or flattened fields with a list of dimensions in [time_series_dimensions] and "
Expand Down
12 changes: 12 additions & 0 deletions x-pack/plugin/inference/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
*/
apply plugin: 'elasticsearch.internal-es-plugin'
apply plugin: 'elasticsearch.internal-cluster-test'
apply plugin: 'elasticsearch.internal-yaml-rest-test'

restResources {
restApi {
include '_common', 'indices', 'inference', 'index'
}
}

esplugin {
name 'x-pack-inference'
Expand All @@ -24,6 +31,11 @@ dependencies {
compileOnly project(path: xpackModule('core'))
testImplementation(testArtifact(project(xpackModule('core'))))
testImplementation project(':modules:reindex')
clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin')

api "com.ibm.icu:icu4j:${versions.icu4j}"
}

tasks.named('yamlRestTest') {
usesDefaultDistribution()
}
1 change: 1 addition & 0 deletions x-pack/plugin/inference/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
requires org.apache.httpcomponents.httpasyncclient;
requires org.apache.httpcomponents.httpcore.nio;
requires org.apache.lucene.core;
requires org.elasticsearch.logging;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this inclusion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - it seems we're not using logging. I'm removing it 👍

requires com.ibm.icu;

exports org.elasticsearch.xpack.inference.action;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.indices.SystemIndexDescriptor;
import org.elasticsearch.inference.InferenceServiceExtension;
import org.elasticsearch.inference.InferenceServiceRegistry;
import org.elasticsearch.plugins.ActionPlugin;
import org.elasticsearch.plugins.ExtensiblePlugin;
import org.elasticsearch.plugins.MapperPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.SystemIndexPlugin;
import org.elasticsearch.rest.RestController;
Expand All @@ -50,6 +52,7 @@
import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
import org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceSettings;
import org.elasticsearch.xpack.inference.logging.ThrottlerManager;
import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper;
import org.elasticsearch.xpack.inference.registry.ModelRegistry;
import org.elasticsearch.xpack.inference.rest.RestDeleteInferenceModelAction;
import org.elasticsearch.xpack.inference.rest.RestGetInferenceModelAction;
Expand All @@ -67,12 +70,13 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class InferencePlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SystemIndexPlugin {
public class InferencePlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SystemIndexPlugin, MapperPlugin {

/**
* When this setting is true the verification check that
Expand Down Expand Up @@ -260,4 +264,12 @@ public void close() {

IOUtils.closeWhileHandlingException(inferenceServiceRegistry.get(), throttlerToClose);
}

@Override
public Map<String, Mapper.TypeParser> getMappers() {
if (SemanticTextFeature.isEnabled()) {
return Map.of(SemanticTextFieldMapper.CONTENT_TYPE, SemanticTextFieldMapper.PARSER);
}
return Map.of();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference;

import org.elasticsearch.common.util.FeatureFlag;

/**
* semantic_text feature flag. When the feature is complete, this flag will be removed.
*/
public class SemanticTextFeature {

private SemanticTextFeature() {}

private static final FeatureFlag FEATURE_FLAG = new FeatureFlag("semantic_text");

public static boolean isEnabled() {
return FEATURE_FLAG.isEnabled();
}
}
Loading