forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* WIP * Add ElasticInferenceServiceTests TODOs * Add ElasticInferenceServiceActionCreatorTests TODOs * Add ElasticInferenceServiceResponseHandlerTests TODOs * Add ElasticInferenceServiceSparseEmbeddingsRequestTests TODOs * Add ElasticInferenceServiceSparseEmbeddingsModelTests TODOs * spotless apply * Fix conflicts * Add EmptySecretSettingsTests * Add named writeables to InferenceNamedWriteablesProvider * Remove addressed todos * Translate model to correct endpoint * Remove addressed TODO * Add docs to ElasticInferenceServiceFeature * Implement and test truncation/request * Add some EIS tests * Support chunked inference * Check model config * Add more tests * Add response handler * Add more tests + HTTP 413 handling * Fix some tests * Spotless * Fixes * Switch back to original response structure * Implement pass-through chunking * Spotless * Fix after rebase * Spotless * Log error upon failing to parse error response * Remove TODOs * Update docs/changelog/111154.yaml --------- Co-authored-by: Adam Demjen <[email protected]>
- Loading branch information
Showing
40 changed files
with
2,894 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 111154 | ||
summary: EIS integration | ||
area: Inference | ||
type: feature | ||
issues: [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
server/src/main/java/org/elasticsearch/inference/EmptySecretSettings.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.inference; | ||
|
||
import org.elasticsearch.TransportVersion; | ||
import org.elasticsearch.TransportVersions; | ||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.common.io.stream.StreamOutput; | ||
import org.elasticsearch.xcontent.XContentBuilder; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* This class defines an empty secret settings object. This is useful for services that do not have any secret settings. | ||
*/ | ||
public record EmptySecretSettings() implements SecretSettings { | ||
public static final String NAME = "empty_secret_settings"; | ||
|
||
public static final EmptySecretSettings INSTANCE = new EmptySecretSettings(); | ||
|
||
public EmptySecretSettings(StreamInput in) { | ||
this(); | ||
} | ||
|
||
@Override | ||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { | ||
builder.startObject(); | ||
builder.endObject(); | ||
return builder; | ||
} | ||
|
||
@Override | ||
public String getWriteableName() { | ||
return NAME; | ||
} | ||
|
||
@Override | ||
public TransportVersion getMinimalSupportedVersion() { | ||
return TransportVersions.ML_INFERENCE_EIS_INTEGRATION_ADDED; | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException {} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
...csearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.inference.external.action.elastic; | ||
|
||
import org.elasticsearch.xpack.inference.external.action.ExecutableAction; | ||
import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction; | ||
import org.elasticsearch.xpack.inference.external.http.sender.ElasticInferenceServiceSparseEmbeddingsRequestManager; | ||
import org.elasticsearch.xpack.inference.external.http.sender.Sender; | ||
import org.elasticsearch.xpack.inference.services.ServiceComponents; | ||
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModel; | ||
|
||
import java.util.Objects; | ||
|
||
import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage; | ||
|
||
public class ElasticInferenceServiceActionCreator implements ElasticInferenceServiceActionVisitor { | ||
|
||
private final Sender sender; | ||
|
||
private final ServiceComponents serviceComponents; | ||
|
||
public ElasticInferenceServiceActionCreator(Sender sender, ServiceComponents serviceComponents) { | ||
this.sender = Objects.requireNonNull(sender); | ||
this.serviceComponents = Objects.requireNonNull(serviceComponents); | ||
} | ||
|
||
@Override | ||
public ExecutableAction create(ElasticInferenceServiceSparseEmbeddingsModel model) { | ||
var requestManager = new ElasticInferenceServiceSparseEmbeddingsRequestManager(model, serviceComponents); | ||
var errorMessage = constructFailedToSendRequestMessage(model.uri(), "Elastic Inference Service sparse embeddings"); | ||
return new SenderExecutableAction(sender, requestManager, errorMessage); | ||
} | ||
} |
17 changes: 17 additions & 0 deletions
17
...csearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionVisitor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.inference.external.action.elastic; | ||
|
||
import org.elasticsearch.xpack.inference.external.action.ExecutableAction; | ||
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModel; | ||
|
||
public interface ElasticInferenceServiceActionVisitor { | ||
|
||
ExecutableAction create(ElasticInferenceServiceSparseEmbeddingsModel model); | ||
|
||
} |
54 changes: 54 additions & 0 deletions
54
...lasticsearch/xpack/inference/external/elastic/ElasticInferenceServiceResponseHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.inference.external.elastic; | ||
|
||
import org.apache.logging.log4j.Logger; | ||
import org.elasticsearch.xpack.inference.external.http.HttpResult; | ||
import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler; | ||
import org.elasticsearch.xpack.inference.external.http.retry.ContentTooLargeException; | ||
import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; | ||
import org.elasticsearch.xpack.inference.external.http.retry.RetryException; | ||
import org.elasticsearch.xpack.inference.external.request.Request; | ||
import org.elasticsearch.xpack.inference.external.response.elastic.ElasticInferenceServiceErrorResponseEntity; | ||
import org.elasticsearch.xpack.inference.logging.ThrottlerManager; | ||
|
||
import static org.elasticsearch.xpack.inference.external.http.HttpUtils.checkForEmptyBody; | ||
|
||
public class ElasticInferenceServiceResponseHandler extends BaseResponseHandler { | ||
|
||
public ElasticInferenceServiceResponseHandler(String requestType, ResponseParser parseFunction) { | ||
super(requestType, parseFunction, ElasticInferenceServiceErrorResponseEntity::fromResponse); | ||
} | ||
|
||
@Override | ||
public void validateResponse(ThrottlerManager throttlerManager, Logger logger, Request request, HttpResult result) | ||
throws RetryException { | ||
checkForFailureStatusCode(request, result); | ||
checkForEmptyBody(throttlerManager, logger, request, result); | ||
} | ||
|
||
void checkForFailureStatusCode(Request request, HttpResult result) throws RetryException { | ||
int statusCode = result.response().getStatusLine().getStatusCode(); | ||
if (statusCode >= 200 && statusCode < 300) { | ||
return; | ||
} | ||
|
||
if (statusCode == 500) { | ||
throw new RetryException(true, buildError(SERVER_ERROR, request, result)); | ||
} else if (statusCode == 400) { | ||
throw new RetryException(false, buildError(BAD_REQUEST, request, result)); | ||
} else if (statusCode == 405) { | ||
throw new RetryException(false, buildError(METHOD_NOT_ALLOWED, request, result)); | ||
} else if (statusCode == 413) { | ||
throw new ContentTooLargeException(buildError(CONTENT_TOO_LARGE, request, result)); | ||
} | ||
|
||
throw new RetryException(false, buildError(UNSUCCESSFUL, request, result)); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
...ticsearch/xpack/inference/external/http/sender/ElasticInferenceServiceRequestManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.inference.external.http.sender; | ||
|
||
import org.elasticsearch.threadpool.ThreadPool; | ||
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceModel; | ||
|
||
import java.util.Objects; | ||
|
||
public abstract class ElasticInferenceServiceRequestManager extends BaseRequestManager { | ||
|
||
protected ElasticInferenceServiceRequestManager(ThreadPool threadPool, ElasticInferenceServiceModel model) { | ||
super(threadPool, model.getInferenceEntityId(), RateLimitGrouping.of(model), model.rateLimitServiceSettings().rateLimitSettings()); | ||
} | ||
|
||
record RateLimitGrouping(int modelIdHash) { | ||
public static RateLimitGrouping of(ElasticInferenceServiceModel model) { | ||
Objects.requireNonNull(model); | ||
|
||
return new RateLimitGrouping(model.rateLimitServiceSettings().modelId().hashCode()); | ||
} | ||
} | ||
} |
71 changes: 71 additions & 0 deletions
71
...inference/external/http/sender/ElasticInferenceServiceSparseEmbeddingsRequestManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.inference.external.http.sender; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.elasticsearch.action.ActionListener; | ||
import org.elasticsearch.inference.InferenceServiceResults; | ||
import org.elasticsearch.xpack.inference.common.Truncator; | ||
import org.elasticsearch.xpack.inference.external.elastic.ElasticInferenceServiceResponseHandler; | ||
import org.elasticsearch.xpack.inference.external.http.retry.RequestSender; | ||
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; | ||
import org.elasticsearch.xpack.inference.external.request.elastic.ElasticInferenceServiceSparseEmbeddingsRequest; | ||
import org.elasticsearch.xpack.inference.external.response.elastic.ElasticInferenceServiceSparseEmbeddingsResponseEntity; | ||
import org.elasticsearch.xpack.inference.services.ServiceComponents; | ||
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModel; | ||
|
||
import java.util.List; | ||
import java.util.function.Supplier; | ||
|
||
import static org.elasticsearch.xpack.inference.common.Truncator.truncate; | ||
|
||
public class ElasticInferenceServiceSparseEmbeddingsRequestManager extends ElasticInferenceServiceRequestManager { | ||
|
||
private static final Logger logger = LogManager.getLogger(ElasticInferenceServiceSparseEmbeddingsRequestManager.class); | ||
|
||
private static final ResponseHandler HANDLER = createSparseEmbeddingsHandler(); | ||
|
||
private final ElasticInferenceServiceSparseEmbeddingsModel model; | ||
|
||
private final Truncator truncator; | ||
|
||
private static ResponseHandler createSparseEmbeddingsHandler() { | ||
return new ElasticInferenceServiceResponseHandler( | ||
"Elastic Inference Service sparse embeddings", | ||
ElasticInferenceServiceSparseEmbeddingsResponseEntity::fromResponse | ||
); | ||
} | ||
|
||
public ElasticInferenceServiceSparseEmbeddingsRequestManager( | ||
ElasticInferenceServiceSparseEmbeddingsModel model, | ||
ServiceComponents serviceComponents | ||
) { | ||
super(serviceComponents.threadPool(), model); | ||
this.model = model; | ||
this.truncator = serviceComponents.truncator(); | ||
} | ||
|
||
@Override | ||
public void execute( | ||
InferenceInputs inferenceInputs, | ||
RequestSender requestSender, | ||
Supplier<Boolean> hasRequestCompletedFunction, | ||
ActionListener<InferenceServiceResults> listener | ||
) { | ||
List<String> docsInput = DocumentsOnlyInput.of(inferenceInputs).getInputs(); | ||
var truncatedInput = truncate(docsInput, model.getServiceSettings().maxInputTokens()); | ||
|
||
ElasticInferenceServiceSparseEmbeddingsRequest request = new ElasticInferenceServiceSparseEmbeddingsRequest( | ||
truncator, | ||
truncatedInput, | ||
model | ||
); | ||
execute(new ExecutableInferenceRequest(requestSender, logger, request, HANDLER, hasRequestCompletedFunction, listener)); | ||
} | ||
} |
12 changes: 12 additions & 0 deletions
12
...lasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceRequest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.inference.external.request.elastic; | ||
|
||
import org.elasticsearch.xpack.inference.external.request.Request; | ||
|
||
public interface ElasticInferenceServiceRequest extends Request {} |
Oops, something went wrong.