forked from SeldonIO/seldon-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
commit 373df43 Author: Lucian Carata <[email protected]> Date: Thu Dec 12 01:09:30 2024 +0000 feat(k6): add scenario with multiple stages ramping up/down RPS (SeldonIO#6031) The added load test scenario allows one to configure an arbitrary number of stages, with each consisting of a linear ramp-up/down to the desired requests per second and a hold/plateau time. Within each stage, the duration for which the inference RPS is held constant is configured via one element in the `CONSTANT_RATE_DURATIONS_SECONDS` environment variable (a vector of comma separated values), with the ramp-up/ down duration preceding it being 1/3rd of the hold time. commit 34cf313 Author: paulb-seldon <[email protected]> Date: Wed Dec 11 16:59:20 2024 +0000 fix(docs): Docs on upgrading from 2.7 - 2.8 (SeldonIO#6143) * Docs on upgrading from 2.7 - 2.8 * Wording update commit 1c40f62 Author: Sherif Akoush <[email protected]> Date: Wed Dec 11 14:32:40 2024 +0000 fix: Add timeout to contexts in client calls (SeldonIO#6125) * add timeout context from infer call for modelgateway * add timeout context to pipeline gateway * set timeout context on process request * add a test for grpc call timeout * add agent k8s api call timeout * add context timeout for shutting down services * add timeout for controller k8s api calls * add timeout for control plane context * add timeout context to reconcile logic * pr comments commit 74032a4 Author: paulb-seldon <[email protected]> Date: Tue Dec 10 17:17:14 2024 +0000 Format spaces in install docs (SeldonIO#6140) commit 7e6c8f1 Author: Sherif Akoush <[email protected]> Date: Tue Dec 10 16:32:37 2024 +0000 fix(docs): add a table for core 2 dependencies in docs (SeldonIO#6139) * add table for core 2 deps in dosc * review comments commit c1d320e Author: Niall D <[email protected]> Date: Tue Dec 10 16:16:55 2024 +0000 feat(scheduler): account for multiple instances of a model per server when scheduling (SeldonIO#6054) * just checking in whatever I have * testing all the 
code * remove comment * linting * document unused param * changing the proto around * use parallelWorkers instead of instanceCount for mlserver * comma * rename ModelConfig * use modelWithVersion as param commit a7bfb00 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon Dec 9 21:35:13 2024 +0000 Bump grafana/grafana from 11.3.1 to 11.4.0 in /scheduler (SeldonIO#6133) Bumps grafana/grafana from 11.3.1 to 11.4.0. --- updated-dependencies: - dependency-name: grafana/grafana dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> commit f129bd1 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon Dec 9 21:33:47 2024 +0000 Bump envoyproxy/envoy from v1.32.1 to v1.32.2 in /scheduler (SeldonIO#6134) Bumps envoyproxy/envoy from v1.32.1 to v1.32.2. --- updated-dependencies: - dependency-name: envoyproxy/envoy dependency-type: direct:production ... Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> commit 208791b Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon Dec 9 21:31:49 2024 +0000 Bump google.golang.org/grpc from 1.68.0 to 1.68.1 in /hodometer (SeldonIO#6136) Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.68.0 to 1.68.1. - [Release notes](https://github.com/grpc/grpc-go/releases) - [Commits](grpc/grpc-go@v1.68.0...v1.68.1) --- updated-dependencies: - dependency-name: google.golang.org/grpc dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> commit 2abeb80 Author: Rajakavitha Kodhandapani <[email protected]> Date: Mon Dec 9 18:31:14 2024 +0530 fix(docs): first draft of the securing endpoints (SeldonIO#5991) * first draft of the securing endpoints * added the output * updated the policy name * added a note * Added context, minor grammar edits * Update docs-gb/models/securing-endpoints.md Co-authored-by: Rajakavitha Kodhandapani <[email protected]> * incorporate review suggestions * fixing the links * added an example for all models * removed the example to create a vs for all models * fixed formatting * formatting changes * Update securing-endpoints.md * added a link to the services meshes main docs page --------- Co-authored-by: Rakavitha Kodhandapani <[email protected]> Co-authored-by: Paul Bridi <[email protected]> Co-authored-by: paulb-seldon <[email protected]> commit 4125273 Author: Niall D <[email protected]> Date: Fri Dec 6 13:52:35 2024 +0000 refactor(envoy): moving envoy/resources headers to util (SeldonIO#6129) * moving headers to util * removing a newline * lint commit f284b4a Author: Sherif Akoush <[email protected]> Date: Fri Dec 6 09:45:15 2024 +0000 fix(cli): Kafka inspect output formatting (SeldonIO#6130) * add kafka inspect consumer timeout (-d) as parameter * add formatting commit 6d89d57 Author: Lucian Carata <[email protected]> Date: Fri Dec 6 01:51:54 2024 +0000 feat(docs): improve HPA documentation (SeldonIO#6091) * highlight constraints and limitations of a HPA-based approach * remove note on statefulsets being created sequentially - we are specifically configuring k8s to allow for parallel creation of statefulset pods. 
* highlight importance of the `metrics-relist-interval` setting * simplify config example to no longer use regex metric matches * clarify example using HPA label selectors * clarify the need to use the `AverageValue` target type * clarify the relation between query rate window size and prometheus scrape interval Merge branch 'v2' into INFRA-1420/add-clusters-before-updating-routes-part-2
- Loading branch information
Showing
54 changed files
with
1,715 additions
and
604 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Securing model endpoints | ||
|
||
In enterprise use cases, you may need to control who can access the endpoints for deployed models or pipelines. You can leverage existing authentication mechanisms in your cluster or environment, such as service mesh-level controls, or use cloud provider solutions like Apigee on GCP, Amazon API Gateway on AWS, or a provider-agnostic gateway like Gravitee. Seldon Core 2 integrates with various [service meshes](../kubernetes/service-meshes/) that support these requirements. Though Seldon Core 2 is service-mesh agnostic, the example on this page demonstrates how to set up authentication and authorization to secure a model endpoint using the Istio service mesh. | ||
|
||
## Securing Endpoints with Istio | ||
|
||
Service meshes offer a flexible way of defining authentication and authorization rules for your models. With Istio, for example, you can configure multiple layers of security within an Istio Gateway, such as [TLS for HTTPS at the gateway level](https://istio.io/latest/docs/tasks/traffic-management/ingress/secure-ingress/#configure-a-tls-ingress-gateway-for-a-single-host), [mutual TLS (mTLS) for secure internal communication](https://istio.io/latest/docs/tasks/traffic-management/ingress/secure-ingress/#configure-a-mutual-tls-ingress-gateway), as well as [AuthorizationPolicies](https://istio.io/latest/docs/reference/config/security/authorization-policy/) and [RequestAuthentication](https://istio.io/latest/docs/reference/config/security/request_authentication/) policies to enforce both authentication and authorization controls. | ||
|
||
**Prerequisites** | ||
* [Deploy a model](../kubernetes/service-meshes/istio.md) | ||
* [Configure a gateway](../kubernetes/service-meshes/istio.md) | ||
* [Create a virtual service to expose the REST and gRPC endpoints](../kubernetes/service-meshes/istio.md) | ||
* Configure an OIDC provider to authenticate. Obtain the `issuer` URL, `jwksUri`, and the `Access token` from the OIDC provider. | ||
{% hint style="info" %} | ||
**Note** There are many types of authorization policies that you can configure to enable access control on workloads in the mesh. | ||
{% endhint %} | ||
|
||
In the following example, you can secure the endpoint such that any requests to the endpoint without the access token are denied. | ||
|
||
To secure the endpoints of a model, you need to: | ||
1. Create a `RequestAuthentication` resource named `ingress-jwt-auth` in the `istio-system` namespace. Replace `<OIDC_TOKEN_ISSUER>` and `<OIDC_TOKEN_ISSUER_JWKS>` with your OIDC provider’s specific issuer URL and JWKS (JSON Web Key Set) URI. | ||
|
||
```yaml | ||
apiVersion: security.istio.io/v1beta1 | ||
kind: RequestAuthentication | ||
metadata: | ||
name: ingress-jwt-auth | ||
namespace: istio-system # This is the namespace where Istio Ingress Gateway usually resides | ||
spec: | ||
selector: | ||
matchLabels: | ||
app: istio-ingressgateway # Apply to Istio Ingress Gateway pods | ||
jwtRules: | ||
- issuer: <OIDC_TOKEN_ISSUER> | ||
jwksUri: <OIDC_TOKEN_ISSUER_JWKS> | ||
``` | ||
Create the resource using `kubectl apply -f ingress-jwt-auth.yaml`. | ||
|
||
2. Create an authorization policy `deny-empty-jwt` in the namespace `istio-system`. | ||
|
||
```yaml | ||
apiVersion: security.istio.io/v1beta1 | ||
kind: AuthorizationPolicy | ||
metadata: | ||
name: deny-empty-jwt | ||
namespace: istio-system | ||
spec: | ||
action: DENY | ||
rules: | ||
- from: | ||
- source: | ||
notRequestPrincipals: | ||
- '*' # Denies requests without a valid JWT principal | ||
to: | ||
- operation: | ||
paths: | ||
- /v2/* # Applies to requests with this path pattern | ||
selector: | ||
matchLabels: | ||
app: istio-ingressgateway # Applies to Istio Ingress Gateway pods | ||
``` | ||
Create the resource using `kubectl apply -f deny-empty-jwt.yaml`. | ||
|
||
3. To verify that requests without an access token are denied, send this request: | ||
```bash | ||
curl -i http://$MESH_IP/v2/models/iris/infer \ | ||
-H "Content-Type: application/json" \ | ||
-H "seldon-model":iris \ | ||
-d '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' | ||
``` | ||
The output is similar to: | ||
```bash | ||
HTTP/1.1 403 Forbidden | ||
content-length: 19 | ||
content-type: text/plain | ||
date: Fri, 25 Oct 2024 11:14:33 GMT | ||
server: istio-envoy | ||
connection: close | ||
Closing connection 0 | ||
RBAC: access denied | ||
``` | ||
Now, send the same request with an access token: | ||
```bash | ||
curl -i http://$MESH_IP/v2/models/iris/infer \ | ||
-H "Content-Type: application/json" \ | ||
-H "Authorization: Bearer $ACCESS_TOKEN" \ | ||
-H "seldon-model":iris \ | ||
-d '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' | ||
``` | ||
The output is similar to: | ||
```bash | ||
HTTP/1.1 200 OK | ||
ce-endpoint: iris_1 | ||
ce-id: 2fb8a086-ee22-4285-9826-9d38111cbb9e | ||
ce-inferenceservicename: mlserver | ||
ce-modelid: iris_1 | ||
ce-namespace: seldon-mesh | ||
ce-requestid: 2fb8a086-ee22-4285-9826-9d38111cbb9e | ||
ce-source: io.seldon.serving.deployment.mlserver.seldon-mesh | ||
ce-specversion: 0.3 | ||
ce-type: io.seldon.serving.inference.response | ||
content-length: 213 | ||
content-type: application/json | ||
date: Fri, 25 Oct 2024 11:44:49 GMT | ||
server: envoy | ||
x-request-id: csdo9cbc2nks73dtlk3g | ||
x-envoy-upstream-service-time: 9 | ||
x-seldon-route: :iris_1: | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,7 @@ func (r *ExperimentReconciler) handleFinalizer(ctx context.Context, logger logr. | |
// Add our finalizer | ||
if !utils.ContainsStr(experiment.ObjectMeta.Finalizers, constants.ExperimentFinalizerName) { | ||
experiment.ObjectMeta.Finalizers = append(experiment.ObjectMeta.Finalizers, constants.ExperimentFinalizerName) | ||
if err := r.Update(context.Background(), experiment); err != nil { | ||
if err := r.Update(ctx, experiment); err != nil { | ||
return true, err | ||
} | ||
} | ||
|
@@ -84,6 +84,8 @@ func (r *ExperimentReconciler) handleFinalizer(ctx context.Context, logger logr. | |
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile | ||
func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||
logger := log.FromContext(ctx).WithName("Reconcile") | ||
ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) | ||
defer cancel() | ||
|
||
experiment := &mlopsv1alpha1.Experiment{} | ||
if err := r.Get(ctx, req.NamespacedName, experiment); err != nil { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,7 +49,7 @@ func (r *PipelineReconciler) handleFinalizer( | |
// Add our finalizer | ||
if !utils.ContainsStr(pipeline.ObjectMeta.Finalizers, constants.PipelineFinalizerName) { | ||
pipeline.ObjectMeta.Finalizers = append(pipeline.ObjectMeta.Finalizers, constants.PipelineFinalizerName) | ||
if err := r.Update(context.Background(), pipeline); err != nil { | ||
if err := r.Update(ctx, pipeline); err != nil { | ||
return true, err | ||
} | ||
} | ||
|
@@ -94,6 +94,8 @@ func (r *PipelineReconciler) handleFinalizer( | |
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile | ||
func (r *PipelineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||
logger := log.FromContext(ctx).WithName("Reconcile") | ||
ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) | ||
defer cancel() | ||
|
||
pipeline := &mlopsv1alpha1.Pipeline{} | ||
if err := r.Get(ctx, req.NamespacedName, pipeline); err != nil { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,7 +65,7 @@ func (r *SeldonRuntimeReconciler) handleFinalizer(ctx context.Context, logger lo | |
// Add our finalizer | ||
if !utils.ContainsStr(runtime.ObjectMeta.Finalizers, constants.RuntimeFinalizerName) { | ||
runtime.ObjectMeta.Finalizers = append(runtime.ObjectMeta.Finalizers, constants.RuntimeFinalizerName) | ||
if err := r.Update(context.Background(), runtime); err != nil { | ||
if err := r.Update(ctx, runtime); err != nil { | ||
return true, err | ||
} | ||
} | ||
|
@@ -120,6 +120,8 @@ func (r *SeldonRuntimeReconciler) handleFinalizer(ctx context.Context, logger lo | |
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile | ||
func (r *SeldonRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||
logger := log.FromContext(ctx).WithName("Reconcile") | ||
ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) | ||
defer cancel() | ||
|
||
seldonRuntime := &mlopsv1alpha1.SeldonRuntime{} | ||
if err := r.Get(ctx, req.NamespacedName, seldonRuntime); err != nil { | ||
|
@@ -214,9 +216,11 @@ func (r *SeldonRuntimeReconciler) updateStatus(seldonRuntime *mlopsv1alpha1.Seld | |
// Find SeldonRuntimes that reference the changes SeldonConfig | ||
// TODO: pass an actual context from the caller to be used here | ||
func (r *SeldonRuntimeReconciler) mapSeldonRuntimesFromSeldonConfig(_ context.Context, obj client.Object) []reconcile.Request { | ||
logger := log.FromContext(context.Background()).WithName("mapSeldonRuntimesFromSeldonConfig") | ||
ctx, cancel := context.WithTimeout(context.Background(), constants.K8sAPICallsTxTimeout) | ||
defer cancel() | ||
logger := log.FromContext(ctx).WithName("mapSeldonRuntimesFromSeldonConfig") | ||
var seldonRuntimes mlopsv1alpha1.SeldonRuntimeList | ||
if err := r.Client.List(context.Background(), &seldonRuntimes); err != nil { | ||
if err := r.Client.List(ctx, &seldonRuntimes); err != nil { | ||
logger.Error(err, "error listing seldonRuntimes") | ||
return nil | ||
} | ||
|
Oops, something went wrong.