Merge branch 'main' into robot-test-actions
rkubis authored Jan 17, 2024
2 parents 0fba52a + 97a976b commit 8a0d674
Showing 22 changed files with 556 additions and 173 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-tests.yml
@@ -81,7 +81,7 @@ jobs:

- name: Upload documentation
if: matrix.session == 'docs-build'
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: docs
path: clients/python/docs/_build
2 changes: 1 addition & 1 deletion api/openapi/model-registry.yaml
@@ -1576,7 +1576,7 @@ components:
runtime:
description: Model runtime.
type: string
state:
desiredState:
$ref: '#/components/schemas/InferenceServiceState'
InferenceServiceCreate:
description: >-
2 changes: 1 addition & 1 deletion clients/python/noxfile.py
@@ -49,7 +49,7 @@ def mypy(session: Session) -> None:
def tests(session: Session) -> None:
"""Run the test suite."""
session.install(".")
session.install("coverage[toml]", "pytest", "pytest-cov", "pygments")
session.install("coverage[toml]", "pytest", "pytest-cov", "pygments", "testcontainers")
try:
session.run(
"pytest",
234 changes: 210 additions & 24 deletions clients/python/poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions clients/python/pyproject.toml
@@ -10,6 +10,10 @@ readme = "README.md"
python = ">= 3.9, < 3.11"
attrs = "^21.0"
ml-metadata = "^1.14.0"
# Testcontainers for python, remote mlmd grpc tested with:
# ml-metadata = { url = "https://github.com/tarilabs/ml-metadata/releases/download/1.14.0/ml_metadata-1.14.0-py3-none-any.whl" }
# or might consider as well:
# ml-metadata = { url = "https://github.com/tarilabs/ml-metadata-remote/releases/download/1.14.0/ml_metadata-1.14.0-py3-none-any.whl" }
typing-extensions = "^4.8"

[tool.poetry.group.dev.dependencies]
@@ -22,6 +26,7 @@ pytest-cov = "^4.1.0"
sphinx-autobuild = "^2021.3.14"
ruff = "^0.1.6"
mypy = "^1.7.0"
testcontainers = "^3.7.1"

[tool.coverage.run]
branch = true
86 changes: 81 additions & 5 deletions clients/python/tests/conftest.py
@@ -1,24 +1,81 @@
import os
import time
from typing import Union

import pytest
from ml_metadata import errors, metadata_store
from ml_metadata.proto import (
ArtifactType,
ConnectionConfig,
ContextType,
metadata_store_pb2,
)
from ml_metadata.proto.metadata_store_pb2 import MetadataStoreClientConfig
from model_registry.core import ModelRegistryAPIClient
from model_registry.store.wrapper import MLMDStore
from model_registry.types import ModelArtifact, ModelVersion, RegisteredModel
from testcontainers.core.container import DockerContainer
from testcontainers.core.waiting_utils import wait_for_logs

ProtoTypeType = Union[ArtifactType, ContextType]

# ruff: noqa: PT021 supported
@pytest.fixture(scope="session")
def mlmd_conn(request) -> MetadataStoreClientConfig:
    model_registry_root_dir = model_registry_root(request)
    print("Assuming this is the Model Registry root directory:", model_registry_root_dir)
    shared_volume = model_registry_root_dir / "test/config/ml-metadata"
    sqlite_db_file = shared_volume / "metadata.sqlite.db"
    if sqlite_db_file.exists():
        msg = f"The file {sqlite_db_file} already exists; make sure to delete it before running these tests."
        raise FileExistsError(msg)
    container = DockerContainer("gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0")
    container.with_exposed_ports(8080)
    container.with_volume_mapping(shared_volume, "/tmp/shared", "rw")  # noqa this is file target in container
    container.with_env("METADATA_STORE_SERVER_CONFIG_FILE", "/tmp/shared/conn_config.pb")  # noqa this is target in container
    container.start()
    wait_for_logs(container, "Server listening on")
    os.system('docker container ls --format "table {{.ID}}\t{{.Names}}\t{{.Ports}}" -a')  # noqa governed test
    print("waited for logs and port")
    cfg = MetadataStoreClientConfig(
        host="localhost",
        port=int(container.get_exposed_port(8080)),
    )
    print(cfg)

    # this callback is needed in order to perform the container.stop();
    # removing this callback might result in the mlmd container shutting down
    # before the tests have had a chance to fully run, resulting in gRPC connection resets.
    def teardown():
        container.stop()
        print("teardown of plain_wrapper completed.")

    request.addfinalizer(teardown)

    time.sleep(3)  # allow some time for mlmd gRPC to fully stabilize (it is "spent" once per pytest session anyway)
    _throwaway_store = metadata_store.MetadataStore(cfg)
    wait_for_grpc(container, _throwaway_store)

    return cfg


def model_registry_root(request):
    return (request.config.rootpath / "../..").resolve()  # resolves to absolute path


@pytest.fixture()
def plain_wrapper() -> MLMDStore:
    config = ConnectionConfig()
    config.fake_database.SetInParent()
    return MLMDStore(config)
def plain_wrapper(request, mlmd_conn: MetadataStoreClientConfig) -> MLMDStore:
    sqlite_db_file = model_registry_root(request) / "test/config/ml-metadata/metadata.sqlite.db"

    def teardown():
        try:
            os.remove(sqlite_db_file)
            print(f"Removed {sqlite_db_file} successfully.")
        except Exception as e:
            print(f"An error occurred while removing {sqlite_db_file}: {e}")
        print("plain_wrapper_after_each done.")

    request.addfinalizer(teardown)

    return MLMDStore(mlmd_conn)


def set_type_attrs(mlmd_obj: ProtoTypeType, name: str, props: list[str]):
@@ -78,3 +135,22 @@ def mr_api(store_wrapper: MLMDStore) -> ModelRegistryAPIClient:
    mr = object.__new__(ModelRegistryAPIClient)
    mr._store = store_wrapper
    return mr


def wait_for_grpc(container: DockerContainer, store: metadata_store.MetadataStore, timeout=6, interval=2):
    start = time.time()
    while True:
        duration = time.time() - start
        results = None
        try:
            results = store.get_contexts()
        except errors.UnavailableError as e:
            print(e)
            print("Container logs:\n", container.get_logs())
            print("Container not ready. Retrying...")
        if results is not None:
            return duration
        if timeout and duration > timeout:
            raise TimeoutError("wait_for_grpc not ready after %.3f seconds" % timeout)
        time.sleep(interval)
126 changes: 126 additions & 0 deletions doc/remote_only_packaging_of_MLMD_Python_lib.md
@@ -0,0 +1,126 @@
# Remote-only packaging of MLMD Python lib

## Context and Problem statement

Google’s ML Metadata (MLMD) is a project composed of a C++ server and a Python client library.
The server exposes a gRPC interface and is distributed only for x86-64 architectures.
It is embedded in the client's wheel binary, providing an additional convenience [method for running the server locally (in memory)](https://www.tensorflow.org/tfx/guide/mlmd#metadata_storage_backends_and_store_connection_configuration),
whilst also making it [architecture-specific](https://pypi.org/project/ml-metadata/1.14.0/#files).

The [Model Registry project](https://docs.google.com/document/d/1G-pjdGaS2kLELsB5kYk_D4AmH-fTfnCnJOhJ8xENjx0/edit?usp=sharing) (MR) is built on top of MLMD.
The Go implementation interfaces with the MLMD server via gRPC, typically available as a Docker container.
The [MR Python client](https://github.com/opendatahub-io/model-registry/tree/main/clients/python#readme) wraps the MLMD client.

As the MLMD client is architecture-specific, so is the MR Python client, which **severely limits the targets it can run on**, as it supports only x86-64.
This **poses many challenges to contributors** using other CPU architectures, especially ARM, which has become more prevalent in recent years.

Since the Model Registry python client does _not_ leverage the embedded MLMD server's in-memory connection, we present options for a “soft-fork” and re-distribution of the MLMD client as a pure Python library, making it platform/architecture agnostic.

The resulting artifact, an MLMD Python wheel that supports only remote gRPC connections and is at the same time a “pure library” (hence not requiring specific CPU architectures, OSes, or CPython versions), could then be published on PyPI under the ODH org and become the dependency of the [MR python client](/clients/python/README.md).
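
To make the intended usage concrete: with a remote-only client, the Python library connects to an already-running MLMD gRPC server instead of spawning the embedded one. A minimal sketch, assuming an MLMD server (e.g. the `ml_metadata_store_server` Docker image used in the tests of this commit) is already listening on `localhost:8080`:

```python
from ml_metadata import metadata_store
from ml_metadata.proto.metadata_store_pb2 import MetadataStoreClientConfig

# Remote-only usage: connect to an external MLMD gRPC server rather than
# the embedded, architecture-specific in-memory server.
cfg = MetadataStoreClientConfig(host="localhost", port=8080)
store = metadata_store.MetadataStore(cfg)

# Any simple read call doubles as a connectivity check.
print(store.get_contexts())
```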


## Goals

* consider the changes required to “soft-fork” the MLMD wheel so that it supports only remote gRPC connections
* repackage it as a pure library


## Non-Goals

* “hard”-fork of MLMD
* maintaining original MLMD tests (those bound to the embedded server)


## Proposed solution

Refer to the conclusions section for the motivations behind selecting:
1. soft-fork the upstream repo and modify the pip+bazel build to produce the distributable Python client ("Alternative B", below)
2. create an `ml-metadata-remote` or similarly named package on PyPI, based on the distributable wheel and files from step 1 above ("Packaging Option1", below)

For documentation purposes, the exploration of the different alternatives is reported below.


## Alternative A: repackage the resulting wheel

This solution explores the steps required to simply repackage the existing MLMD Python wheel distributed by Google, removing all embedded binaries and repackaging the wheel as a pure library.

This has been experimented with success in this repository ([link](https://github.com/tarilabs/ml-metadata-remote)).

The steps required are recorded here ([link](https://github.com/tarilabs/ml-metadata-remote/commits/v1.14.0))

and mainly consist of:

1. Download one platform-specific MLMD v1.14.0 wheel and extract its content ([reference](https://github.com/tarilabs/ml-metadata-remote/commit/39dd0c7dcd063e0440a6354017445dada8423f0c#diff-b335630551682c19a781afebcf4d07bf978fb1f8ac04c6bf87428ed5106870f5))
2. Remove embedded code, apply required code changes ([reference](https://github.com/tarilabs/ml-metadata-remote/commit/bcb1f0ffd37600e056342aff39e154bb35422668#diff-f363c85a1cf3536a48a7b721b02a6999b80a08b9c305d185327e87e2769b6f21))
3. Recompute dist-info checksums before repackaging ([reference](https://github.com/tarilabs/ml-metadata-remote/commit/fda125fb742ab8ecf4a7153705717d8b50f59326#diff-53bdc596caf062825dbb42b65e5b2305db70d2e533c03bc677b13cc8c7cfd236))
4. Repackage the directories as a new pure library wheel ([reference](https://github.com/tarilabs/ml-metadata-remote/commit/5d199f808eea0cb7ba78a0702be8de3306477df8))

The resulting artifact has been [tested](https://github.com/tarilabs/ml-metadata-remote#readme:~:text=Testing%20with%20launching%20a%20local%20server) locally with a gRPC connection to an MLMD server made available via Docker. The resulting artifact is directly available for local download ([link](https://github.com/tarilabs/ml-metadata-remote/releases/tag/1.14.0)).
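
For orientation, here is a minimal Python sketch of steps 1-4 above; the wheel file names are illustrative assumptions, the authoritative changes are in the commits linked above, and the source-level edits of step 2 are only hinted at in comments:

```python
import base64
import csv
import hashlib
import shutil
import zipfile
from pathlib import Path

# Illustrative input: one of Google's platform-specific wheels for v1.14.0.
SRC = "ml_metadata-1.14.0-cp39-cp39-manylinux_2_17_x86_64.whl"
WORK = Path("wheel_work")

# Step 1: extract the platform-specific wheel.
with zipfile.ZipFile(SRC) as whl:
    whl.extractall(WORK)

# Step 2 (partial): drop the embedded native binaries; the accompanying
# Python-source edits (removing embedded-server code paths) are not shown.
# The dist-info WHEEL metadata must also be edited by hand:
# Root-Is-Purelib: true, Tag: py3-none-any.
for so in WORK.rglob("*.so"):
    so.unlink()

# Step 3: recompute the dist-info RECORD checksums for the remaining files.
dist_info = next(WORK.glob("*.dist-info"))
record = dist_info / "RECORD"
with record.open("w", newline="") as f:
    writer = csv.writer(f)
    for path in sorted(p for p in WORK.rglob("*") if p.is_file() and p != record):
        data = path.read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
        writer.writerow([path.relative_to(WORK).as_posix(), f"sha256={digest}", len(data)])
    writer.writerow([record.relative_to(WORK).as_posix(), "", ""])

# Step 4: repackage the tree as a pure-library wheel.
shutil.make_archive("ml_metadata-1.14.0-py3-none-any", "zip", WORK)
shutil.move("ml_metadata-1.14.0-py3-none-any.zip", "ml_metadata-1.14.0-py3-none-any.whl")
```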


## Alternative B: build by soft-fork upstream repo

This solution explores using the upstream MLMD repo by Google and making the necessary code changes, so that the pip+bazel build directly produces the wheel as a pure library.

This has been experimented with success on this fork ([link](https://github.com/tarilabs/ml-metadata/commits/remote-r1.14.0)).

The steps required mainly consist of:

1. Make changes to the bazel BUILD file ([reference](https://github.com/tarilabs/ml-metadata/commit/079aeb3a9da69eb960e428a7866e279d0bfb533b#diff-c8858dec4f58c1d8a280af8c117ff8480f7ed4ae863b96e1ba20b52f83222aab))
2. Make changes to the sh build script ([reference](https://github.com/tarilabs/ml-metadata/commit/079aeb3a9da69eb960e428a7866e279d0bfb533b#diff-125a2f247ce39f711e1c8a77f430bd5b1b865cd10b5c5fef0d9140d276c617f2))
3. Make changes to the setup.py build file ([reference](https://github.com/tarilabs/ml-metadata/commit/079aeb3a9da69eb960e428a7866e279d0bfb533b#diff-60f61ab7a8d1910d86d9fda2261620314edcae5894d5aaa236b821c7256badd7))
4. Apply required code changes analogously to “Alternative A” (see other changes in [this commit](https://github.com/tarilabs/ml-metadata/commit/079aeb3a9da69eb960e428a7866e279d0bfb533b))

The resulting artifact has been [tested](https://github.com/tarilabs/ml-metadata/commit/794ec39d97e3ac70db2ca18fcf5807c44f339f0b) locally with a gRPC connection to an MLMD server made available via Docker, similar to the instructions provided in “Alternative A”.
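
To illustrate the direction of step 3 above: a common way to make `setup.py` emit a pure `py3-none-any` wheel is to override `bdist_wheel`. This is a sketch of the general technique, not the exact upstream diff (see the linked commit for the real changes):

```python
from setuptools import setup
from wheel.bdist_wheel import bdist_wheel


class PureBdistWheel(bdist_wheel):
    """Force a pure-library wheel, i.e. a py3-none-any tag."""

    def finalize_options(self):
        super().finalize_options()
        # With no native ext_modules declared and root_is_pure set, the built
        # wheel is no longer bound to a CPU architecture, OS, or CPython ABI.
        self.root_is_pure = True


setup(
    name="ml-metadata",
    version="1.14.0",
    packages=["ml_metadata"],  # illustrative; the real package list is larger
    cmdclass={"bdist_wheel": PureBdistWheel},
)
```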


## Packaging

In this section we consider packaging and delivery options for the resulting artifact from the alternative selected above.


### Packaging Option1: separate repo on ODH

This delivery option considers having a separate repo on ODH, called “ml-metadata-remote” (or similar), repeating the exercise from the alternative selected above in that repo, and then delivering it as a package on PyPI.

Pros:

* Well isolated dependency
* also, if one day upstream Google MLMD becomes platform/arch agnostic, it is just a matter of switching the MR python client's consumed dependency back
* Google code (copyright header) is isolated from Model Registry code
* The resulting artifact could also be re-used by other communities/users

Cons:

* Additional artifact to publish on PyPI


### Packaging Option2: mix resulting artifact inside Model Registry repo

This delivery option considers executing the exercise from the alternative selected above and placing the resulting artifact directly inside the Model Registry repo, within the python client source [location](https://github.com/opendatahub-io/model-registry/tree/main/clients/python). (For analogy, this is similar to “shading”/“uberjar” in the Java world, for those familiar with the concept.)

Pros:

* Only one artifact to publish on PyPI

Cons:

* Google code (copyright header) is mixed with Model Registry code
* at this stage it is not clear whether there are any implications for uplifting the MR project into the KF community
* The resulting artifact cannot be re-used by other communities/users
* If one day upstream Google MLMD becomes platform/arch agnostic, changing the MR python client back to the original ml-metadata dependency could require extra work and effort


## Conclusion

Based on the analysis of the alternatives provided, we decided to further pursue:
- the repackaging per **Alternative B**, because it makes it easier to demonstrate the steps and modifications required, using the upstream repo as the baseline.
- the distribution per **Packaging Option1**, because it will make it easier to "revert" to the upstream `ml-metadata` dependency if upstream ever publishes for all architectures, OSes, etc., and because the pros outweigh the considered cons.

MR python client [tests](https://github.com/opendatahub-io/model-registry/blob/259b39320953bf05942dcec1fb5ec74f7eb5d4a7/clients/python/tests/conftest.py#L19) should be rewritten using Testcontainers, and not leveraging the embedded server (already done with [this PR](https://github.com/opendatahub-io/model-registry/pull/225)).

The group concurs this is a sensible approach ([recorded here](https://redhat-internal.slack.com/archives/C05LGBNUK9C/p1700763823505259?thread_ts=1700427888.670999&cid=C05LGBNUK9C)).

This change also aligns the test approach used for the MR python client with the strategy of the MR core Go layer's test framework, which already makes use of Testcontainers for Go ([reference](https://github.com/opendatahub-io/model-registry/blob/259b39320953bf05942dcec1fb5ec74f7eb5d4a7/internal/testutils/test_container_utils.go#L59)).

This would also allow updating the version constraint on the `attrs` dependency as part of this activity.
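
As an illustration of the intended test style, a test against the new Testcontainers-backed fixtures in this commit's `conftest.py` could look as follows; treat `upsert_registered_model` and the `RegisteredModel` constructor signature as assumptions about the MR python client's core API:

```python
from model_registry.types import RegisteredModel


def test_registered_model_roundtrip(mr_api):
    # mr_api (from conftest.py) talks to the MLMD gRPC server started by
    # Testcontainers; no embedded in-memory server is involved.
    rm = RegisteredModel("test-model")
    rm_id = mr_api.upsert_registered_model(rm)
    assert rm_id is not None
```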
14 changes: 7 additions & 7 deletions go.mod
@@ -3,16 +3,16 @@ module github.com/opendatahub-io/model-registry
go 1.19

require (
	github.com/go-chi/chi/v5 v5.0.10
	github.com/go-chi/chi/v5 v5.0.11
	github.com/go-chi/cors v1.2.1
	github.com/golang/glog v1.2.0
	github.com/spf13/cobra v1.8.0
	github.com/spf13/pflag v1.0.5
	github.com/spf13/viper v1.18.1
	github.com/spf13/viper v1.18.2
	github.com/stretchr/testify v1.8.4
	github.com/testcontainers/testcontainers-go v0.26.0
	google.golang.org/grpc v1.59.0
	google.golang.org/protobuf v1.31.0
	google.golang.org/grpc v1.60.1
	google.golang.org/protobuf v1.32.0
)

require (
@@ -31,9 +31,9 @@ require (
	dario.cat/mergo v1.0.0 // indirect
	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
	github.com/Microsoft/go-winio v0.6.1 // indirect
	github.com/Microsoft/hcsshim v0.11.1 // indirect
	github.com/Microsoft/hcsshim v0.11.4 // indirect
	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
	github.com/containerd/containerd v1.7.7 // indirect
	github.com/containerd/containerd v1.7.11 // indirect
	github.com/cpuguy83/dockercfg v0.3.1 // indirect
	github.com/docker/distribution v2.8.2+incompatible // indirect
	github.com/docker/docker v24.0.7+incompatible // indirect
@@ -43,7 +43,7 @@
	github.com/go-ole/go-ole v1.2.6 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/golang/protobuf v1.5.3 // indirect
	github.com/google/uuid v1.4.0
	github.com/google/uuid v1.5.0
	github.com/hashicorp/hcl v1.0.0 // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/klauspost/compress v1.17.0 // indirect