diff --git a/Makefile b/Makefile index 5803fa4..3329828 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ test: .PHONY: test-e2e test-e2e: - poetry run pytest --e2e -s -x -rA + poetry run pytest --e2e -s -x -rA -v .PHONY: test-e2e-model-registry test-e2e-model-registry: diff --git a/e2e/quay-lite/config.yaml b/e2e/quay-lite/config.yaml index 72fd8cc..de87311 100644 --- a/e2e/quay-lite/config.yaml +++ b/e2e/quay-lite/config.yaml @@ -4,8 +4,8 @@ DATA_MODEL_CACHE_CONFIG: primary: host: quay-redis SUPER_USERS: -- admin -- user1 + - admin + - user1 AUTHENTICATION_TYPE: Database DB_URI: postgresql://quay:quay@quay-postgresql:5432/quay BUILDLOGS_REDIS: @@ -15,15 +15,15 @@ USER_EVENTS_REDIS: host: quay-redis port: 6379 BITTORRENT_FILENAME_PEPPER: 0ee18f90-5b6d-42d2-ab5e-ec9fcd846272 -DATABASE_SECRET_KEY: '30060361640793187613697366923211113205676925445650250274752125083971638376224' +DATABASE_SECRET_KEY: "30060361640793187613697366923211113205676925445650250274752125083971638376224" DEFAULT_TAG_EXPIRATION: 2w DISTRIBUTED_STORAGE_CONFIG: default: - - LocalStorage - - storage_path: /datastorage/registry + - LocalStorage + - storage_path: /datastorage/registry DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS: [] DISTRIBUTED_STORAGE_PREFERENCE: -- default + - default ENTERPRISE_LOGO_URL: /static/img/quay-horizontal-color.svg EXTERNAL_TLS_TERMINATION: true FEATURE_ANONYMOUS_ACCESS: true @@ -59,13 +59,13 @@ REPO_MIRROR_TLS_VERIFY: true SETUP_COMPLETE: true SIGNING_ENGINE: gpg2 TAG_EXPIRATION_OPTIONS: -- 0s -- 1d -- 1w -- 2w -- 4w + - 0s + - 1d + - 1w + - 2w + - 4w TEAM_RESYNC_STALE_TIME: 60m -TESTING: false +TESTING: true USERFILES_LOCATION: default USERFILES_PATH: userfiles/ USE_CDN: false @@ -77,4 +77,4 @@ CORS_ORIGIN: - "http://localhost:9000" FEATURE_UI_V2: True FEATURE_USER_METADATA: True -IGNORE_UNKNOWN_MEDIATYPES: True +IGNORE_UNKNOWN_MEDIATYPES: false diff --git a/e2e/quay-lite/quay-app-config.yaml b/e2e/quay-lite/quay-app-config.yaml index 293a324..e91bb5c 100644 --- a/e2e/quay-lite/quay-app-config.yaml +++ b/e2e/quay-lite/quay-app-config.yaml @@ -4,4 +4,4 @@ metadata: name: quay-app-config type: Opaque data: - config.yaml: REFUQV9NT0RFTF9DQUNIRV9DT05GSUc6CiAgZW5naW5lOiByZWRpcwogIHJlZGlzX2NvbmZpZzoKICAgIHByaW1hcnk6CiAgICAgIGhvc3Q6IHF1YXktcmVkaXMKU1VQRVJfVVNFUlM6Ci0gYWRtaW4KLSB1c2VyMQpBVVRIRU5USUNBVElPTl9UWVBFOiBEYXRhYmFzZQpEQl9VUkk6IHBvc3RncmVzcWw6Ly9xdWF5OnF1YXlAcXVheS1wb3N0Z3Jlc3FsOjU0MzIvcXVheQpCVUlMRExPR1NfUkVESVM6CiAgaG9zdDogcXVheS1yZWRpcwogIHBvcnQ6IDYzNzkKVVNFUl9FVkVOVFNfUkVESVM6CiAgaG9zdDogcXVheS1yZWRpcwogIHBvcnQ6IDYzNzkKQklUVE9SUkVOVF9GSUxFTkFNRV9QRVBQRVI6IDBlZTE4ZjkwLTViNmQtNDJkMi1hYjVlLWVjOWZjZDg0NjI3MgpEQVRBQkFTRV9TRUNSRVRfS0VZOiAnMzAwNjAzNjE2NDA3OTMxODc2MTM2OTczNjY5MjMyMTExMTMyMDU2NzY5MjU0NDU2NTAyNTAyNzQ3NTIxMjUwODM5NzE2MzgzNzYyMjQnCkRFRkFVTFRfVEFHX0VYUElSQVRJT046IDJ3CkRJU1RSSUJVVEVEX1NUT1JBR0VfQ09ORklHOgogIGRlZmF1bHQ6CiAgLSBMb2NhbFN0b3JhZ2UKICAtIHN0b3JhZ2VfcGF0aDogL2RhdGFzdG9yYWdlL3JlZ2lzdHJ5CkRJU1RSSUJVVEVEX1NUT1JBR0VfREVGQVVMVF9MT0NBVElPTlM6IFtdCkRJU1RSSUJVVEVEX1NUT1JBR0VfUFJFRkVSRU5DRToKLSBkZWZhdWx0CkVOVEVSUFJJU0VfTE9HT19VUkw6IC9zdGF0aWMvaW1nL3F1YXktaG9yaXpvbnRhbC1jb2xvci5zdmcKRVhURVJOQUxfVExTX1RFUk1JTkFUSU9OOiB0cnVlCkZFQVRVUkVfQU5PTllNT1VTX0FDQ0VTUzogdHJ1ZQpGRUFUVVJFX0FQUF9SRUdJU1RSWTogZmFsc2UKRkVBVFVSRV9BUFBfU1BFQ0lGSUNfVE9LRU5TOiB0cnVlCkZFQVRVUkVfQlVJTERfU1VQUE9SVDogZmFsc2UKRkVBVFVSRV9DSEFOR0VfVEFHX0VYUElSQVRJT046IHRydWUKRkVBVFVSRV9ESVJFQ1RfTE9HSU46IHRydWUKRkVBVFVSRV9NQUlMSU5HOiBmYWxzZQpGRUFUVVJFX1BBUlRJQUxfVVNFUl9BVVRPQ09NUExFVEU6IHRydWUKRkVBVFVSRV9SRVBPX01JUlJPUjogZmFsc2UKRkVBVFVSRV9SRVFVSVJFX1RFQU1fSU5WSVRFOiB0cnVlCkZFQVRVUkVfUkVTVFJJQ1RFRF9WMV9QVVNIOiBmYWxzZQpGRUFUVVJFX1NFQ1VSSVRZX05PVElGSUNBVElPTlM6IGZhbHNlCkZFQVRVUkVfU0VDVVJJVFlfU0NBTk5FUjogZmFsc2UKRkVBVFVSRV9VU0VSTkFNRV9DT05GSVJNQVRJT046IHRydWUKRkVBVFVSRV9VU0VSX0lOSVRJQUxJWkU6IHRydWUKRkVBVFVSRV9VU0VSX0NSRUFUSU9OOiB0cnVlCkZFQVRVUkVfVVNFUl9MT0dfQUNDRVNTOiB0cnVlCkZFQVRVUkVfUFJPWFlfQ0FDSEU6IHRydWUKR0lUSFVCX0xPR0lOX0NPTkZJRzoge30KR0lUSFVCX1RSSUdHRVJfQ09ORklHOiB7fQpHSVRMQUJfVFJJR0dFUl9LSU5EOiB7fQpMT0dfQVJDSElWRV9MT0NBVElPTjogZGVmYXVsdApNQUlMX0RFRkFVTFRfU0VOREVSOiBhZG1pbkBleGFtcGxlLmNvbQpNQUlMX1BPUlQ6IDU4NwpNQUlMX1VTRV9UTFM6IHRydWUKUFJFRkVSUkVEX1VSTF9TQ0hFTUU6IGh0dHAKUkVHSVNUUllfVElUTEU6IFJlZCBIYXQgUXVheSBMSVRFClJFR0lTVFJZX1RJVExFX1NIT1JUOiBSZWQgSGF0IFF1YXkgTElURQpSRVBPX01JUlJPUl9TRVJWRVJfSE9TVE5BTUU6IG51bGwKUkVQT19NSVJST1JfVExTX1ZFUklGWTogdHJ1ZQpTRVRVUF9DT01QTEVURTogdHJ1ZQpTSUdOSU5HX0VOR0lORTogZ3BnMgpUQUdfRVhQSVJBVElPTl9PUFRJT05TOgotIDBzCi0gMWQKLSAxdwotIDJ3Ci0gNHcKVEVBTV9SRVNZTkNfU1RBTEVfVElNRTogNjBtClRFU1RJTkc6IGZhbHNlClVTRVJGSUxFU19MT0NBVElPTjogZGVmYXVsdApVU0VSRklMRVNfUEFUSDogdXNlcmZpbGVzLwpVU0VfQ0ROOiBmYWxzZQpGRUFUVVJFX1FVT1RBX01BTkFHRU1FTlQ6IFRydWUKU0VSVkVSX0hPU1ROQU1FOiBsb2NhbGhvc3Q6NTAwMQpCUk9XU0VSX0FQSV9DQUxMU19YSFJfT05MWTogRmFsc2UKQ09SU19PUklHSU46CiAgLSAiaHR0cHM6Ly9zdGFnZS5mb28ucmVkaGF0LmNvbToxMzM3IgogIC0gImh0dHA6Ly9sb2NhbGhvc3Q6OTAwMCIKRkVBVFVSRV9VSV9WMjogVHJ1ZQpGRUFUVVJFX1VTRVJfTUVUQURBVEE6IFRydWUKSUdOT1JFX1VOS05PV05fTUVESUFUWVBFUzogVHJ1ZQo= + config.yaml: REFUQV9NT0RFTF9DQUNIRV9DT05GSUc6CiAgZW5naW5lOiByZWRpcwogIHJlZGlzX2NvbmZpZzoKICAgIHByaW1hcnk6CiAgICAgIGhvc3Q6IHF1YXktcmVkaXMKU1VQRVJfVVNFUlM6CiAgLSBhZG1pbgogIC0gdXNlcjEKQVVUSEVOVElDQVRJT05fVFlQRTogRGF0YWJhc2UKREJfVVJJOiBwb3N0Z3Jlc3FsOi8vcXVheTpxdWF5QHF1YXktcG9zdGdyZXNxbDo1NDMyL3F1YXkKQlVJTERMT0dTX1JFRElTOgogIGhvc3Q6IHF1YXktcmVkaXMKICBwb3J0OiA2Mzc5ClVTRVJfRVZFTlRTX1JFRElTOgogIGhvc3Q6IHF1YXktcmVkaXMKICBwb3J0OiA2Mzc5CkJJVFRPUlJFTlRfRklMRU5BTUVfUEVQUEVSOiAwZWUxOGY5MC01YjZkLTQyZDItYWI1ZS1lYzlmY2Q4NDYyNzIKREFUQUJBU0VfU0VDUkVUX0tFWTogIjMwMDYwMzYxNjQwNzkzMTg3NjEzNjk3MzY2OTIzMjExMTEzMjA1Njc2OTI1NDQ1NjUwMjUwMjc0NzUyMTI1MDgzOTcxNjM4Mzc2MjI0IgpERUZBVUxUX1RBR19FWFBJUkFUSU9OOiAydwpESVNUUklCVVRFRF9TVE9SQUdFX0NPTkZJRzoKICBkZWZhdWx0OgogICAgLSBMb2NhbFN0b3JhZ2UKICAgIC0gc3RvcmFnZV9wYXRoOiAvZGF0YXN0b3JhZ2UvcmVnaXN0cnkKRElTVFJJQlVURURfU1RPUkFHRV9ERUZBVUxUX0xPQ0FUSU9OUzogW10KRElTVFJJQlVURURfU1RPUkFHRV9QUkVGRVJFTkNFOgogIC0gZGVmYXVsdApFTlRFUlBSSVNFX0xPR09fVVJMOiAvc3RhdGljL2ltZy9xdWF5LWhvcml6b250YWwtY29sb3Iuc3ZnCkVYVEVSTkFMX1RMU19URVJNSU5BVElPTjogdHJ1ZQpGRUFUVVJFX0FOT05ZTU9VU19BQ0NFU1M6IHRydWUKRkVBVFVSRV9BUFBfUkVHSVNUUlk6IGZhbHNlCkZFQVRVUkVfQVBQX1NQRUNJRklDX1RPS0VOUzogdHJ1ZQpGRUFUVVJFX0JVSUxEX1NVUFBPUlQ6IGZhbHNlCkZFQVRVUkVfQ0hBTkdFX1RBR19FWFBJUkFUSU9OOiB0cnVlCkZFQVRVUkVfRElSRUNUX0xPR0lOOiB0cnVlCkZFQVRVUkVfTUFJTElORzogZmFsc2UKRkVBVFVSRV9QQVJUSUFMX1VTRVJfQVVUT0NPTVBMRVRFOiB0cnVlCkZFQVRVUkVfUkVQT19NSVJST1I6IGZhbHNlCkZFQVRVUkVfUkVRVUlSRV9URUFNX0lOVklURTogdHJ1ZQpGRUFUVVJFX1JFU1RSSUNURURfVjFfUFVTSDogZmFsc2UKRkVBVFVSRV9TRUNVUklUWV9OT1RJRklDQVRJT05TOiBmYWxzZQpGRUFUVVJFX1NFQ1VSSVRZX1NDQU5ORVI6IGZhbHNlCkZFQVRVUkVfVVNFUk5BTUVfQ09ORklSTUFUSU9OOiB0cnVlCkZFQVRVUkVfVVNFUl9JTklUSUFMSVpFOiB0cnVlCkZFQVRVUkVfVVNFUl9DUkVBVElPTjogdHJ1ZQpGRUFUVVJFX1VTRVJfTE9HX0FDQ0VTUzogdHJ1ZQpGRUFUVVJFX1BST1hZX0NBQ0hFOiB0cnVlCkdJVEhVQl9MT0dJTl9DT05GSUc6IHt9CkdJVEhVQl9UUklHR0VSX0NPTkZJRzoge30KR0lUTEFCX1RSSUdHRVJfS0lORDoge30KTE9HX0FSQ0hJVkVfTE9DQVRJT046IGRlZmF1bHQKTUFJTF9ERUZBVUxUX1NFTkRFUjogYWRtaW5AZXhhbXBsZS5jb20KTUFJTF9QT1JUOiA1ODcKTUFJTF9VU0VfVExTOiB0cnVlClBSRUZFUlJFRF9VUkxfU0NIRU1FOiBodHRwClJFR0lTVFJZX1RJVExFOiBSZWQgSGF0IFF1YXkgTElURQpSRUdJU1RSWV9USVRMRV9TSE9SVDogUmVkIEhhdCBRdWF5IExJVEUKUkVQT19NSVJST1JfU0VSVkVSX0hPU1ROQU1FOiBudWxsClJFUE9fTUlSUk9SX1RMU19WRVJJRlk6IHRydWUKU0VUVVBfQ09NUExFVEU6IHRydWUKU0lHTklOR19FTkdJTkU6IGdwZzIKVEFHX0VYUElSQVRJT05fT1BUSU9OUzoKICAtIDBzCiAgLSAxZAogIC0gMXcKICAtIDJ3CiAgLSA0dwpURUFNX1JFU1lOQ19TVEFMRV9USU1FOiA2MG0KVEVTVElORzogdHJ1ZQpVU0VSRklMRVNfTE9DQVRJT046IGRlZmF1bHQKVVNFUkZJTEVTX1BBVEg6IHVzZXJmaWxlcy8KVVNFX0NETjogZmFsc2UKRkVBVFVSRV9RVU9UQV9NQU5BR0VNRU5UOiBUcnVlClNFUlZFUl9IT1NUTkFNRTogbG9jYWxob3N0OjUwMDEKQlJPV1NFUl9BUElfQ0FMTFNfWEhSX09OTFk6IEZhbHNlCkNPUlNfT1JJR0lOOgogIC0gImh0dHBzOi8vc3RhZ2UuZm9vLnJlZGhhdC5jb206MTMzNyIKICAtICJodHRwOi8vbG9jYWxob3N0OjkwMDAiCkZFQVRVUkVfVUlfVjI6IFRydWUKRkVBVFVSRV9VU0VSX01FVEFEQVRBOiBUcnVlCklHTk9SRV9VTktOT1dOX01FRElBVFlQRVM6IGZhbHNlCg== diff --git a/e2e/test_cli.sh b/e2e/test_cli.sh index 4e8954d..e6bf124 100755 --- a/e2e/test_cli.sh +++ b/e2e/test_cli.sh @@ -1,6 +1,5 @@ -#!/bin/bash +#!/usr/bin/env bash -SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")" set -e echo "Preparing venv ..." @@ -15,21 +14,23 @@ echo "Running E2E test for CLI ..." omlmd push localhost:5001/mmortari/mlartifact:v1 README.md --empty-metadata --plain-http omlmd push localhost:5001/mmortari/mlartifact:v1 README.md --metadata tests/data/md.json --plain-http -omlmd pull localhost:5001/mmortari/mlartifact:v1 -o tmp/a --plain-http -file_count=$(find "tmp/a" -type f | wc -l) +DIR="tmp/a" +omlmd pull localhost:5001/mmortari/mlartifact:v1 -o "$DIR" --plain-http +file_count=$(find "$DIR" -type f | wc -l) if [ "$file_count" -eq 3 ]; then echo "Expected 3 files in $DIR, ok." else - echo "I was expecting 3 files in $DIR, FAIL." + echo "Expected 3 files in $DIR, got $file_count, FAIL." exit 1 fi -omlmd pull localhost:5001/mmortari/mlartifact:v1 -o tmp/b --media-types "application/x-mlmodel" --plain-http -file_count=$(find "tmp/b" -type f | wc -l) +DIR="tmp/b" +omlmd pull localhost:5001/mmortari/mlartifact:v1 -o "$DIR" --media-types "application/vnd.oci.image.layer.v1.tar" --plain-http +file_count=$(find "$DIR" -type f | wc -l) if [ "$file_count" -eq 1 ]; then - echo "Expected 1 files in $DIR, ok." + echo "Expected 1 file in $DIR, ok." else - echo "I was expecting 1 files in $DIR, FAIL." + echo "Expected 1 file in $DIR, got $file_count, FAIL." exit 1 fi @@ -38,7 +39,7 @@ omlmd crawl localhost:5001/mmortari/mlartifact:v1 localhost:5001/mmortari/mlarti omlmd crawl --plain-http \ localhost:5001/mmortari/mlartifact:v1 \ localhost:5001/mmortari/mlartifact:v1 \ - localhost:5001/mmortari/mlartifact:v1 \ - | jq "max_by(.config.customProperties.accuracy).reference" + localhost:5001/mmortari/mlartifact:v1 | + jq "max_by(.config.customProperties.accuracy).reference" deactivate diff --git a/omlmd/cli.py b/omlmd/cli.py index 8d66eca..20a7400 100644 --- a/omlmd/cli.py +++ b/omlmd/cli.py @@ -73,6 +73,11 @@ def crawl(plain_http: bool, targets: tuple[str]): required=True, type=click.Path(path_type=Path, exists=True, resolve_path=True), ) +@click.option( + "--as-artifact", + is_flag=True, + help="Push as an artifact (default is as a blob)", +) @cloup.option_group( "Metadata options", cloup.option( @@ -88,6 +93,7 @@ def push( plain_http: bool, target: str, path: Path, + as_artifact: bool, metadata: Path | None, empty_metadata: bool, ): @@ -96,4 +102,6 @@ def push( if empty_metadata: logger.warning(f"Pushing to {target} with empty metadata.") md = deserialize_mdfile(metadata) if metadata else {} - click.echo(Helper.from_default_registry(plain_http).push(target, path, **md)) + click.echo( + Helper.from_default_registry(plain_http).push(target, path, as_artifact, **md) + ) diff --git a/omlmd/constants.py b/omlmd/constants.py index 0354377..d20dad0 100644 --- a/omlmd/constants.py +++ b/omlmd/constants.py @@ -1,4 +1,7 @@ +from oras.defaults import default_blob_media_type + FILENAME_METADATA_JSON = "model_metadata.omlmd.json" -FILENAME_METADATA_YAML = "model_metadata.omlmd.yaml" -MIME_APPLICATION_CONFIG = "application/x-config" MIME_APPLICATION_MLMODEL = "application/x-mlmodel" +MIME_APPLICATION_MLMETADATA = "application/x-mlmetadata+json" +MIME_BLOB = default_blob_media_type +MIME_MANIFEST_CONFIG = "application/vnd.oci.image.config.v1+json" diff --git a/omlmd/helpers.py b/omlmd/helpers.py index 4a254f3..0caef96 100644 --- a/omlmd/helpers.py +++ b/omlmd/helpers.py @@ -1,17 +1,23 @@ from __future__ import annotations +import json import logging import os +import platform +import tarfile +import typing as t import urllib.request from collections.abc import Sequence -from dataclasses import dataclass, field, fields +from dataclasses import dataclass, field from pathlib import Path +from textwrap import dedent from .constants import ( FILENAME_METADATA_JSON, - FILENAME_METADATA_YAML, - MIME_APPLICATION_CONFIG, + MIME_APPLICATION_MLMETADATA, MIME_APPLICATION_MLMODEL, + MIME_BLOB, + MIME_MANIFEST_CONFIG, ) from .listener import Event, Listener, PushEvent from .model_metadata import ModelMetadata @@ -20,6 +26,54 @@ logger = logging.getLogger(__name__) +@dataclass +class DeferredLayer: + src: Path + dest: Path + media_type: str + transform: t.Callable[[], None] | None = None + owned: bool = True + + def __post_init__(self): + if self.dest.exists(): + self.owned = False + + @classmethod + def raw(cls, src: Path, media_type: str) -> DeferredLayer: + return cls(src, src, media_type) + + @classmethod + def blob(cls, src: Path, gz: bool = False) -> DeferredLayer: + oflag = "w" + media_type = MIME_BLOB + if gz: + oflag += ":gz" + media_type += "+gzip" + + dest = src.with_suffix(".tar") + + def _tar(): + with tarfile.open(dest, oflag) as tf: + tf.add(src, arcname=src.name) + + return cls(src, dest, media_type, _tar) + + def as_layer(self) -> str: + if self.owned and self.transform: + self.transform() + return f"{self.dest}:{self.media_type}" + + +def get_arch() -> str: + mac = platform.machine() + if mac == "x86_64": + return "amd64" + if mac == "arm64" or mac == "aarch64": + return "arm64" + msg = f"Unsupported architecture: {mac}" + raise NotImplementedError(msg) + + def download_file(uri: str): file_name = os.path.basename(uri) urllib.request.urlretrieve(uri, file_name) @@ -41,70 +95,82 @@ def push( self, target: str, path: Path | str, - name: str | None = None, - description: str | None = None, - author: str | None = None, - model_format_name: str | None = None, - model_format_version: str | None = None, + as_artifact: bool = False, **kwargs, ): - dataclass_fields = { - f.name for f in fields(ModelMetadata) - } # avoid anything specified in kwargs which would collide - custom_properties = { - k: v for k, v in kwargs.items() if k not in dataclass_fields - } - model_metadata = ModelMetadata( - name=name, - description=description, - author=author, - customProperties=custom_properties, - model_format_name=model_format_name, - model_format_version=model_format_version, - ) - owns_meta_files = True + owns_meta = True if isinstance(path, str): path = Path(path) - json_meta = path.parent / FILENAME_METADATA_JSON - yaml_meta = path.parent / FILENAME_METADATA_YAML - if model_metadata.is_empty() and json_meta.exists() and yaml_meta.exists(): - owns_meta_files = False - logger.warning("No metadata supplied, but reusing md files found in path.") - logger.debug(f"{json_meta}, {yaml_meta}") - with open(json_meta, "r") as f: - model_metadata = ModelMetadata.from_json(f.read()) - elif (p := json_meta).exists() or (p := yaml_meta).exists(): - raise RuntimeError( - f"File '{p}' already exists. Aborting TODO: demonstrator." + meta_path = path.parent / FILENAME_METADATA_JSON + if meta_path.exists(): + owns_meta = False + logger.warning("Reusing intermediate metadata files.") + logger.debug(f"{meta_path}") + model_metadata = ModelMetadata(**json.loads(meta_path.read_bytes())) + if kwargs and ModelMetadata.from_dict(kwargs) != model_metadata: + err = dedent(f""" + OMLMD intermediate metadata files found at '{meta_path}'. + Cannot resolve with conflicting keyword args: {kwargs}. + You can reuse the existing metadata by omitting any keywords. + If that was NOT intended, please REMOVE that file from your environment before re-running. + + Note for advanced users: if merging keys with existing metadata is desired, you should create a Feature Request upstream: https://github.com/containers/omlmd""") + raise RuntimeError(err) + else: + model_metadata = ModelMetadata.from_dict(kwargs) + meta_path.write_text(json.dumps(model_metadata.to_dict())) + + manifest_path = path.parent / "manifest.json" + model: DeferredLayer | None = None + meta: DeferredLayer | None = None + if not as_artifact: + manifest_path.write_text( + json.dumps( + { + "architecture": get_arch(), + "os": "linux", + } + ) ) + config = DeferredLayer.raw(manifest_path, MIME_MANIFEST_CONFIG) + model = DeferredLayer.blob(path) + meta = DeferredLayer.blob(meta_path, gz=True) else: - json_meta.write_text(model_metadata.to_json()) - yaml_meta.write_text(model_metadata.to_yaml()) - - manifest_cfg = f"{json_meta}:{MIME_APPLICATION_CONFIG}" - files = [ - f"{path}:{MIME_APPLICATION_MLMODEL}", - manifest_cfg, - f"{yaml_meta}:{MIME_APPLICATION_CONFIG}", + manifest_path.write_text( + json.dumps( + { + "artifactType": MIME_APPLICATION_MLMODEL, + } + ) + ) + config = DeferredLayer.raw(manifest_path, MIME_APPLICATION_MLMODEL) + model = DeferredLayer.raw(path, MIME_APPLICATION_MLMODEL) + meta = DeferredLayer.raw(meta_path, MIME_APPLICATION_MLMETADATA) + meta.owned = owns_meta + + layers = [ + config, + model, + meta, ] try: - # print(target, files, model_metadata.to_annotations_dict()) result = self._registry.push( target=target, - files=files, + files=[lay.as_layer() for lay in layers], manifest_annotations=model_metadata.to_annotations_dict(), - manifest_config=manifest_cfg, + manifest_config=config.as_layer(), do_chunked=True, ) - self.notify_listeners( - PushEvent.from_response(result, target, model_metadata) - ) - return result finally: - if owns_meta_files: - json_meta.unlink() - yaml_meta.unlink() + for lay in layers: + if lay.owned: + lay.dest.unlink() + if owns_meta and meta_path.exists(): + meta_path.unlink() + + self.notify_listeners(PushEvent.from_response(result, target, model_metadata)) + return result def pull( self, target: str, outdir: Path | str, media_types: Sequence[str] | None = None diff --git a/omlmd/listener.py b/omlmd/listener.py index 8122fde..455edab 100644 --- a/omlmd/listener.py +++ b/omlmd/listener.py @@ -10,9 +10,7 @@ class Listener(ABC): - """ - TODO: not yet settled for multi-method or current single update method. - """ + # TODO: not yet settled for multi-method or current single update method. @abstractmethod def update(self, source: t.Any, event: Event) -> None: diff --git a/omlmd/model_metadata.py b/omlmd/model_metadata.py index b669eb6..ad96697 100644 --- a/omlmd/model_metadata.py +++ b/omlmd/model_metadata.py @@ -1,8 +1,8 @@ from __future__ import annotations import json +import typing as t from dataclasses import asdict, dataclass, field, fields -from typing import Any import yaml @@ -12,15 +12,12 @@ class ModelMetadata: name: str | None = None description: str | None = None author: str | None = None - customProperties: dict[str, Any] | None = field(default_factory=dict) + customProperties: dict[str, t.Any] | None = field(default_factory=dict) uri: str | None = None model_format_name: str | None = None model_format_version: str | None = None - def to_json(self) -> str: - return json.dumps(self.to_dict(), indent=4) - - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> dict[str, t.Any]: return asdict(self) def to_annotations_dict(self) -> dict[str, str]: @@ -37,30 +34,12 @@ def to_annotations_dict(self) -> dict[str, str]: v ) # post-fix "+json" for OCI annotation which is a str representing a json return result - - def is_empty(self) -> bool: - return all(getattr(self, f.name) is None for f in fields(ModelMetadata) if f.name != "customProperties") and not self.customProperties - - @staticmethod - def from_json(json_str: str) -> "ModelMetadata": - data = json.loads(json_str) - return ModelMetadata(**data) - - def to_yaml(self) -> str: - return yaml.dump(self.to_dict(), default_flow_style=False) - - @staticmethod - def from_yaml(yaml_str: str) -> "ModelMetadata": - data = yaml.safe_load(yaml_str) - return ModelMetadata(**data) @staticmethod - def from_dict(data: dict[str, Any]) -> "ModelMetadata": + def from_dict(data: dict[str, t.Any]) -> "ModelMetadata": known_keys = {f.name for f in fields(ModelMetadata)} - known_properties = {key: data.get(key) for key in known_keys if key in data} - custom_properties = { - key: value for key, value in data.items() if key not in known_keys - } + known_properties = {key: data.pop(key) for key in known_keys if key in data} + custom_properties = {key: value for key, value in data.items()} return ModelMetadata(**known_properties, customProperties=custom_properties) diff --git a/poetry.lock b/poetry.lock index 31ce5b5..46014dc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1456,13 +1456,13 @@ files = [ [[package]] name = "oras" -version = "0.2.23" +version = "0.2.25" description = "OCI Registry as Storage Python SDK" optional = false python-versions = "*" files = [ - {file = "oras-0.2.23-py3-none-any.whl", hash = "sha256:de4db3f1b4b7d8404ead0a051364697a39c266ee848b146b65e277440d17099d"}, - {file = "oras-0.2.23.tar.gz", hash = "sha256:040c7fd4d94de4e31ad7b576592c8a7a0c8a3afa363fca81e3babbd405a1857a"}, + {file = "oras-0.2.25-py3-none-any.whl", hash = "sha256:96fc22cada3da820dcb69d08b1b0595f9b9a7a091e3d8158ad72040fec1b088d"}, + {file = "oras-0.2.25.tar.gz", hash = "sha256:6a9788f47265034c4a56e1ffb911a969eca5b1e633d651c065d2f68eb90333bf"}, ] [package.dependencies] @@ -2582,4 +2582,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "09148e1d60c1b8c92d31467b68e406f6dbf2566f3d0b0cff7f7f01067276d726" +content-hash = "8b206963d144d52c7c454acae4d27beb415eb3956d1b9390080697c879cdd76e" diff --git a/pyproject.toml b/pyproject.toml index de386f5..27b93c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ Changelog = "https://github.com/containers/omlmd/releases" [tool.poetry.dependencies] python = "^3.9" -oras = "^0.2.23" +oras = "^0.2.25" pyyaml = "^6.0.1" click = "^8.1.7" cloup = "^3.0.5" diff --git a/tests/test_e2e_model_registry.py b/tests/test_e2e_model_registry.py index 972b0ac..8629814 100644 --- a/tests/test_e2e_model_registry.py +++ b/tests/test_e2e_model_registry.py @@ -77,7 +77,9 @@ def update(self, source: Helper, event: Event) -> None: assert mv assert mv.description == "Lorem ipsum" assert mv.author == "John Doe" - assert mv.custom_properties == {"accuracy": 0.987} + assert mv.custom_properties == { + "accuracy": accuracy_value, + } ma = model_registry.get_model_artifact("mnist", v) assert ma diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 00bc193..9cc6e7b 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,5 +1,7 @@ +import io import json import subprocess +import tarfile import tempfile import typing as t from hashlib import sha256 @@ -7,13 +9,19 @@ import pytest -from omlmd.constants import MIME_APPLICATION_MLMODEL +from omlmd.constants import MIME_BLOB from omlmd.helpers import Helper from omlmd.listener import Event, Listener from omlmd.model_metadata import ModelMetadata, deserialize_mdfile from omlmd.provider import OMLMDRegistry +def untar(tar: Path, out: Path): + out.write_bytes( + t.cast(io.BufferedReader, tarfile.open(tar, "r:*").extractfile(tar.stem)).read() + ) + + def test_call_push_using_md_from_file(mocker): helper = Helper() mocker.patch.object(helper, "push", return_value=None) @@ -100,12 +108,33 @@ def test_push_pull_chunked(tmp_path, target): omlmd.push(target, temp, **md) omlmd.pull(target, tmp_path) - assert len(list(tmp_path.iterdir())) == 3 - assert tmp_path.joinpath(temp.name).stat().st_size == base_size + files = list(tmp_path.iterdir()) + print(files) + assert len(files) == 3 + print(tmp_path) + out = tmp_path.joinpath(temp.name) + untar(out.with_suffix(".tar"), out) + assert temp.stat().st_size == base_size finally: temp.unlink() +@pytest.mark.e2e +def test_e2e_push_pull_as_artifact(tmp_path, target): + omlmd = Helper() + omlmd.push( + target, + Path(__file__).parent / ".." / "README.md", + as_artifact=True, + name="mnist", + description="Lorem ipsum", + author="John Doe", + accuracy=0.987, + ) + omlmd.pull(target, tmp_path) + assert len(list(tmp_path.iterdir())) == 3 + + @pytest.mark.e2e def test_e2e_push_pull(tmp_path, target): omlmd = Helper() @@ -132,7 +161,7 @@ def test_e2e_push_pull_with_filters(tmp_path, target): author="John Doe", accuracy=0.987, ) - omlmd.pull(target, tmp_path, media_types=[MIME_APPLICATION_MLMODEL]) + omlmd.pull(target, tmp_path, media_types=[MIME_BLOB]) assert len(list(tmp_path.iterdir())) == 1 @@ -155,10 +184,11 @@ def test_e2e_push_pull_column(tmp_path, target): omlmd.push(target, temp, **md) omlmd.pull(target, tmp_path) - with open(tmp_path.joinpath(temp.name), "r") as f: - pulled = f.read() - assert pulled == content - pulled_sha = sha256(pulled.encode("utf-8")).hexdigest() - assert pulled_sha == content_sha + out = tmp_path.joinpath(temp.name) + untar(out.with_suffix(".tar"), out) + pulled = out.read_text() + assert pulled == content + pulled_sha = sha256(pulled.encode("utf-8")).hexdigest() + assert pulled_sha == content_sha finally: temp.unlink() diff --git a/tests/test_omlmd.py b/tests/test_omlmd.py index 786a684..7028c31 100644 --- a/tests/test_omlmd.py +++ b/tests/test_omlmd.py @@ -8,14 +8,14 @@ def test_dry_run_model_metadata_json_yaml_conversions(): metadata = ModelMetadata(name="Example Model", author="John Doe") - json_str = metadata.to_json() - yaml_str = metadata.to_yaml() + json_str = json.dumps(metadata.to_dict(), indent=4) + yaml_str = yaml.dump(metadata.to_dict(), default_flow_style=False) print("JSON representation:\n", json_str) print("YAML representation:\n", yaml_str) - metadata_from_json = ModelMetadata.from_json(json_str) - metadata_from_yaml = ModelMetadata.from_yaml(yaml_str) + metadata_from_json = ModelMetadata(**json.loads(json_str)) + metadata_from_yaml = ModelMetadata(**yaml.safe_load(yaml_str)) print("Metadata from JSON:\n", metadata_from_json) print("Metadata from YAML:\n", metadata_from_yaml) @@ -72,26 +72,3 @@ def test_from_dict(): customProperties={"accuracy": 0.987}, ) assert ModelMetadata.from_dict(data) == md - - -def test_is_empty(): - md = ModelMetadata( - name="mnist", - description="Lorem ipsum", - author="John Doe", - customProperties={"accuracy": 0.987}, - ) - assert not md.is_empty() - - md = ModelMetadata() - assert md.is_empty() - - md = ModelMetadata( - customProperties={"accuracy": 0.987}, - ) - assert not md.is_empty() - - md = ModelMetadata( - name="mnist", - ) - assert not md.is_empty()