Skip to content

Commit

Permalink
Merge pull request #664 from bioimage-io/fix_for_pydantic_2.10
Browse files Browse the repository at this point in the history
Fix URL validation + housekeeping
  • Loading branch information
FynnBe authored Dec 6, 2024
2 parents a218c75 + 44aa991 commit 485deee
Show file tree
Hide file tree
Showing 21 changed files with 253 additions and 111 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
include:
- python-version: "3.12"
is-dev-version: true
run_expensive_tests: true
run-expensive-tests: true
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
Expand Down Expand Up @@ -53,7 +53,7 @@ jobs:
- run: pytest
env:
BIOIMAGEIO_CACHE_PATH: bioimageio_cache
SKIP_EXPENSIVE_TESTS: ${{ matrix.run_expensive_tests && 'false' || 'true' }}
RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }}
- uses: actions/cache/save@v4
# explicit restore/save instead of cache action to cache even if coverage fails
with:
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ To keep the bioimageio.spec Python package version in sync with the (model) desc

### bioimageio.spec Python package

#### bioimageio.spec 0.5.3.6

* fix URL validation (checking with actual HTTP requests was erroneously skipped)

#### bioimageio.spec 0.5.3.5

* fix loading tifffile in python 3.8 (pin tifffile)
Expand Down
2 changes: 1 addition & 1 deletion bioimageio/spec/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "0.5.3.5"
"version": "0.5.3.6"
}
5 changes: 3 additions & 2 deletions bioimageio/spec/_internal/common_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from .io_utils import write_content_to_zip
from .node import Node
from .packaging_context import PackagingContext
from .root_url import RootHttpUrl
from .url import HttpUrl
from .utils import (
assert_all_params_set_explicitly,
Expand Down Expand Up @@ -343,12 +344,12 @@ def validation_summary(self) -> ValidationSummary:
assert self._validation_summary is not None, "access only after initialization"
return self._validation_summary

_root: Union[HttpUrl, DirectoryPath] = PrivateAttr(
_root: Union[RootHttpUrl, DirectoryPath, ZipPath] = PrivateAttr(
default_factory=lambda: validation_context_var.get().root
)

@property
def root(self) -> Union[HttpUrl, DirectoryPath]:
def root(self) -> Union[RootHttpUrl, DirectoryPath, ZipPath]:
return self._root

@classmethod
Expand Down
17 changes: 14 additions & 3 deletions bioimageio/spec/_internal/type_guards.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import collections.abc
from typing import Any, Dict, Mapping, Sequence, Tuple
from typing import Any, Dict, List, Mapping, Sequence, Tuple

import numpy as np
from numpy.typing import NDArray
from typing_extensions import TypeGuard


Expand All @@ -25,6 +27,15 @@ def is_sequence(v: Any) -> TypeGuard[Sequence[Any]]:
return isinstance(v, collections.abc.Sequence)


def is_tuple(v: Any) -> TypeGuard[Tuple[Any]]:
"""to avoid Tuple[Unknown]"""
def is_tuple(v: Any) -> TypeGuard[Tuple[Any, ...]]:
"""to avoid Tuple[Unknown, ...]"""
return isinstance(v, tuple)


def is_list(v: Any) -> TypeGuard[List[Any]]:
"""to avoid List[Unknown]"""
return isinstance(v, list)


def is_ndarray(v: Any) -> TypeGuard[NDArray[Any]]:
return isinstance(v, np.ndarray)
46 changes: 24 additions & 22 deletions bioimageio/spec/_internal/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@
import requests
import requests.exceptions
from loguru import logger
from pydantic import RootModel, model_validator
from pydantic import RootModel
from typing_extensions import Literal, assert_never

from .field_warning import issue_warning
from .root_url import RootHttpUrl
from .validation_context import validation_context_var


def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.AnyUrl:
def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.HttpUrl:
return _validate_url_impl(url, request_mode="head")


def _validate_url_impl(
url: Union[str, pydantic.HttpUrl],
request_mode: Literal["head", "get_stream", "get"],
timeout: int = 3,
) -> pydantic.AnyUrl:
) -> pydantic.HttpUrl:

url = str(url)
val_url = url
Expand Down Expand Up @@ -76,7 +76,9 @@ def _validate_url_impl(
msg_context={"error": str(e)},
)
else:
if response.status_code == 302: # found
if response.status_code == 200: # ok
pass
elif response.status_code == 302: # found
pass
elif response.status_code in (301, 303, 308):
issue_warning(
Expand All @@ -88,17 +90,10 @@ def _validate_url_impl(
"location": response.headers.get("location"),
},
)
elif response.status_code == 403: # forbidden
if request_mode == "head":
return _validate_url_impl(
url, request_mode="get_stream", timeout=timeout
)
elif request_mode == "get_stream":
return _validate_url_impl(url, request_mode="get", timeout=timeout)
elif request_mode == "get":
raise ValueError(f"{response.status_code}: {response.reason} {url}")
else:
assert_never(request_mode)
elif request_mode == "head":
return _validate_url_impl(url, request_mode="get_stream", timeout=timeout)
elif request_mode == "get_stream":
return _validate_url_impl(url, request_mode="get", timeout=timeout)
elif response.status_code == 405:
issue_warning(
"{status_code}: {reason} {value}",
Expand All @@ -108,10 +103,15 @@ def _validate_url_impl(
"reason": response.reason,
},
)
elif response.status_code != 200:
elif request_mode == "get":
raise ValueError(f"{response.status_code}: {response.reason} {url}")
else:
assert_never(request_mode)

return pydantic.AnyUrl(url)
return ( # pyright: ignore[reportUnknownVariableType]
# TODO: remove pyright ignore for pydantic > 2.9
pydantic.HttpUrl(url) # pyright: ignore[reportCallIssue]
)


class HttpUrl(RootHttpUrl):
Expand All @@ -120,12 +120,14 @@ class HttpUrl(RootHttpUrl):
root_model: ClassVar[Type[RootModel[Any]]] = RootModel[pydantic.HttpUrl]
_exists: Optional[bool] = None

@model_validator(mode="after")
def _validate_url(self):
url = self._validated
def _after_validator(self):
self = super()._after_validator()
context = validation_context_var.get()
if context.perform_io_checks and str(url) not in context.known_files:
self._validated = _validate_url(url)
if (
context.perform_io_checks
and str(self._validated) not in context.known_files
):
self._validated = _validate_url(self._validated)
self._exists = True

return self
Expand Down
5 changes: 5 additions & 0 deletions bioimageio/spec/_internal/validated_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
CoreSchema,
no_info_after_validator_function,
)
from typing_extensions import Self


class ValidatedString(str):
Expand All @@ -18,6 +19,10 @@ class ValidatedString(str):
def __new__(cls, object: object):
self = super().__new__(cls, object)
self._validated = cls.root_model.model_validate(str(self)).root
return self._after_validator()

def _after_validator(self) -> Self:
"""add validation after the `root_model`"""
return self

@classmethod
Expand Down
4 changes: 2 additions & 2 deletions bioimageio/spec/_internal/validation_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pydantic import DirectoryPath

from ._settings import settings
from .io_basics import AbsoluteDirectory, FileName, Sha256
from .io_basics import FileName, Sha256
from .root_url import RootHttpUrl
from .warning_levels import WarningLevel

Expand All @@ -21,7 +21,7 @@ class ValidationContext:
init=False, default_factory=list
)

root: Union[RootHttpUrl, AbsoluteDirectory, ZipFile] = Path()
root: Union[RootHttpUrl, DirectoryPath, ZipFile] = Path()
"""url/directory serving as base to resolve any relative file paths"""

warning_level: WarningLevel = 50
Expand Down
23 changes: 10 additions & 13 deletions bioimageio/spec/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def load_description(
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
perform_io_checks: bool = settings.perform_io_checks,
known_files: Optional[Dict[str, Sha256]] = None,
sha256: Optional[Sha256] = None,
) -> Union[ResourceDescr, InvalidDescr]:
"""load a bioimage.io resource description
Expand All @@ -45,6 +46,7 @@ def load_description(
absolute file paths is still being checked.
known_files: Allows bypassing download and hashing of referenced files
(even if perform_io_checks is True).
sha256: Optional SHA-256 value of **source**
Returns:
An object holding all metadata of the bioimage.io resource
Expand All @@ -55,7 +57,7 @@ def load_description(
logger.warning("returning already loaded description '{}' as is", name)
return source # pyright: ignore[reportReturnType]

opened = open_bioimageio_yaml(source)
opened = open_bioimageio_yaml(source, sha256=sha256)

context = validation_context_var.get().replace(
root=opened.original_root,
Expand All @@ -78,6 +80,7 @@ def load_model_description(
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
perform_io_checks: bool = settings.perform_io_checks,
known_files: Optional[Dict[str, Sha256]] = None,
sha256: Optional[Sha256] = None,
) -> AnyModelDescr:
"""same as `load_description`, but additionally ensures that the loaded
description is valid and of type 'model'.
Expand All @@ -90,6 +93,7 @@ def load_model_description(
format_version=format_version,
perform_io_checks=perform_io_checks,
known_files=known_files,
sha256=sha256,
)
return ensure_description_is_model(rd)

Expand All @@ -101,6 +105,7 @@ def load_dataset_description(
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
perform_io_checks: bool = settings.perform_io_checks,
known_files: Optional[Dict[str, Sha256]] = None,
sha256: Optional[Sha256] = None,
) -> AnyDatasetDescr:
"""same as `load_description`, but additionally ensures that the loaded
description is valid and of type 'dataset'.
Expand All @@ -110,6 +115,7 @@ def load_dataset_description(
format_version=format_version,
perform_io_checks=perform_io_checks,
known_files=known_files,
sha256=sha256,
)
return ensure_description_is_dataset(rd)

Expand Down Expand Up @@ -140,19 +146,9 @@ def load_description_and_validate_format_only(
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
perform_io_checks: bool = settings.perform_io_checks,
known_files: Optional[Dict[str, Sha256]] = None,
sha256: Optional[Sha256] = None,
) -> ValidationSummary:
"""load a bioimage.io resource description
Args:
source: Path or URL to an rdf.yaml or a bioimage.io package
(zip-file with rdf.yaml in it).
format_version: (optional) Use this argument to load the resource and
convert its metadata to a higher format_version.
perform_io_checks: Whether or not to perform validation that requires file io,
e.g. downloading remote files. The existence of local
absolute file paths is still being checked.
known_files: Allows to bypass download and hashing of referenced files
(even if perform_io_checks is True).
"""same as `load_description`, but only return the validation summary.
Returns:
Validation summary of the bioimage.io resource found at `source`.
Expand All @@ -163,6 +159,7 @@ def load_description_and_validate_format_only(
format_version=format_version,
perform_io_checks=perform_io_checks,
known_files=known_files,
sha256=sha256,
)
assert rd.validation_summary is not None
return rd.validation_summary
4 changes: 2 additions & 2 deletions bioimageio/spec/application/v0_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ..generic.v0_3 import CiteEntry as CiteEntry
from ..generic.v0_3 import DeprecatedLicenseId as DeprecatedLicenseId
from ..generic.v0_3 import Doi as Doi
from ..generic.v0_3 import GenericDescrBase, LinkedResourceNode, ResourceId
from ..generic.v0_3 import GenericDescrBase, LinkedResourceBase, ResourceId
from ..generic.v0_3 import LicenseId as LicenseId
from ..generic.v0_3 import LinkedResource as LinkedResource
from ..generic.v0_3 import Maintainer as Maintainer
Expand Down Expand Up @@ -47,7 +47,7 @@ class ApplicationDescr(GenericDescrBase):
"""The primary source of the application"""


class LinkedApplication(LinkedResourceNode):
class LinkedApplication(LinkedResourceBase):
"""Reference to a bioimage.io application."""

id: ApplicationId
Expand Down
9 changes: 8 additions & 1 deletion bioimageio/spec/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@
FileDescr,
YamlValue,
)
from ._internal.io_basics import AbsoluteDirectory, AbsoluteFilePath, FileName, Sha256
from ._internal.io_basics import (
AbsoluteDirectory,
AbsoluteFilePath,
FileName,
Sha256,
ZipPath,
)
from ._internal.root_url import RootHttpUrl
from ._internal.types import (
FilePath,
Expand All @@ -34,4 +40,5 @@
"Sha256",
"ValidationError",
"YamlValue",
"ZipPath",
]
4 changes: 2 additions & 2 deletions bioimageio/spec/dataset/v0_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ..generic.v0_3 import (
DocumentationSource,
GenericDescrBase,
LinkedResourceNode,
LinkedResourceBase,
_author_conv, # pyright: ignore[reportPrivateUsage]
_maintainer_conv, # pyright: ignore[reportPrivateUsage]
)
Expand Down Expand Up @@ -105,7 +105,7 @@ def _convert(cls, data: Dict[str, Any], /) -> Dict[str, Any]:
return data


class LinkedDataset(LinkedResourceNode):
class LinkedDataset(LinkedResourceBase):
"""Reference to a bioimage.io dataset."""

id: DatasetId
Expand Down
Loading

0 comments on commit 485deee

Please sign in to comment.