From ec31203d5e0f08a4ea855bf7a357ad351a97ddaf Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Mon, 27 Mar 2023 15:47:49 -0700 Subject: [PATCH] Make use of jsonable_encoder, buildout tests for submitting STAC items --- lib/ingestor-api/runtime/src/schemas.py | 28 ++-- lib/ingestor-api/runtime/src/utils.py | 6 +- lib/ingestor-api/runtime/tests/conftest.py | 9 +- .../runtime/tests/test_registration.py | 129 ++++++++++++++++-- lib/ingestor-api/runtime/tests/test_utils.py | 5 +- 5 files changed, 139 insertions(+), 38 deletions(-) diff --git a/lib/ingestor-api/runtime/src/schemas.py b/lib/ingestor-api/runtime/src/schemas.py index 24271f4..79db560 100644 --- a/lib/ingestor-api/runtime/src/schemas.py +++ b/lib/ingestor-api/runtime/src/schemas.py @@ -3,10 +3,10 @@ import enum import json from datetime import datetime -from decimal import Decimal from typing import TYPE_CHECKING, Dict, List, Optional, Union from urllib.parse import urlparse +from fastapi.encoders import jsonable_encoder from fastapi.exceptions import RequestValidationError from pydantic import ( BaseModel, @@ -30,7 +30,7 @@ def is_accessible(cls, href): url = urlparse(href) if url.scheme in ["https", "http"]: - validators.url_is_accessible(href) + validators.url_is_accessible(href=href) elif url.scheme in ["s3"]: validators.s3_object_is_accessible( bucket=url.hostname, key=url.path.lstrip("/") @@ -73,19 +73,6 @@ class Ingestion(BaseModel): item: Union[Item, Json[Item]] - class Config: - json_encoders = { - # Custom JSON serializer to ensure that item encodes as string. - # NOTE: when serializing, must call as ingestion.json(models_as_dict=False) - Item: lambda item: item.json(by_alias=True), - } - - def json(self, *args, **kwargs): - # Update default to not represent models (e.g. 
`items` property) as a dict to - # allow our `json_encoders` override to properly serialize `items` property - kwargs.setdefault("models_as_dict", False) - return super().json(*args, **kwargs) - @validator("created_at", pre=True, always=True, allow_reuse=True) @validator("updated_at", pre=True, always=True, allow_reuse=True) def set_ts_now(cls, v): @@ -104,9 +91,16 @@ def save(self, db: "services.Database"): db.write(self) return self - def dynamodb_dict(self, by_alias=True): + def dynamodb_dict(self): """DynamoDB-friendly serialization""" - return json.loads(self.json(by_alias=by_alias), parse_float=Decimal) + # convert to dictionary + output = self.dict(exclude={"item"}) + + # add STAC item as string + output["item"] = self.item.json() + + # make JSON-friendly (will be able to do with Pydantic V2, https://github.com/pydantic/pydantic/issues/1409#issuecomment-1423995424) + return jsonable_encoder(output) @dataclasses.dataclass diff --git a/lib/ingestor-api/runtime/src/utils.py b/lib/ingestor-api/runtime/src/utils.py index 0a975c0..1da2b57 100644 --- a/lib/ingestor-api/runtime/src/utils.py +++ b/lib/ingestor-api/runtime/src/utils.py @@ -1,11 +1,10 @@ -import json from typing import Sequence import boto3 -import orjson import pydantic from pypgstac.db import PgstacDB from pypgstac.load import Methods +from fastapi.encoders import jsonable_encoder from .loader import Loader from .schemas import Ingestion @@ -42,7 +41,8 @@ def load_items(creds: DbCreds, ingestions: Sequence[Ingestion]): with PgstacDB(dsn=creds.dsn_string, debug=True) as db: loader = Loader(db=db) - items = [json.loads(orjson.dumps(i.item.dict())) for i in ingestions] + # serialize to JSON-friendly dicts (won't be necessary in Pydantic v2, https://github.com/pydantic/pydantic/issues/1409#issuecomment-1423995424) + items = jsonable_encoder(i.item for i in ingestions) loading_result = loader.load_items( file=items, # use insert_ignore to avoid overwritting existing items or upsert to replace diff --git 
a/lib/ingestor-api/runtime/tests/conftest.py b/lib/ingestor-api/runtime/tests/conftest.py index b83aafa..f2d357e 100644 --- a/lib/ingestor-api/runtime/tests/conftest.py +++ b/lib/ingestor-api/runtime/tests/conftest.py @@ -129,13 +129,13 @@ def example_stac_item(): ], "assets": { "visual": { - "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.tif", # noqa + "href": "https://TEST_API.com/open-cogs/stac-examples/20201211_223832_CS2.tif", # noqa "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "3-Band Visual", "roles": ["visual"], }, "thumbnail": { - "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.jpg", # noqa + "href": "https://TEST_API.com/open-cogs/stac-examples/20201211_223832_CS2.jpg", # noqa "title": "Thumbnail", "type": "image/jpeg", "roles": ["thumbnail"], @@ -248,10 +248,7 @@ def client_authenticated(app): """ from src.dependencies import get_username - def skip_auth(): - pass - - app.dependency_overrides[get_username] = skip_auth + app.dependency_overrides[get_username] = lambda: 'test_user' return TestClient(app) diff --git a/lib/ingestor-api/runtime/tests/test_registration.py b/lib/ingestor-api/runtime/tests/test_registration.py index 2d400dc..55721de 100644 --- a/lib/ingestor-api/runtime/tests/test_registration.py +++ b/lib/ingestor-api/runtime/tests/test_registration.py @@ -2,9 +2,12 @@ import json from datetime import timedelta from typing import TYPE_CHECKING, List +from unittest.mock import call, patch +from fastapi.encoders import jsonable_encoder import pytest + if TYPE_CHECKING: from fastapi.testclient import TestClient from src import schemas, services @@ -12,6 +15,121 @@ ingestion_endpoint = "/ingestions" +@pytest.fixture() +def collection_exists(): + with patch("src.validators.collection_exists", return_value=True) as m: + yield m + + +@pytest.fixture() +def collection_missing(): + def bad_collection(collection_id: str): + raise ValueError("MOCKED 
MISSING COLLECTION ERROR") + + with patch("src.validators.collection_exists", side_effect=bad_collection) as m: + yield m + + +@pytest.fixture() +def asset_exists(): + with patch("src.validators.url_is_accessible", return_value=True) as m: + yield m + + +@pytest.fixture() +def asset_missing(): + def bad_asset_url(href: str): + raise ValueError("MOCKED INACCESSIBLE URL ERROR") + + with patch("src.validators.url_is_accessible", side_effect=bad_asset_url) as m: + yield m + + +class TestCreate: + @pytest.fixture(autouse=True) + def setup( + self, + api_client: "TestClient", + mock_table: "services.Table", + example_ingestion: "schemas.Ingestion", + ): + from src import services + + self.api_client = api_client + self.mock_table = mock_table + self.db = services.Database(self.mock_table) + self.example_ingestion = example_ingestion + + def test_unauthenticated_create(self): + response = self.api_client.post( + ingestion_endpoint, + json=jsonable_encoder(self.example_ingestion.item), + ) + + assert response.status_code == 403 + + def test_create(self, client_authenticated, collection_exists, asset_exists): + response = self.api_client.post( + ingestion_endpoint, + json=jsonable_encoder(self.example_ingestion.item), + ) + + assert response.status_code == 201 + collection_exists.assert_called_once_with( + collection_id=self.example_ingestion.item.collection + ) + + stored_data = self.db.fetch_many(status="queued")["items"] + assert len(stored_data) == 1 + assert json.loads(stored_data[0].json(by_alias=True)) == response.json() + + def test_validates_missing_collection( + self, client_authenticated, collection_missing, asset_exists + ): + response = self.api_client.post( + ingestion_endpoint, + json=jsonable_encoder(self.example_ingestion.item), + ) + + collection_missing.assert_called_once_with( + collection_id=self.example_ingestion.item.collection + ) + assert response.status_code == 422, "should get validation error" + assert ( + len(self.db.fetch_many(status="queued")["items"]) == 0 
+ ), "data should not be stored in DB" + + def test_validates_missing_assets( + self, client_authenticated, collection_exists, asset_missing + ): + response = self.api_client.post( + ingestion_endpoint, + json=jsonable_encoder(self.example_ingestion.item), + ) + + collection_exists.assert_called_once_with( + collection_id=self.example_ingestion.item.collection + ) + asset_missing.assert_has_calls( + [ + call(href=asset.href) + for asset in self.example_ingestion.item.assets.values() + ], + any_order=True, + ) + assert response.status_code == 422, "should get validation error" + for asset_type in self.example_ingestion.item.assets.keys(): + assert any( + [ + err["loc"] == ["body", "assets", asset_type, "href"] + for err in response.json()["detail"] + ] + ), "should reference asset type in validation error response" + assert ( + len(self.db.fetch_many(status="queued")["items"]) == 0 + ), "data should not be stored in DB" + + class TestList: @pytest.fixture(autouse=True) def setup( @@ -36,8 +154,7 @@ def populate_table(self, count=100) -> List["schemas.Ingestion"]: def test_simple_lookup(self): self.mock_table.put_item(Item=self.example_ingestion.dynamodb_dict()) - ingestion = self.example_ingestion.dynamodb_dict() - ingestion["item"] = json.loads(ingestion["item"]) + ingestion = jsonable_encoder(self.example_ingestion) response = self.api_client.get(ingestion_endpoint) assert response.status_code == 200 assert response.json() == { @@ -58,13 +175,7 @@ def test_next_response(self): response = self.api_client.get(ingestion_endpoint, params={"limit": limit}) assert response.status_code == 200 assert json.loads(base64.b64decode(response.json()["next"])) == expected_next - ingestions = [] - for ingestion in example_ingestions[:limit]: - item = ingestion.dynamodb_dict() - item["item"] = json.loads(item["item"]) - ingestions.append(item) - - assert response.json()["items"] == ingestions + assert response.json()["items"] == jsonable_encoder(example_ingestions[:limit]) 
@pytest.mark.skip(reason="Test is currently broken") def test_get_next_page(self): diff --git a/lib/ingestor-api/runtime/tests/test_utils.py b/lib/ingestor-api/runtime/tests/test_utils.py index bb95a4e..f03bb1d 100644 --- a/lib/ingestor-api/runtime/tests/test_utils.py +++ b/lib/ingestor-api/runtime/tests/test_utils.py @@ -1,9 +1,8 @@ -import json from unittest.mock import Mock, patch -import orjson import pytest from pypgstac.load import Methods +from fastapi.encoders import jsonable_encoder from src.utils import DbCreds @@ -31,6 +30,6 @@ def test_load_items(loader, pgstacdb, example_ingestion, dbcreds): utils.load_items(dbcreds, list([example_ingestion])) loader.return_value.load_items.assert_called_once_with( - file=[json.loads(orjson.dumps(example_ingestion.item.dict()))], + file=jsonable_encoder([example_ingestion.item]), insert_mode=Methods.upsert, )