Skip to content

Commit

Permalink
Merge pull request #21 from openweathermap/feature/validation
Browse files Browse the repository at this point in the history
Added _validate_collection and validate_custom_attributes methods
  • Loading branch information
iakovdudinsky authored Oct 19, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents 5d429e8 + 2a4f858 commit 039674f
Showing 7 changed files with 156 additions and 102 deletions.
32 changes: 9 additions & 23 deletions deker/ABC/base_array.py
Original file line number Diff line number Diff line change
@@ -32,14 +32,13 @@
from deker.dimensions import Dimension, TimeDimension
from deker.errors import DekerMetaDataError, DekerValidationError
from deker.log import SelfLoggerMixin
from deker.schemas import ArraySchema, TimeDimensionSchema, VArraySchema
from deker.schemas import ArraySchema, VArraySchema
from deker.subset import Subset, VSubset
from deker.tools.array import check_memory, get_id
from deker.validators import is_valid_uuid
from deker.tools.schema import create_dimensions
from deker.types.private.classes import ArrayMeta, Serializer
from deker.types.private.typings import FancySlice, Numeric, Slice
from deker.validators import process_attributes
from deker.validators import process_attributes, is_valid_uuid, validate_custom_attributes_update


if TYPE_CHECKING:
@@ -381,26 +380,13 @@ def update_custom_attributes(self, attributes: dict) -> None:
:param attributes: attributes for updating
"""
if not attributes:
raise DekerValidationError("No attributes passed for update")
for s in self.schema.dimensions:
if (
isinstance(s, TimeDimensionSchema)
and isinstance(s.start_value, str)
and s.start_value.startswith("$")
):
if s.start_value[1:] in self.primary_attributes:
continue
if s.start_value[1:] not in attributes:
for d in self.dimensions:
if d.name == s.name:
attributes[s.start_value[1:]] = d.start_value # type: ignore[attr-defined]
else:
for attr in self.schema.attributes:
if not attr.primary and attr.name not in attributes:
attributes[attr.name] = self.custom_attributes[attr.name]

process_attributes(self.schema, self.primary_attributes, attributes)
attributes = validate_custom_attributes_update(
self.schema,
self.dimensions,
self.primary_attributes,
self.custom_attributes,
attributes,
)
self._adapter.update_meta_custom_attributes(self, attributes)
self.custom_attributes = attributes
self.logger.info(f"{self!s} custom attributes updated: {attributes}")
15 changes: 12 additions & 3 deletions deker/client.py
Original file line number Diff line number Diff line change
@@ -400,8 +400,10 @@ def get_collection(
self.logger.info(f"Collection {name} not found")
return None

def collection_from_dict(self, collection_data: dict) -> Collection:
"""Create a new ``Collection`` in the database from collection metadata dictionary.
def _validate_collection(self, collection_data: dict) -> Collection:
"""Validate ``Collection`` object and return it without creation.
Not recommended to use except for validation.
:param collection_data: Dictionary with collection metadata
"""
@@ -432,9 +434,16 @@ def collection_from_dict(self, collection_data: dict) -> Collection:

elif k not in collection_data[key]:
collection_data[key][k] = default_fields[key][k]
collection = self.__adapter.create_collection_from_meta( # type: ignore[return-value]
return self.__adapter.create_collection_from_meta( # type: ignore[return-value]
collection_data, self.__factory
)

def collection_from_dict(self, collection_data: dict) -> Collection:
    """Build a ``Collection`` from a metadata dictionary and store it in the database.

    The metadata is first passed to ``_validate_collection``; the object it
    returns is then handed to the adapter's ``create`` method.

    :param collection_data: Dictionary with collection metadata
    :return: the ``Collection`` returned by ``_validate_collection``
    """
    new_collection = self._validate_collection(collection_data)
    # Creation happens only after the validation call has returned.
    self.__adapter.create(new_collection)
    self.logger.debug(f"Collection {new_collection.name} created from dict")
    return new_collection  # type: ignore[return-value]
10 changes: 7 additions & 3 deletions deker/collection.py
Original file line number Diff line number Diff line change
@@ -263,7 +263,10 @@ def arrays(self) -> ArrayManager:

@not_deleted
def create(
self, primary_attributes: Optional[dict] = None, custom_attributes: Optional[dict] = None
self,
primary_attributes: Optional[dict] = None,
custom_attributes: Optional[dict] = None,
id_: Optional[str] = None,
) -> Union[Array, VArray]:
"""Create ``Array`` or ``VArray`` according to collection main schema.
@@ -272,12 +275,13 @@ def create(
Otherwise, only ``Arrays`` will be created.
:param primary_attributes: ``Array`` or ``VArray`` primary attribute
:param custom_attributes: ``VArray`` or ``VArray`` custom attributes
:param custom_attributes: ``Array`` or ``VArray`` custom attributes
:param id_: ``Array`` or ``VArray`` unique UUID string
"""
schema = self.array_schema
shape = schema.arrays_shape if hasattr(schema, "arrays_shape") else schema.shape
check_memory(shape, schema.dtype, self.__adapter.ctx.config.memory_limit)
array = self.__manager.create(primary_attributes, custom_attributes)
array = self.__manager.create(primary_attributes, custom_attributes, id_)
self.logger.debug(
f"{array.__class__.__name__} id={array.id} {primary_attributes=}, {custom_attributes=} created"
)
8 changes: 4 additions & 4 deletions deker/managers.py
Original file line number Diff line number Diff line change
@@ -109,7 +109,7 @@ def _create( # type: ignore
schema: "BaseArraysSchema",
primary_attributes: Optional[dict] = None,
custom_attributes: Optional[dict] = None,
id_: Optional[str]=None
id_: Optional[str] = None,
) -> Union[Array, VArray]:
"""Create Array or VArray.
@@ -141,7 +141,7 @@ def create(
self,
primary_attributes: Optional[dict] = None,
custom_attributes: Optional[dict] = None,
id_: Optional[str] = None
id_: Optional[str] = None,
) -> Union[Array, VArray]:
"""Create array or varray.
@@ -184,7 +184,7 @@ def create(
self,
primary_attributes: Optional[dict] = None,
custom_attributes: Optional[dict] = None,
id_: Optional[str] = None
id_: Optional[str] = None,
) -> VArray:
"""Create varray in collection.
@@ -232,7 +232,7 @@ def create(
self,
primary_attributes: Optional[dict] = None,
custom_attributes: Optional[dict] = None,
id_: Optional[str] = None
id_: Optional[str] = None,
) -> Array:
"""Create array in collection.
68 changes: 59 additions & 9 deletions deker/validators.py
Original file line number Diff line number Diff line change
@@ -21,9 +21,9 @@

from deker_tools.time import get_utc

from deker.dimensions import Dimension, TimeDimension
from deker.errors import DekerValidationError


if TYPE_CHECKING:
from deker.schemas import ArraySchema, AttributeSchema, VArraySchema

@@ -42,14 +42,21 @@ def process_time_dimension_attrs(attributes: dict, attr_name: str) -> datetime.d
return time_attribute


def __process_attrs(
def __process_attributes_types(
attrs_schema: Tuple["AttributeSchema", ...],
attributes: dict,
primary_attributes: dict,
custom_attributes: dict,
) -> None:
"""Validate attributes types over schema and update dicts if needed.
:param attrs_schema: attributes schema
:param primary_attributes: primary attributes to validate
:param custom_attributes: custom attributes to validate
"""
attributes = {**primary_attributes, **custom_attributes}
for attr in attrs_schema:
if attr.primary:
# check if primary attribute is not missing and its type
if attr.name not in attributes:
raise DekerValidationError(f"Key attribute missing: {attr.name}")
if not isinstance(primary_attributes[attr.name], attr.dtype):
@@ -59,6 +66,7 @@ def __process_attrs(
)

else:
# check if custom attribute is not missing and its type
custom_attribute = custom_attributes.get(attr.name)
if custom_attribute is not None and not isinstance(custom_attribute, attr.dtype):
raise DekerValidationError(
@@ -71,6 +79,7 @@ def __process_attrs(
raise DekerValidationError(f'Custom attribute "{attr.name}" cannot be None')
custom_attributes[attr.name] = None

# convert attribute with datetime to utc if needed
if attr.dtype == datetime.datetime and attr.name in attributes:
try:
utc = get_utc(attributes[attr.name])
@@ -87,7 +96,7 @@ def process_attributes(
primary_attributes: Optional[dict],
custom_attributes: Optional[dict],
) -> Tuple[dict, dict]:
"""Validate attributes over schema.
"""Validate attributes over schema and return them.
:param schema: ArraySchema or VArraySchema instance
:param primary_attributes: attributes to validate
@@ -99,10 +108,8 @@ def process_attributes(

attrs_schema = schema.attributes if schema else None

if primary_attributes is None:
primary_attributes = {}
if custom_attributes is None:
custom_attributes = {}
primary_attributes = primary_attributes or {}
custom_attributes = custom_attributes or {}

if any((primary_attributes, custom_attributes)) and not attrs_schema:
raise DekerValidationError(f"{array_type} attributes schema is missing".capitalize())
@@ -130,10 +137,53 @@ def process_attributes(
f"Setting additional attributes not listed in schema is not allowed. "
f"Invalid attributes: {sorted(extra_names)}"
)
__process_attrs(attrs_schema, attributes, primary_attributes, custom_attributes) # type: ignore[arg-type]
__process_attributes_types(
attrs_schema, primary_attributes, custom_attributes # type: ignore[arg-type]
)
return primary_attributes, custom_attributes


def validate_custom_attributes_update(
schema: Union["ArraySchema", "VArraySchema"],
dimensions: Tuple[Union[Dimension, TimeDimension], ...],
primary_attributes: dict,
custom_attributes: dict,
attributes: Optional[dict],
) -> dict:
"""Complete and validate a custom attributes update over schema.

The passed ``attributes`` dict is filled in place with values the caller
did not provide and is then checked with ``process_attributes``.

Values are filled from two sources:

- for a time dimension schema whose ``start_value`` is a string starting
  with ``$``, the text after ``$`` is used as an attribute name; unless that
  name is a primary attribute or is already present in ``attributes``, it is
  set to the ``start_value`` of the dimension with the same name;
- non-primary schema attributes missing from ``attributes`` are copied from
  ``custom_attributes``.

:param schema: ArraySchema or VArraySchema instance
:param dimensions: tuple of (V)Array dimensions
:param primary_attributes: (V)Array primary attributes
:param custom_attributes: old custom attributes
:param attributes: new custom attributes to validate; mutated in place
:return: the same ``attributes`` dict, completed with the filled-in values
:raises DekerValidationError: if ``attributes`` is empty or ``None``
"""
# Imported at call time; at module level ``deker.schemas`` is imported only under
# TYPE_CHECKING - presumably to avoid a circular import (TODO confirm).
from deker.schemas import TimeDimensionSchema

# an empty dict and None are both rejected
if not attributes:
raise DekerValidationError("No attributes passed for update")
for s in schema.dimensions:
# a "$name" start value refers to the attribute called "name"
if (
isinstance(s, TimeDimensionSchema)
and isinstance(s.start_value, str)
and s.start_value.startswith("$")
):
# the reference points to a primary attribute: nothing to fill for this dimension
if s.start_value[1:] in primary_attributes:
continue
# the caller did not pass the referenced attribute: reuse the current start value
# of the dimension with the same name
if s.start_value[1:] not in attributes:
for d in dimensions:
if d.name == s.name:
attributes[s.start_value[1:]] = d.start_value  # type: ignore[attr-defined]
# NOTE(review): indentation is not visible in this view; this ``else`` presumably pairs
# with the time dimension check above - confirm against the repository.
else:
# fill attributes to update dict with already existing custom attributes values
for attr in schema.attributes:
if not attr.primary and attr.name not in attributes:
attributes[attr.name] = custom_attributes[attr.name]

# validate the completed dict over the schema; the returned tuple is not used here
process_attributes(schema, primary_attributes, attributes)
return attributes


def is_valid_uuid(id_: str) -> bool:
"""Validate if id is in uuid format.
5 changes: 4 additions & 1 deletion tests/test_cases/test_collections/test_collection_methods.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO

@@ -64,11 +65,13 @@ def test_delete_collection(
assert not path.exists()
assert not schema.exists()

@pytest.mark.parametrize("array_params", [{}, {"id_": str(uuid.uuid4())}])
def test_clear_collection(
self,
array_collection: Collection,
collection_adapter: LocalCollectionAdapter,
storage_adapter,
array_params
):
"""Test collection clears its data well.
@@ -78,7 +81,7 @@ def test_clear_collection(
schema = path / (array_collection.name + collection_adapter.file_ext)
assert path.exists()
assert schema.exists()
array = array_collection.create()
array = array_collection.create(**array_params)
assert array
array_paths = get_paths(array, array_collection.path)
main_path = array_paths.main / (array.id + storage_adapter.file_ext)
Loading

0 comments on commit 039674f

Please sign in to comment.