From 2fcabcc16515f901fd5ab4997e10aeb36efc0629 Mon Sep 17 00:00:00 2001 From: cirun Date: Sun, 4 Jun 2023 23:14:49 +0200 Subject: [PATCH 1/2] add logic to support custom schemas --- ckanext/scheming/ckan_dataset.yaml | 3 +- ckanext/scheming/custom_schema.py | 119 +++++++++++++++++++++ ckanext/scheming/plugins.py | 166 ++++++++++++++++------------- ckanext/scheming/subfields.yaml | 2 + 4 files changed, 215 insertions(+), 75 deletions(-) create mode 100644 ckanext/scheming/custom_schema.py diff --git a/ckanext/scheming/ckan_dataset.yaml b/ckanext/scheming/ckan_dataset.yaml index 9559ec76..64e47d1c 100644 --- a/ckanext/scheming/ckan_dataset.yaml +++ b/ckanext/scheming/ckan_dataset.yaml @@ -19,6 +19,7 @@ dataset_fields: - field_name: notes label: Description form_snippet: markdown.html + validators: not_empty form_placeholder: eg. Some useful notes about the data - field_name: tag_string @@ -43,7 +44,7 @@ dataset_fields: - field_name: version label: Version - validators: ignore_missing unicode_safe package_version_validator + validators: not_empty unicode_safe form_placeholder: '1.0' - field_name: author diff --git a/ckanext/scheming/custom_schema.py b/ckanext/scheming/custom_schema.py new file mode 100644 index 00000000..e401fa0b --- /dev/null +++ b/ckanext/scheming/custom_schema.py @@ -0,0 +1,119 @@ +import os +import yaml + +from pydantic import BaseModel, validator, root_validator, create_model +from pydantic.fields import ModelField, Field +from pydantic.class_validators import Validator +from typing import List, Optional + +from ckan.plugins.toolkit import get_validator, config + + +def attach_validators_to_field(submodel, validators): + for f_name, validators_ in validators.items(): + for validator in validators_: + submodel.__fields__[f_name].class_validators.update({"validator": validator}) + + +class CustomModel(BaseModel): + + class Config: + extra = "allow" + + @classmethod + def from_yaml(cls, file_path): + cls.__annotations__ = {} + with open(file_path, "r") as yaml_file: + data = yaml.safe_load(yaml_file) + + for field_data in data.get('dataset_fields'): + new_fields = {} + new_annotations = {} + class_validators = {} + breakpoint() + f_name = field_data['field_name'] + if 'repeating_subfields' in field_data: + submodel_fields = {} + repeating_subfields = field_data['repeating_subfields'] + for subfield in repeating_subfields: + + subfield_name = subfield['field_name'] + subfield_type = subfield.get('type', str) + subfield_required = ... if subfield.get('required') else None + subfield_validators = subfield.get('validators') + + sub_validators = {} + if subfield_validators: + validators = subfield_validators.split() + for validator in validators: + validator_name = validator + pydantic_validator = f'pydantic_{validator}' + + try: + validator = get_validator(pydantic_validator) + except: + validator = get_validator(validator) + _validator = Validator(validator) + + if sub_validators.get(subfield_name, None): + sub_validators[subfield_name].append(_validator) + else: + sub_validators.update({subfield_name: [_validator]}) + + subfield_value = (subfield_type, subfield_required) + submodel_fields[subfield_name] = subfield_value + # breakpoint() + submodel = type(f_name.capitalize(), (BaseModel,), submodel_fields) + attach_validators_to_field(submodel, sub_validators) + new_annotations[f_name] = List[submodel] + + else: + required = field_data.get('required', None) + extra_validators = field_data.get('validators', None) + + if required: + required = ... + type_ = (field_data.get('type', str), required) + + if isinstance(type_, tuple): + try: + f_annotation, f_value = type_ + except ValueError as e: + raise Exception( + 'field definitions should be a tuple of (, )' + ) from e + else: + f_annotation, f_value = None, type_ + + if f_annotation: + new_annotations[f_name] = f_annotation + if extra_validators: + validators = extra_validators.split() + + for validator in validators: + validator_name = validator + pydantic_validator = f'pydantic_{validator}' + try: + validator = get_validator(pydantic_validator) + except: + validator = get_validator(validator) + _validator = Validator(validator) + class_validators.update({validator_name: _validator}) + # breakpoint() + new_fields[f_name] = ModelField.infer(name=f_name, value=f_value, annotation=f_annotation, class_validators=class_validators, config=cls.__config__) + cls.schema().update({f_name: {'title': f_name.capitalize(), 'type': new_annotations[f_name]}}) + cls.__fields__.update(new_fields) + cls.__annotations__.update(new_annotations) + return cls + + # @root_validator(pre=True) + def validate_fields(cls, values): + breakpoint() + # TODO + + return values + + +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) +dataset_schema = config.get('scheming.dataset_schemas', 'ckan_dataset.yaml').split(':')[-1] +pydantic_model = CustomModel.from_yaml(f"{__location__}/{dataset_schema}") diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index 0980a684..eb6b5f09 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -7,6 +7,7 @@ import six import yaml +import pydantic import ckan.plugins as p try: @@ -34,6 +35,7 @@ add_public_directory, missing, check_ckan_version, + ValidationError ) from ckanext.scheming import helpers, validation, logic, loader, views @@ -223,93 +225,109 @@ def resource_form(self): def package_types(self): return list(self._schemas) - def validate(self, context, data_dict, schema, action): + def validate(self, context, data_dict, schema, pydantic_model, action): """ Validate and convert for package_create, package_update and package_show actions. """ - thing, action_type = action.split('_') + # thing, action_type = action.split('_') t = data_dict.get('type') - if not t or t not in self._schemas: - return data_dict, {'type': [ - "Unsupported dataset type: {t}".format(t=t)]} + # if not t or t not in self._schemas: + # return data_dict, {'type': [ + # "Unsupported dataset type: {t}".format(t=t)]} scheming_schema = self._expanded_schemas[t] - before = scheming_schema.get('before_validators') - after = scheming_schema.get('after_validators') - if action_type == 'show': - get_validators = _field_output_validators - before = after = None - elif action_type == 'create': - get_validators = _field_create_validators - else: - get_validators = _field_validators - - if before: - schema['__before'] = validation.validators_from_string( - before, None, scheming_schema) - if after: - schema['__after'] = validation.validators_from_string( - after, None, scheming_schema) - fg = ( - (scheming_schema['dataset_fields'], schema, True), - (scheming_schema['resource_fields'], schema['resources'], False) - ) - - composite_convert_fields = [] - for field_list, destination, is_dataset in fg: - for f in field_list: - convert_this = is_dataset and f['field_name'] not in schema - destination[f['field_name']] = get_validators( - f, - scheming_schema, - convert_this - ) - if convert_this and 'repeating_subfields' in f: - composite_convert_fields.append(f['field_name']) - - def composite_convert_to(key, data, errors, context): - unflat = unflatten(data) - for f in composite_convert_fields: - if f not in unflat: - continue - data[(f,)] = json.dumps(unflat[f], default=lambda x:None if x == missing else x) - convert_to_extras((f,), data, errors, context) - del data[(f,)] - - if action_type == 'show': - if composite_convert_fields: - for ex in data_dict['extras']: - if ex['key'] in composite_convert_fields: - data_dict[ex['key']] = json.loads(ex['value']) - data_dict['extras'] = [ - ex for ex in data_dict['extras'] - if ex['key'] not in composite_convert_fields - ] - else: - dataset_composite = { + # before = scheming_schema.get('before_validators') + # after = scheming_schema.get('after_validators') + # if action_type == 'show': + # get_validators = _field_output_validators + # before = after = None + # elif action_type == 'create': + # get_validators = _field_create_validators + # else: + # get_validators = _field_validators + + # if before: + # schema['__before'] = validation.validators_from_string( + # before, None, scheming_schema) + # if after: + # schema['__after'] = validation.validators_from_string( + # after, None, scheming_schema) + # fg = ( + # (scheming_schema['dataset_fields'], schema, True), + # (scheming_schema['resource_fields'], schema['resources'], False) + # ) + + # composite_convert_fields = [] + # for field_list, destination, is_dataset in fg: + # for f in field_list: + # convert_this = is_dataset and f['field_name'] not in schema + # destination[f['field_name']] = get_validators( + # f, + # scheming_schema, + # convert_this + # ) + # if convert_this and 'repeating_subfields' in f: + # composite_convert_fields.append(f['field_name']) + + # def composite_convert_to(key, data, errors, context): + # unflat = unflatten(data) + # for f in composite_convert_fields: + # if f not in unflat: + # continue + # data[(f,)] = json.dumps(unflat[f], default=lambda x:None if x == missing else x) + # convert_to_extras((f,), data, errors, context) + # del data[(f,)] + + # if action_type == 'show': + # if composite_convert_fields: + # for ex in data_dict['extras']: + # if ex['key'] in composite_convert_fields: + # data_dict[ex['key']] = json.loads(ex['value']) + # data_dict['extras'] = [ + # ex for ex in data_dict['extras'] + # if ex['key'] not in composite_convert_fields + # ] + # else: + # dataset_composite = { + # f['field_name'] + # for f in scheming_schema['dataset_fields'] + # if 'repeating_subfields' in f + # } + # if dataset_composite: + # expand_form_composite(data_dict, dataset_composite) + # resource_composite = { + # f['field_name'] + # for f in scheming_schema['resource_fields'] + # if 'repeating_subfields' in f + # } + # if resource_composite and 'resources' in data_dict: + # for res in data_dict['resources']: + # expand_form_composite(res, resource_composite.copy()) + # # convert composite package fields to extras so they are stored + # breakpoint() + # if composite_convert_fields: + # schema = dict( + # schema, + # __after=schema.get('__after', []) + [composite_convert_to]) + dataset_composite = { f['field_name'] for f in scheming_schema['dataset_fields'] if 'repeating_subfields' in f } - if dataset_composite: - expand_form_composite(data_dict, dataset_composite) - resource_composite = { - f['field_name'] - for f in scheming_schema['resource_fields'] - if 'repeating_subfields' in f - } - if resource_composite and 'resources' in data_dict: - for res in data_dict['resources']: - expand_form_composite(res, resource_composite.copy()) - # convert composite package fields to extras so they are stored - if composite_convert_fields: - schema = dict( - schema, - __after=schema.get('__after', []) + [composite_convert_to]) + # breakpoint() + if dataset_composite: + expand_form_composite(data_dict, dataset_composite) + from ckanext.scheming.custom_schema import pydantic_model as custom_schema - return navl_validate(data_dict, schema, context) + try: + validated_data = custom_schema(**data_dict) + except pydantic.ValidationError as e: + breakpoint() + return e.errors + return validated_data + # return navl_validate(data_dict, schema, context) def get_actions(self): """ diff --git a/ckanext/scheming/subfields.yaml b/ckanext/scheming/subfields.yaml index 357310f4..81cedad1 100644 --- a/ckanext/scheming/subfields.yaml +++ b/ckanext/scheming/subfields.yaml @@ -19,6 +19,7 @@ dataset_fields: - field_name: notes label: Description + validators: not_empty form_snippet: markdown.html form_placeholder: eg. Some useful notes about the data required: True @@ -44,6 +45,7 @@ dataset_fields: label: Publication Date preset: date - field_name: online_linkage + validators: not_empty unicode_safe label: Online Linkage preset: multiple_text form_blanks: 2 From f1e1a9281ee1006859abd107bb0b10767ef2cffd Mon Sep 17 00:00:00 2001 From: cirun Date: Sun, 11 Jun 2023 22:05:36 +0200 Subject: [PATCH 2/2] validate_fields --- ckanext/scheming/ckan_dataset.yaml | 3 +- ckanext/scheming/custom_schema.py | 44 ++++++++++++++++++++---------- ckanext/scheming/plugins.py | 20 ++++++++------ 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/ckanext/scheming/ckan_dataset.yaml b/ckanext/scheming/ckan_dataset.yaml index 64e47d1c..8834f882 100644 --- a/ckanext/scheming/ckan_dataset.yaml +++ b/ckanext/scheming/ckan_dataset.yaml @@ -8,6 +8,7 @@ dataset_fields: - field_name: title label: Title + required: true preset: title form_placeholder: eg. A descriptive title @@ -44,7 +45,7 @@ dataset_fields: - field_name: version label: Version - validators: not_empty unicode_safe + validators: not_empty form_placeholder: '1.0' - field_name: author diff --git a/ckanext/scheming/custom_schema.py b/ckanext/scheming/custom_schema.py index e401fa0b..879bd89e 100644 --- a/ckanext/scheming/custom_schema.py +++ b/ckanext/scheming/custom_schema.py @@ -19,18 +19,19 @@ class CustomModel(BaseModel): class Config: extra = "allow" + arbitrary_types_allowed = True @classmethod def from_yaml(cls, file_path): cls.__annotations__ = {} + cls._validators = {} with open(file_path, "r") as yaml_file: data = yaml.safe_load(yaml_file) for field_data in data.get('dataset_fields'): new_fields = {} new_annotations = {} - class_validators = {} - breakpoint() + f_name = field_data['field_name'] if 'repeating_subfields' in field_data: submodel_fields = {} @@ -68,11 +69,8 @@ def from_yaml(cls, file_path): new_annotations[f_name] = List[submodel] else: - required = field_data.get('required', None) + required = ... if field_data.get('required') else None extra_validators = field_data.get('validators', None) - - if required: - required = ... type_ = (field_data.get('type', str), required) if isinstance(type_, tuple): @@ -89,7 +87,7 @@ def from_yaml(cls, file_path): new_annotations[f_name] = f_annotation if extra_validators: validators = extra_validators.split() - + cls._validators[f_name] = [] for validator in validators: validator_name = validator pydantic_validator = f'pydantic_{validator}' @@ -97,20 +95,36 @@ def from_yaml(cls, file_path): validator = get_validator(pydantic_validator) except: validator = get_validator(validator) - _validator = Validator(validator) - class_validators.update({validator_name: _validator}) - # breakpoint() - new_fields[f_name] = ModelField.infer(name=f_name, value=f_value, annotation=f_annotation, class_validators=class_validators, config=cls.__config__) + + cls._validators[f_name].append(validator) + + new_fields[f_name] = ModelField.infer(name=f_name, value=f_value, annotation=f_annotation, class_validators={}, config=cls.__config__) cls.schema().update({f_name: {'title': f_name.capitalize(), 'type': new_annotations[f_name]}}) cls.__fields__.update(new_fields) cls.__annotations__.update(new_annotations) return cls - # @root_validator(pre=True) + @root_validator(pre=True) def validate_fields(cls, values): - breakpoint() - # TODO - + errors = {} + for name, f in cls.__fields__.items(): + extra_validators = cls._validators.get(name) + errors[name] = [] + if f.required and not values[name]: + errors[name].append("Missing value") + + if not isinstance(values[name], f.type_): + errors[name].append(f"Must be of {f.type_} type") + + if extra_validators: + for validator_func in extra_validators: + try: + v = validator_func(values[name], values, cls.__config__, cls.__fields__[name]) + except ValueError as e: + errors[name].append("Missing value") + if not errors[name]: + del errors[name] + values["errors"] = errors return values diff --git a/ckanext/scheming/plugins.py b/ckanext/scheming/plugins.py index eb6b5f09..09e8035c 100644 --- a/ckanext/scheming/plugins.py +++ b/ckanext/scheming/plugins.py @@ -225,7 +225,7 @@ def resource_form(self): def package_types(self): return list(self._schemas) - def validate(self, context, data_dict, schema, pydantic_model, action): + def validate(self, context, data_dict, schema, action): """ Validate and convert for package_create, package_update and package_show actions. @@ -316,17 +316,19 @@ def validate(self, context, data_dict, schema, pydantic_model, action): for f in scheming_schema['dataset_fields'] if 'repeating_subfields' in f } - # breakpoint() + breakpoint() if dataset_composite: expand_form_composite(data_dict, dataset_composite) from ckanext.scheming.custom_schema import pydantic_model as custom_schema - - try: - validated_data = custom_schema(**data_dict) - except pydantic.ValidationError as e: - breakpoint() - return e.errors - return validated_data + # try: + # validated_data, errors = schema(**data_dict) + result = custom_schema(**data_dict).dict() + errors = result.pop('errors') + return result, errors + # except pydantic.ValidationError as e: + # breakpoint() + # return e.errors() + # return validated_data # return navl_validate(data_dict, schema, context) def get_actions(self):