Skip to content

Commit

Permalink
hxlm (#11): moved RecipeHType from HType to Model; improved HMeta way…
Browse files Browse the repository at this point in the history
… to parse raw schemas
  • Loading branch information
fititnt committed Mar 1, 2021
1 parent 4118708 commit 51f0c64
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 53 deletions.
11 changes: 9 additions & 2 deletions hxlm/core/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
"""TODO: document me
"""
from hxlm.core.model.base import *
from hxlm.core.model.meta import *
from hxlm.core.model.base import (
HConteiner,
HDataset,
)
from hxlm.core.model.meta import (
HMeta
)
# from hxlm.core.model.file import *
# from hxlm.core.model.recipe import *
73 changes: 39 additions & 34 deletions hxlm/core/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ def encryption(self, value):
else:
self._encryption = EncryptionHtype(code=value)

def export_schema_dataset(self):
    """Export this dataset as a schema object.

    Returns:
        The object held in ``self._dataset_raw``, unchanged.
    """
    # TODO: improve this; for now the stored raw object is handed back as-is.
    raw_dataset = self._dataset_raw
    return raw_dataset

@property
def sensitive(self):
return self._sensitive
Expand Down Expand Up @@ -171,40 +176,40 @@ def sensitive(self, value):
self._sensitive = SensitiveHtype(code=value)


class HFile:
    """Reference to a file stored on an HConteiner.

    How HFile differs from HDataset:
      - HFile is more generic than HDataset
      - HFile does not have the attribute sensitive (but can have encryption)

    TODO: Excel, CKAN and formats like HDF5 all work with MULTIPLE datasets,
          so which structure should be used for this?
          (E.Rocha, 2021-02-26 08:10 UTC)
    """

    def __init__(self, encryption: Type[EncryptionHtype] = None,
                 sensitive: Type[SensitiveHtype] = None):
        # NOTE(review): the class docstring says HFile has no ``sensitive``
        # attribute, yet the value is still stored here.
        self._sensitive = sensitive
        self._encryption = encryption

    def describe(self):
        """Return a dict with the kind of this object and its encryption."""
        summary = {
            'kind': "HFile",
            'encryption': self._encryption
        }
        verbose_event()
        return summary

    @property
    def encryption(self):
        """Encryption currently associated with this file."""
        return self._encryption

    @encryption.setter
    def encryption(self, value):
        # Anything that is not already an EncryptionHtype is used as a code
        # to build one.
        self._encryption = (
            value if isinstance(value, EncryptionHtype)
            else EncryptionHtype(code=value)
        )
# class HFile:
# """HFile is an reference for an file on an HConteiner

# Differentes from HFile to HDataset
# - HFile is more generic than HDataset
# - HFile does not have attribute sensitive (but can have encryption)

# TODO: both Excel, CKan and formats like HDF5 work with MULDIPLE datasets.
# so, which structure use for this? (E.Rocha, 2021-02-26 08:10 UTC)
# """

# def __init__(self, encryption: Type[EncryptionHtype] = None,
# sensitive: Type[SensitiveHtype] = None):
# self._encryption = encryption
# self._sensitive = sensitive

# def describe(self):
# mdataset_description = {
# 'kind': "HFile",
# 'encryption': self._encryption
# }
# verbose_event()
# return mdataset_description

# @property
# def encryption(self):
# return self._encryption

# @encryption.setter
# def encryption(self, value):
# if isinstance(value, EncryptionHtype):
# self._encryption = value
# else:
# self._encryption = EncryptionHtype(code=value)


@dataclass(init=True)
Expand Down
69 changes: 69 additions & 0 deletions hxlm/core/model/file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""hxlm.core.model.file contains HFile
Copyleft 🄯 2021, Emerson Rocha (Etica.AI) <[email protected]>
License: Public Domain / BSD Zero Clause License
SPDX-License-Identifier: Unlicense OR 0BSD
"""

class HFile:
    """HFile holds the raw schema object that describes a file.

    At the moment the object is only stored, not parsed: whatever is given
    to the constructor or to load_schema_file() is kept in ``_file_raw``.
    """

    def __init__(self, file_raw=None):
        self.kind: str = 'HFile'
        self._file_raw = file_raw

    def load_schema_file(self, file_raw):
        """Load a generic object into this HFile.

        How the object is saved on disk (or received from external sources)
        is out of scope of this class.

        Args:
            file_raw (Object): generic object to load into this HFile

        Returns:
            HFile: this same instance, so calls can be chained
        """
        self._file_raw = file_raw
        return self


# class HFile:
# """HFile is an reference for an file on an HConteiner

# Differentes from HFile to HDataset
# - HFile is more generic than HDataset
# - HFile does not have attribute sensitive (but can have encryption)

# TODO: both Excel, CKan and formats like HDF5 work with MULDIPLE datasets.
# so, which structure use for this? (E.Rocha, 2021-02-26 08:10 UTC)
# """

# def __init__(self, encryption: Type[EncryptionHtype] = None,
# sensitive: Type[SensitiveHtype] = None):
# self._encryption = encryption
# self._sensitive = sensitive

# def describe(self):
# mdataset_description = {
# 'kind': "HFile",
# 'encryption': self._encryption
# }
# verbose_event()
# return mdataset_description

# @property
# def encryption(self):
# return self._encryption

# @encryption.setter
# def encryption(self, value):
# if isinstance(value, EncryptionHtype):
# self._encryption = value
# else:
# self._encryption = EncryptionHtype(code=value)
56 changes: 50 additions & 6 deletions hxlm/core/model/meta.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""hxlm.core.model.meta contans HMeta
"""hxlm.core.model.meta contains HMeta
Copyleft 🄯 2021, Emerson Rocha (Etica.AI) <[email protected]>
Expand All @@ -12,6 +12,8 @@
from hxlm.core.model import (
HDataset
)
from hxlm.core.model.file import HFile
from hxlm.core.model.recipe import HRecipe


class HMeta:
Expand All @@ -25,6 +27,9 @@ def __init__(self, schemas_raw=None):
self._schemas_raw = schemas_raw
self._schemas = []
self._hdatasets = []
self._hfiles = []
self._hrecipes = []
self._htasks = []
if self._schemas_raw:
self._parse_schemas_raw()

Expand All @@ -38,17 +43,56 @@ def _parse_schemas_raw(self):
if 'hmeta' in item:
# print('_parse_schemas_raw item item.hmeta yes')
if 'hdatasets' in item:
self._parse_schemas_raw_hdataset(item)
self._parse_schemas_raw_hdatasets(item)
if 'hfiles' in item:
self._parse_schemas_raw_hfile(item)
if 'hrecipes' in item:
self._parse_schemas_raw_hrecipe(item)
if 'htasks' in item:
self._parse_schemas_raw_htask(item)
else:
raise HXLmException(
'No hmeta found on this item of this file. Error?')

def _parse_schemas_raw_hdataset(self, hdataset):
    """Load ``hdataset`` into a new HDataset and keep it on this object."""
    loaded = HDataset().load_schema_dataset(hdataset)
    self._hdatasets.append(loaded)
def _parse_schemas_raw_hdatasets(self, hdatasets):
    """Load every raw dataset into an HDataset and store it.

    Args:
        hdatasets: either an iterable of raw dataset objects, or the whole
            schema item (a dict with an 'hdatasets' key), which is what
            _parse_schemas_raw passes in.
    """
    # Iterating a dict yields its keys ('hmeta', 'hdatasets', ...) rather
    # than the datasets themselves, so unwrap the schema item first.
    # NOTE(review): assumes item['hdatasets'] is a list of datasets - confirm
    # against hmeta.yml.
    if isinstance(hdatasets, dict):
        hdatasets = hdatasets.get('hdatasets') or []

    for dataset_raw in hdatasets:
        self._hdatasets.append(HDataset().load_schema_dataset(dataset_raw))

def _parse_schemas_raw_hfile(self, hfile):
    """Load ``hfile`` into a new HFile and keep it on this object."""
    self._hfiles.append(HFile().load_schema_file(hfile))

def _parse_schemas_raw_hrecipe(self, hrecipe):
    """Load ``hrecipe`` into a new HRecipe and keep it on this object."""
    self._hrecipes.append(HRecipe().load_schema_recipe(hrecipe))

def _parse_schemas_raw_htask(self, htask):
"""HTask is an draft
"""
self._htasks.append(htask)
# htask = HTask().load_schema_recipe(htask)
# self._hrecipes.append(htask)

def export_schemas(self):
    """Export the loaded schemas as a list of plain objects.

    Only the datasets in ``self._hdatasets`` are exported at the moment.

    Returns:
        list: empty when there are no datasets; otherwise a single dict
        ``{'hdatasets': [...]}`` holding the result of
        ``export_schema_dataset()`` for each dataset, in order.
    """
    schemas = []

    if self._hdatasets:
        schemas.append({
            'hdatasets': [
                dataset.export_schema_dataset()
                for dataset in self._hdatasets
            ]
        })

    return schemas

def export_schemas_raw(self):
    """Return the object held in ``self._schemas_raw``, unchanged."""
    untouched = self._schemas_raw
    return untouched

def load_schemas(self, schemas_raw):
Expand Down
37 changes: 28 additions & 9 deletions hxlm/core/htype/recipe.py → hxlm/core/model/recipe.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,38 @@
"""hxlm.core.htype.recipe
TODO: this is actually more complicated than the average HType, and is not just
a dataclass. We should rework it into something else
(Emerson Rocha, 2021-03-01 02:20 UTC)
See
- https://github.com/HXLStandard/hxl-proxy/wiki/JSON-recipes
- https://github.com/OCHA-DAP/hxl-recipes
"""hxlm.core.model.recipe contains HRecipe
Copyleft 🄯 2021, Emerson Rocha (Etica.AI) <[email protected]>
License: Public Domain / BSD Zero Clause License
SPDX-License-Identifier: Unlicense OR 0BSD
"""

class HRecipe:
    """HRecipe holds the raw schema object that describes a recipe.

    At the moment the object is only stored, not parsed: whatever is given
    to the constructor or to load_schema_recipe() is kept in ``_recipe_raw``.
    """

    def __init__(self, recipe_raw=None):
        self.kind: str = 'HRecipe'
        # Same attribute that load_schema_recipe() writes, so the raw recipe
        # is found in one place however the instance was populated.
        self._recipe_raw = recipe_raw

    def load_schema_recipe(self, recipe_raw):
        """Load a generic object into this HRecipe.

        How the object is saved on disk (or received from external sources)
        is out of scope of this class.

        Args:
            recipe_raw (Object): generic object to load into this HRecipe

        Returns:
            HRecipe: this same instance, so calls can be chained
        """
        self._recipe_raw = recipe_raw
        return self



from dataclasses import dataclass

Expand Down
5 changes: 4 additions & 1 deletion hxlm/core/schema/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ def get_schema_as_hmeta(file):
# data = yaml.safe_load_all(f)
# print(data)
# return data
return hmeta.export_schemas()
# return hmeta.export_schemas()

# For debug, use this (will just export the input)
return {'as_meta': hmeta.export_schemas(), 'raw': hmeta.export_schemas_raw()}


class Dumper(yaml.Dumper):
Expand Down
2 changes: 1 addition & 1 deletion hxlm/data/baseline/hmeta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
filters:
- filter: with_columns
with_columns: "#vocab+id+v_iso6393_3letter,#vocab+code+v_6391,#vocab+name"
- filter: with_columns
- filter: without_rows
without_rows: "#vocab+code+v_6391="

- hmeta: hxml.core.data.baseline2
Expand Down

0 comments on commit 51f0c64

Please sign in to comment.