Skip to content

Commit

Permalink
feature: implemented json and yaml parser
Browse files Browse the repository at this point in the history
  • Loading branch information
lingjie00 committed Sep 10, 2022
1 parent 5505009 commit 3f414df
Show file tree
Hide file tree
Showing 25 changed files with 588 additions and 51 deletions.
Binary file modified .coverage
Binary file not shown.
11 changes: 0 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,6 @@ The API documentation is available at

## Parameters

- Depth of layers
- The depth of layer is one key consideration in converting a single config
file into the folder structure.
- The default behaviour is to create nested folders until a single file
contains no sub-keys. However, in some cases this might not be ideal.
Therefore, user can control the depth of layer through the following
parameter.
```bash
# default is -1, file does not contain sub-key
configen --config-path=config_path --depth=-1
```
- Ignore
- User can choose to ignore some keys and not expand into sub-folders
```bash
Expand Down
5 changes: 5 additions & 0 deletions configen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
.. include:: ../README.md
"""
from importlib.metadata import version
from configen.configen import cli
from configen.base_parser import Parser
from configen.json_parser import JsonParser
from configen.yaml_parser import YamlParser

# Package metadata.
__author__ = "Ling"
__email__ = "[email protected]"
# Version is read from the installed distribution's metadata.
__version__ = version("configen")
# `__all__` must contain the *names* of the public objects as strings;
# listing the objects themselves makes `from configen import *` raise
# TypeError.
__all__ = ["cli", "Parser", "JsonParser", "YamlParser"]
176 changes: 159 additions & 17 deletions configen/base_parser.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,39 @@
"""Defines the base parser requirements."""
from __future__ import annotations
from typing import Union

import abc
import os
import re
from collections import deque
from typing import Tuple, Union


class Parser(abc.ABC):
class Parser:
"""The base Parser."""
extension: str

extension: str = ""
"""The parser file extension."""
config: dict
config: dict = {}
"""The loaded config."""

def __init__(self, config: Union[dict, None] = None):
    """Create a parser, optionally seeded with an initial config.

    Params:
        config: dictionary to store as the loaded config; when omitted
            the parser starts with its own empty dictionary.

    Raises:
        AssertionError: if `config` is given but is not a dict.
    """
    if config is not None:
        assert isinstance(
            config, dict
        ), f"Expected config to be dict get {type(config)}"
    # Always bind a per-instance dict. Falling back to the class-level
    # default would share one dictionary between every parser, and
    # `load` updates `self.config` in place.
    self.config = {} if config is None else config

def __eq__(self, parser: object) -> bool:
"""Return True when `parser` is a Parser with the same extension as self."""
# only the file extension is compared; the stored config is not considered
if isinstance(parser, Parser):
return parser.extension == self.extension
# any object that is not a Parser compares unequal
return False

def __str__(self) -> str:
"""Return the parser's file extension as its printable form."""
return self.extension

def _append_extension(self, input_path: str) -> str:
"""Output the input_path with the file extension.
Expand All @@ -26,9 +48,11 @@ def _append_extension(self, input_path: str) -> str:
`_append_extension("config.json")` -> config.json
"""
# TODO: implement better extension checks
if input_path.split(".")[-1] != self.extension:
input_path += f".{self.extension}"
assert isinstance(input_path, str), f"expected type str got {type(input_path)}"

filename, file_extension = os.path.splitext(input_path)
if file_extension != "." + self.extension:
input_path += "." + self.extension
return input_path

@abc.abstractmethod
Expand Down Expand Up @@ -63,7 +87,22 @@ def _write_method(self, filename: str) -> Parser:
"""
pass

def load(self, config: Union[str, dict]) -> Parser:
@staticmethod
def _search_match(name: str, ignored: Tuple[str]) -> bool:
    """Check whether `name` matches any of the ignored patterns.

    Params:
        name: the file or folder name to test
        ignored: tuple of regex patterns; each one is tried with
            `re.search`, so a pattern may match anywhere in `name`

    Returns:
        True if at least one pattern matches, False otherwise
        (including when `ignored` is empty)

    Raises:
        AssertionError: if `name` is not a str or `ignored` is not a tuple
    """
    assert isinstance(name, str), f"Expected name as str, get {type(name)}"
    # report the type of `ignored` itself; the message previously showed
    # the type of `name`, which is always str at this point
    assert isinstance(
        ignored, tuple
    ), f"Expected ignored as tuple, get {type(ignored)}"

    # `re.search` returns None when there is no match
    return any(re.search(pattern, name) is not None for pattern in ignored)

def load(self, config: Union[str, dict, None], ignored: Tuple[str] = ()) -> Parser:
"""Loads the config (single, or multiple files, or dict).
Params:
Expand All @@ -73,6 +112,8 @@ def load(self, config: Union[str, dict]) -> Parser:
2. filepath for a folder of configs
3. dictionary containing the config itself
ignored: list of regex match strings to ignore in file names
Returns:
self with the config loaded in memory
Expand All @@ -83,16 +124,84 @@ def load(self, config: Union[str, dict]) -> Parser:
dictionary: `load({"name": "config"})
"""
pass
if config is not None:
assert isinstance(
config, (str, dict)
), f"expected (str, dict) got {type(config)}"

if isinstance(ignored, str):
ignored = (ignored,)

assert isinstance(ignored, tuple), "expected ignored as tuple, got {type(ignored)}"

# if config is None, then remove the stored config
if config is None:
self.config = None
return self

# if given dictionary then stores it and end
if isinstance(config, dict):
self.config = config
return self

filename, file_extension = os.path.splitext(config)
# if the config is a single config
if file_extension == "." + self.extension:
self.config = self._load_method(config)
return self

# idea: iterate through the root folder and parse all configs;
# store each sub-folder in a queue, then iteratively pop from the queue
# to maintain the folder hierarchy
files = os.listdir(config)
queue: deque[str] = deque()

if self.config is None:
self.config = {}

# first iteration to get the depth 1 keys and folders
for file in files:
# skip those in the ignored
if self._search_match(file, ignored):
continue
filename, file_extension = os.path.splitext(file)
filepath = os.path.join(config, file)
if file_extension == "." + self.extension:
self.config.update(self._load_method(filepath))
elif os.path.isdir(filepath):
queue.append(filepath)

# while queue is not empty, repeat the procedure
while queue:
folder = queue.pop()
files = os.listdir(folder)
for file in files:
# skip those in the ignored
if self._search_match(file, ignored):
continue
filename, file_extension = os.path.splitext(file)
filepath = os.path.join(folder, file)
base_folder = os.path.basename(folder)
if base_folder not in self.config:
self.config[base_folder] = {}
if file_extension == "." + self.extension:
self.config[base_folder].update(self._load_method(filepath))
elif os.path.isdir(filepath):
queue.append(filepath)

def write(self, filename: str, config: Union[str, dict, None]) -> Parser:
return self

def write(self, filename: str, config: Union[str, dict, None] = None) -> Parser:
"""Writes the config to file.
Params:
filename: the file to be output as
config: the config file, if not provided use config stored in object
depth: how deep should we go, if -1 then every config file does not
contain sub-keys else the max folder layer is the depth parameter.
Returns:
self with config written to file
Expand All @@ -101,30 +210,63 @@ def write(self, filename: str, config: Union[str, dict, None]) -> Parser:
`write("config.json", {"name": "config1"})`
"""
pass
if config is not None:
assert isinstance(
config, (str, dict)
), f"expected str, dict, None got {type(config)}"

# if given config, need to store the old config and restore later
if config is None:
# if no config is given just replace back the old config
self_config, self.config = self.config, self.config

else:
# if given config
self_config, self.config = self.config, config

self._write_method(filename)

# restore config
self = self.load(self_config)

def convert(self, config_path: str, filename: str, parser: type(Parser)) -> Parser:
return self

def convert(
self, filename: str, parser: type[Parser], config_path: Union[str, None] = None
) -> Parser:
"""Converts the config file into another file extension.
Params:
config_path: file path to the config file
filename: the file path to be written as
parser: the parser to be used for conversion
config_path: file path to the config file, if no path is given then
use the config stored in self
Returns:
self
Example:
`convert("config.json", "config.yml", YamlParser)`
"""
if config_path is not None:
assert isinstance(
config_path, str
), f"expected str or None got {type(config_path)}"
assert isinstance(filename, str), f"expected str got {type(filename)}"
assert isinstance(parser, Parser), f"expected ktr got {type(parser)}"

# ensure the file extension are correct
config_path = self._append_extension(config_path)
filename = parser.append_extension(filename)
if config_path is not None:
config_path = self._append_extension(config_path)
filename = parser._append_extension(filename)

# load the config from given path
config = self.load(config_path)
if config_path is not None:
config = self.load(config_path).config
else:
config = self.config

# writes config based on the given parser
parser.write(filename=filename, config=config)
Expand Down
11 changes: 10 additions & 1 deletion configen/configen.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
"""Entry point for program"""
print("Entry point")


def cli() -> None:
"""Command line interface entry point.

Currently a placeholder: the body is empty and the call returns None.

Example:
configen config.json
"""
# TODO: function should be able to read a mixture of config types
pass
10 changes: 3 additions & 7 deletions configen/json_parser.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
import json
from ruamel.yaml import YAML
from configen.base_parser import Parser

_yaml = YAML()
_yaml.indent(mapping=2, sequence=4, offset=2)


class JsonParser(Parser):
"""Json parser."""
extension = "json"

def _write_method(self, filename: str) -> Parser:
filename = self.check_extension(filename)
filename = self._append_extension(filename)

with open(filename, "w") as file:
json.dump(self.config, file, indent=4)

return self

def _load_method(self, filename: str) -> dict:
filename = self.check_extension(filename)
filename = self._append_extension(filename)

with open(filename, "r") as json_config:
config = json.load(json_config)
config = json.loads(json_config.read())

return config
19 changes: 9 additions & 10 deletions configen/yaml_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from ruamel.yaml import YAML
from configen.base_parser import Parser

Expand All @@ -10,19 +9,19 @@ class YamlParser(Parser):
"""Yaml parser."""
extension = "yml"

def _load_method(self, filename: str) -> dict:
filename = self.check_extension(filename)

with open(filename, "r") as file:
config = _yaml.load(file.read())

return config

def _write_method(self, filename: str) -> Parser:
# check if the given path ends with a yaml file extension
filename = self.check_extension(filename)
filename = self._append_extension(filename)

with open(filename, "w") as file:
_yaml.dump(self.config, file)

return self

def _load_method(self, filename: str) -> dict:
filename = self._append_extension(filename)

with open(filename, "r") as file:
config = _yaml.load(file.read())

return config
4 changes: 0 additions & 4 deletions notebooks/README.md

This file was deleted.

6 changes: 6 additions & 0 deletions sample-config/config-json/function/function_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"function1": {
"name": "transform",
"param": "col1"
}
}
6 changes: 6 additions & 0 deletions sample-config/config-json/function/function_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"function2": {
"name": "load",
"param": "col2"
}
}
9 changes: 9 additions & 0 deletions sample-config/config-json/main.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"name": "config-01",
"training": true,
"parameters": {
"num_nodes": 200,
"num_samples": 100,
"max_time": 40
}
}
Loading

0 comments on commit 3f414df

Please sign in to comment.