diff --git a/.github/workflows/project-actions.yml b/.github/workflows/project-actions.yml index a0b8f90..d8e899c 100644 --- a/.github/workflows/project-actions.yml +++ b/.github/workflows/project-actions.yml @@ -25,7 +25,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest coverage + python -m pip install flake8 pytest coverage pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: install package run: | @@ -38,4 +38,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest and coverage run: | - coverage run -m pytest + pytest --cov-report term --cov=src/configen diff --git a/.gitignore b/.gitignore index 3445f1f..5179d2e 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ build/ # ignore data folder data/ + +.coverage diff --git a/README.md b/README.md index 4e7def7..e05e405 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,12 @@ The API documentation is available at # supports regex matching configen config_path -o config.json --ignore "config1.json" "config2.yaml" "debug.*json" ``` +- keep + - The opposite of ignored: read only the files whose names match + ```bash + # supports regex matching + configen config_path -o config.json --keep "only_this_config.yaml" + ``` - verbose - the level of logging to display ```bash @@ -152,11 +158,6 @@ pip install . 
```bash # I do not want to create an standalone executable as of now, but the entry is executable -# Therefor, I recommend to add the entry script to path manually, and run from there -export configen="PATH_TO_CLONED_REPO/src/confige/cli.py" -``` - -```bash -# now you can run the confige -$configen config_path -o config.yml +# therefore, you can run configen as a module +python -m configen ``` diff --git a/requirements-dev.txt b/requirements-dev.txt index c14968b..3412d6c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,6 +3,7 @@ python-lsp-server[all] # testing pytest +pytest-cov coverage # linter and formatting diff --git a/src/configen/__main__.py b/src/configen/__main__.py new file mode 100644 index 0000000..d54d5bb --- /dev/null +++ b/src/configen/__main__.py @@ -0,0 +1,5 @@ +from configen.cli import entry +import sys + +if __name__ == "__main__": + entry(sys.argv[1:]) diff --git a/src/configen/base_parser.py b/src/configen/base_parser.py index 8381631..2531ce4 100644 --- a/src/configen/base_parser.py +++ b/src/configen/base_parser.py @@ -93,14 +93,24 @@ def _write_method(self, filename: str) -> Parser: pass @staticmethod - def _search_match(name: str, ignored: Tuple[str]) -> bool: - """Checks if the name is present in the ignored list.""" + def _search_match(name: str, check_list: Tuple[str]) -> bool: + """Checks if the name matches any regex in the check list. 
+ + Params: + name: name to be checked + check_list: tuple of regex patterns to check the name against + + Returns: + bool representing if name matches any pattern in check list + """ + if check_list == ("",): + return False assert isinstance(name, str), f"Expected name as str, get {type(name)}" assert isinstance( - ignored, tuple - ), f"Expected ignored as tuple, get {type(name)}" + check_list, tuple + ), f"Expected check_list as tuple, get {type(check_list)}" - for ignore in ignored: + for ignore in check_list: # if there is a regex match, return true result = re.search(ignore, name) if isinstance(result, re.Match): @@ -111,13 +121,16 @@ def join( self, curr_config: Dict[str, Any], filepath: str, - ignored: Tuple[str]): + ignored: Tuple[str], + keep: Tuple[str] = ("",), + merge_conflict: bool = True): """Joins config. Params: curr_config: the existing loaded config filepath: file path to the new config to be loaded ignored: list of file names to be ignored + merge_conflict: if to merge the conflicts Returns: updated config @@ -126,17 +139,24 @@ def join( logger.debug(f"{filepath=}") # base folder will be used as the key base_folder = os.path.basename(filepath) - filename, file_extension = os.path.splitext(filepath) + filename, file_extension = os.path.splitext(base_folder) + logger.debug(f"Joining {filename=} with {file_extension=}") - if self._search_match(filepath, ignored): + if self._search_match(base_folder, ignored): # ignore the file if it's in the ignored list + logger.debug(f"{filename} is in ignored list, ignored") + return curr_config + + elif keep != ("",) and not self._search_match(base_folder, keep): + # not in the keep list + logger.debug(f"{filename} not in keep list, ignored") return curr_config if file_extension == "." 
+ self.extension: # load the file if it's of the config format logger.info(f"{'='*5} Reading {filepath}") new_config = self._load_method(filepath) - curr_config = merge(curr_config, new_config) + curr_config = merge(curr_config, new_config, merge_conflict=merge_conflict) logger.debug(f"New config = {curr_config}") elif os.path.isdir(filepath): @@ -151,7 +171,10 @@ def join( def load( self, config: Union[str, dict, None], ignored: Tuple[str] = ("",), - add_path: bool = False + keep: Tuple[str] = ("",), + add_path: bool = False, + replace: bool = False, + merge_conflict: bool = True ) -> Parser: """Loads the config (single, or multiple files, or dict). @@ -163,7 +186,10 @@ def load( 3. dictionary containing the config itself ignored: list of regex match strings to ignore in file names + keep: list of regex match strings to keep (only) add_path: if to add the config filepath + replace: if to replace the existing config + merge_conflict: if to merge the conflicts Returns: self with the config loaded in memory @@ -198,23 +224,30 @@ def load( self.config = config return self + # if replace config, remove the stored config + if replace: + self.config = {} + filename, file_extension = os.path.splitext(config) # if the config is a single config if file_extension == "." 
+ self.extension: - logger.info(f"{'='*5} Loading single file") - self.config = self._load_method(config) - return self + logger.info(f"{'='*5} Loading single file {config}") if self.config is None: self.config = {} - self.config = self.join(self.config, config, ignored=ignored) + self.config = self.join( + self.config, config, ignored=ignored, keep=keep, merge_conflict=merge_conflict) + # FIX: # in some occasions the folder containing the config will become the # level1 key, fix this by loading the values instead base_folder = os.path.basename(config) if base_folder in self.config: self.config = self.config[base_folder] + elif self.config.get("", None) is not None: + self.config = self.config[""] + if add_path: self.config["config_path"] = config @@ -284,7 +317,7 @@ def convert( config_path, str ), f"expected str or None got {type(config_path)}" assert isinstance(filename, str), f"expected str got {type(filename)}" - assert isinstance(parser, Parser), f"expected ktr got {type(parser)}" + assert isinstance(parser, Parser), f"expected Parser got {type(parser)}" # ensure the file extension are correct if config_path is not None: diff --git a/src/configen/cli.py b/src/configen/cli.py index 0bdac14..4bb1a2b 100755 --- a/src/configen/cli.py +++ b/src/configen/cli.py @@ -28,6 +28,11 @@ def entry(args): "-i", "--ignored", nargs="*", help="list of files to be ignored, support regex", type=str) + parser.add_argument( + "-k", "--keep", + nargs="*", + help="""list of files to be kept (outside of keep list will not be + included), support regex""", type=str) parser.add_argument( "-v", "--verbose", help="debug level", type=str, default="INFO") @@ -46,6 +51,7 @@ def entry(args): config_path = args.path output_path = args.output ignored = tuple(args.ignored) if args.ignored else () + keep = tuple(args.keep) if args.keep else ("",) append_dict = json.loads(args.append) if args.append else {} read_format = args.read @@ -74,8 +80,15 @@ def entry(args): # load config for 
config_parser_name in read_format: + logger.info(f"Reading {config_parser_name}") config_parser = config_parser_dict[config_parser_name] - config = config_parser.load(config=config_path, ignored=ignored) + files = [config_path] + if os.path.isdir(config_path): + files = os.listdir(config_path) + files = map(lambda x: os.path.join(config_path, x), files) + files = sorted(files) + for file in files: + config = config_parser.load(config=file, ignored=ignored, keep=keep) merge(mega_config, config.config) # override the append dict diff --git a/src/configen/utils.py b/src/configen/utils.py index c0937af..e581f6c 100644 --- a/src/configen/utils.py +++ b/src/configen/utils.py @@ -8,7 +8,8 @@ def merge( a: Dict[Any, Any], b: Dict[Any, Any], path: Optional[List[str]] = None, a_parent: Optional[Dict[str, str]] = None, - b_parent: Optional[Dict[str, str]] = None): + b_parent: Optional[Dict[str, str]] = None, + merge_conflict: bool = True): """Merges dictionary b into dictionary a. Handles duplicate leaf vale @@ -31,13 +32,13 @@ def merge( logger.debug(f"Same value at {current_path}") pass # same leaf value # if both children are list, append them - elif isinstance(a[key], list) and isinstance(b[key], list): + elif isinstance(a[key], list) and isinstance(b[key], list) and merge_conflict: logger.warning(f"Merger at {current_path}") a[key] += b[key] # conflict arise when the value of a and b are different # and they are not both sub-dictionary wich we can combine again # resolve by appending them to a list - else: + elif merge_conflict: logger.warning(f"Conflict at {current_path}") if a_parent is not None and b_parent is not None: parent_key = path[-1] @@ -45,6 +46,8 @@ def merge( a_parent[parent_key] = [a, ] a_parent[parent_key].append(b) logger.warning(f"Added child to parent at {current_path}") + else: + raise ValueError(f"Conflict at {current_path}") # copy value from b if key not present in a else: a[key] = b[key] diff --git a/tests/test_parser.py b/tests/test_parser.py 
index 8db221d..83ae94a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,8 +1,8 @@ """Test the parser.""" -import unittest +import os import tempfile # create temp config files +import unittest from contextlib import contextmanager -import os from typing import Tuple from configen.base_parser import Parser @@ -19,41 +19,19 @@ class TestParser(unittest.TestCase): config_truth = { "name": "config-01", "training": True, - "parameters": { - "num_nodes": 200, - "num_samples": 100, - "max_time": 40 - }, + "parameters": {"num_nodes": 200, "num_samples": 100, "max_time": 40}, "pipeline": [ - { - "name": "extraction", - "function": "etl.extraction" - }, - { - "name": "training", - "function": "model.training" - }, - { - "name": "evaluation", - "function": "model.evaluation" - }, - { - "name": "deployment", - "function": "cloud.deploy" - } + {"name": "extraction", "function": "etl.extraction"}, + {"name": "training", "function": "model.training"}, + {"name": "evaluation", "function": "model.evaluation"}, + {"name": "deployment", "function": "cloud.deploy"}, ], "function": { - "function1": { - "name": "transform", - "param": "col1" - }, - "function2": { - "name": "load", - "param": "col2" - } - } + "function1": {"name": "transform", "param": "col1"}, + "function2": {"name": "load", "param": "col2"}, + }, } - new_config = {"name": "new"} + new_config = {"name": "config-01"} # parser configs base_path = os.path.dirname(os.path.realpath(__file__)) @@ -61,11 +39,11 @@ class TestParser(unittest.TestCase): dir_path = os.path.abspath(dir_path) config_path = { "json": os.path.join(dir_path, "sample.json"), - "yml": os.path.join(dir_path, "sample.yml") + "yml": os.path.join(dir_path, "sample.yml"), } config_folder = { "json": os.path.join(dir_path, "config-json/"), - "yml": os.path.join(dir_path, "config-yml/") + "yml": os.path.join(dir_path, "config-yml/"), } config_dict_raw = {} @@ -90,9 +68,7 @@ def write_tempfile(self, filename: str, config: str): def 
test_write_tempfile(self): """Function should write a config to a specific filename.""" config = {"name": "config1"} - with self.write_tempfile( - filename="config.json", - config=config) as filepath: + with self.write_tempfile(filename="config.json", config=config) as filepath: with open(filepath) as file: self.assertEqual(file.read(), str(config), file.read()) @@ -111,7 +87,9 @@ def test_append_extension(self): # given a extension that is not the parser extension, parser will # still append the extension self.assertEqual( - f"config.tmp.{ext}", parser._append_extension(f"config.tmp.{ext}")) + f"config.tmp.{ext}", parser._append_extension( + f"config.tmp.{ext}") + ) # raises assertion error when give non string input self.assertRaises(AssertionError, parser._append_extension, 123) @@ -124,8 +102,8 @@ def test_load_method(self): # write a sample config sample_config = self.config_dict_raw[ext] with self.write_tempfile( - filename="config." + ext, - config=sample_config) as filename: + filename="config." + ext, config=sample_config + ) as filename: # load the config config_loaded = parser._load_method(filename) # yaml will load the config as ordered dictionary @@ -140,9 +118,7 @@ def test_write_method(self): ext = parser.extension parser = parser(self.config_truth) # write - with self.write_tempfile( - filename="config." + ext, - config="") as filename: + with self.write_tempfile(filename="config." 
+ ext, config="") as filename: # write the config parser._write_method(filename) # rely on the implemented load method @@ -154,21 +130,24 @@ def test_load(self): """Function should be able to load single config, a folder of configs and a dictionary containing the config itself.""" for parser in self.parsers: + # dictionary containing the config parser = parser() ext = parser.extension + loaded_config = parser.load(self.config_truth, replace=True).config + msg = f"loading {parser} with dictionary containing config" + self.assertEqual(loaded_config, self.config_truth, msg) + # single config config_path = self.config_path[ext] - loaded_config = parser.load(config_path).config - self.assertEqual(loaded_config, self.config_truth, parser) + loaded_config = parser.load(config_path, replace=True).config + msg = f"loading {parser} with single config" + self.assertEqual(loaded_config, self.config_truth, msg) # folder of config config_folder = self.config_folder[ext] - loaded_config = parser.load(config_folder).config - self.assertEqual(loaded_config, self.config_truth, parser) - - # dictionary containing the config - loaded_config = parser.load(self.config_truth).config - self.assertEqual(loaded_config, self.config_truth, parser) + loaded_config = parser.load(config_folder, replace=True).config + msg = f"loading {parser} with folder of config" + self.assertEqual(loaded_config, self.config_truth, msg) def test_write(self): """Function should be able to write stored config to file, or a new @@ -177,17 +156,13 @@ def test_write(self): parser = parser(self.config_truth) ext = parser.extension # write stored config - with self.write_tempfile( - filename="config." + ext, - config="") as filename: + with self.write_tempfile(filename="config." + ext, config="") as filename: parser.write(filename) loaded_config = parser._load_method(filename) self.assertEqual(loaded_config, self.config_truth, parser) # write new config - with self.write_tempfile( - filename="config." 
+ ext, - config="") as filename: + with self.write_tempfile(filename="config." + ext, config="") as filename: parser.write(filename, self.new_config) loaded_config = parser._load_method(filename) self.assertEqual(loaded_config, self.new_config, parser) @@ -196,16 +171,19 @@ def test_search_method(self): """Function return True if regex match, else False.""" parser = Parser() ignored = ("pipeline.*",) - self.assertTrue(parser._search_match("pipeline.json", ignored=ignored)) - self.assertFalse(parser._search_match("nihao.yml", ignored=ignored)) + self.assertTrue(parser._search_match( + "pipeline.json", check_list=ignored)) + self.assertFalse(parser._search_match("nihao.yml", check_list=ignored)) ignored = ("^pipeline.*",) - self.assertTrue(parser._search_match("pipeline.json", ignored=ignored)) - self.assertFalse(parser._search_match("nihao.yml", ignored=ignored)) + self.assertTrue(parser._search_match( + "pipeline.json", check_list=ignored)) + self.assertFalse(parser._search_match("nihao.yml", check_list=ignored)) ignored = (".*json",) - self.assertTrue(parser._search_match("pipeline.json", ignored=ignored)) - self.assertFalse(parser._search_match("nihao.yml", ignored=ignored)) + self.assertTrue(parser._search_match( + "pipeline.json", check_list=ignored)) + self.assertFalse(parser._search_match("nihao.yml", check_list=ignored)) def test_ignore(self): """Function should be able to ignore some config files.""" @@ -215,10 +193,9 @@ def test_ignore(self): for parser in self.parsers: parser = parser() ext = parser.extension - parser.load(config=self.config_folder[ext], ignored="pipeline.*") + parser.load( + config=self.config_folder[ext], ignored=("pipeline.*",)) loaded_config = dict(parser.config) - print(loaded_config) - print(config_truth) self.assertEqual(loaded_config, config_truth, parser) def test_convert(self): @@ -235,24 +212,29 @@ def test_convert(self): other_ext = parser.extension # save the stored config with self.write_tempfile( - filename="config." 
+ other_ext, - config="") as filename: + filename="config." + other_ext, config="" + ) as filename: parser.convert(filename=filename, parser=other_parser) loaded_config = other_parser._load_method(filename) - self.assertEqual(loaded_config, self.new_config, f"{parser}, {other_parser}") + self.assertEqual( + loaded_config, self.new_config, f"{parser}, {other_parser}" + ) # save the config path with self.write_tempfile( - filename="config." + other_ext, - config="") as filename: + filename="config." + other_ext, config="" + ) as filename: config_path = self.config_path[ext] parser.convert( - filename=filename, parser=other_parser, config_path=config_path) + filename=filename, parser=other_parser, config_path=config_path + ) loaded_config = other_parser._load_method(filename) # yaml load config as ordered dict # we convert it back to dict for comparison loaded_config = dict(loaded_config) - self.assertEqual(loaded_config, self.config_truth, f"{parser}, {other_parser}") + self.assertEqual( + loaded_config, self.config_truth, f"{parser}, {other_parser}" + ) if __name__ == "__main__":