diff --git a/terracotta/scripts/click_utils.py b/terracotta/scripts/click_utils.py
index fcc72f96..bc69398d 100644
--- a/terracotta/scripts/click_utils.py
+++ b/terracotta/scripts/click_utils.py
@@ -50,23 +50,27 @@ def convert(self, value: str, *args: Any) -> RasterPatternType:
         for before_field, field_name, _, _ in parsed_value:
             glob_pattern += before_field
             regex_pattern += re.escape(before_field)
-            if field_name is None:
+            if field_name is None:  # no placeholder
                 continue
-            if field_name == '':
+            glob_pattern += '*'
+            if field_name == '':  # unnamed placeholder
                 regex_pattern += '.*?'
-            elif field_name in keys:
+            elif field_name in keys:  # duplicate placeholder
                 key_group_number = keys.index(field_name) + 1
                 regex_pattern += f'\\{key_group_number}'
-            else:
+            else:  # new placeholder
                 keys.append(field_name)
-                regex_pattern += f'(?P<{field_name}>[a-zA-Z0-9]+)'
-            glob_pattern += '*'
+                regex_pattern += f'(?P<{field_name}>[^\\W_]+)'
 
         if not keys:
             self.fail('Pattern must contain at least one placeholder')
 
+        # validate the whole name: a stray character would corrupt the named group
+        if not all(re.fullmatch(r'\w+', key) for key in keys):
+            self.fail('Key names must be alphanumeric')
+
         # use glob to find candidates, regex to extract placeholder values
-        candidates = [os.path.realpath(candidate) for candidate in glob.glob(glob_pattern)]
+        candidates = map(os.path.realpath, glob.glob(glob_pattern))
         matched_candidates = [re.match(regex_pattern, candidate) for candidate in candidates]
 
         if not any(matched_candidates):
diff --git a/tests/scripts/test_create_database.py b/tests/scripts/test_create_database.py
index 7d87fdf7..9aa72fdd 100644
--- a/tests/scripts/test_create_database.py
+++ b/tests/scripts/test_create_database.py
@@ -41,39 +41,66 @@
         'expected_keys': ['name'],
         'expected_datasets': [('foo',)]
     },
-    {  # unicode
+    {  # case-sensitivity
+        'filenames': ['bAr.tif', 'FOO.tif'],
+        'input_pattern': '{nAmE}.tif',
+        'expected_keys': ['nAmE'],
+        'expected_datasets': [('bAr',), ('FOO',)]
+    },
+    {  # unicode path
         'filenames': ['$*)-?:_«}ä»/foo.tif'],
         'input_pattern': '{}/{name}.tif',
         'expected_keys': ['name'],
         'expected_datasets': [('foo',)]
+    },
+    {  # unicode key
+        'filenames': ['günther.tif'],
+        'input_pattern': '{bärbel}.tif',
+        'expected_keys': ['bärbel'],
+        'expected_datasets': [('günther',)]
     }
 )
 
 INVALID_TEST_CASES = (
-    {
+    {  # no matching files
         'filenames': [],
-        'input_pattern': 'notafile{key}.tif',
+        'input_pattern': '{key}.tif',
         'error_contains': 'matches no files'
     },
-    {
+    {  # duplicate keys in different folders
         'filenames': ['dir1/foo.tif', 'dir2/foo.tif'],
         'input_pattern': '{}/{name}.tif',
         'error_contains': 'duplicate keys'
     },
-    {
+    {  # duplicate keys through wildcard
         'filenames': ['S2_B04.tif', 'S2_20180101_B04.tif'],
         'input_pattern': '{sensor}_{}.tif',
         'error_contains': 'duplicate keys'
     },
-    {
+    {  # no groups in pattern
         'filenames': [],
         'input_pattern': 'notafile.tif',
         'error_contains': 'at least one placeholder'
     },
-    {
+    {  # only wildcards in pattern
+        'filenames': [],
+        'input_pattern': '{}.tif',
+        'error_contains': 'at least one placeholder'
+    },
+    {  # stray {
         'filenames': [],
         'input_pattern': 'notafile{.tif',
         'error_contains': 'invalid pattern'
+    },
+    {  # invalid placeholder name
+        'filenames': ['foo.tif'],
+        'input_pattern': '{(foo)}.tif',
+        'error_contains': 'must be alphanumeric'
+    },
+    {  # invalid character after a valid first character
+        'filenames': ['foo.tif'],
+        'input_pattern': '{foo)}.tif',
+        'error_contains': 'must be alphanumeric'
     }
 )
 
@@ -130,7 +157,7 @@ def test_create_database_pattern(case, abspath, raster_file, tmpworkdir):
     from terracotta import get_driver
     driver = get_driver(str(outfile), provider='sqlite')
     assert driver.key_names == tuple(case['expected_keys'])
-    assert tuple(driver.get_datasets().keys()) == tuple(case['expected_datasets'])
+    assert all(ds in driver.get_datasets() for ds in case['expected_datasets'])
 
 
 @pytest.mark.parametrize('case', INVALID_TEST_CASES)