Skip to content

Commit

Permalink
200 IQ raster pattern implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
dionhaefner committed Oct 27, 2018
1 parent 759fa13 commit 153c7a6
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 12 deletions.
29 changes: 19 additions & 10 deletions terracotta/scripts/click_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,38 @@ class RasterPattern(click.ParamType):
name = 'raster-pattern'

def convert(self, value: str, *args: Any) -> RasterPatternType:
value = os.path.realpath(value).replace('\\', '\\\\')
value = os.path.realpath(value)

try:
parsed_value = list(string.Formatter().parse(value))
except ValueError as exc:
self.fail(f'Invalid pattern: {exc!s}')

# extract keys from format string and assemble glob and regex patterns matching it
keys = [field_name for _, field_name, _, _ in parsed_value if field_name]
glob_pattern = value.format(**{k: '*' for k in keys})
regex_pattern = value.format(**{k: f'(?P<{k}>\\w+)' for k in keys})
keys = []
glob_pattern = ''
regex_pattern = ''
for before_field, field_name, _, _ in parsed_value:
glob_pattern += before_field
regex_pattern += re.escape(before_field)
if field_name is None:
continue
if field_name == '':
regex_pattern += '.*?'
elif field_name in keys:
key_group_number = keys.index(field_name) + 1
regex_pattern += f'\\{key_group_number}'
else:
keys.append(field_name)
regex_pattern += f'(?P<{field_name}>[a-zA-Z0-9]+)'
glob_pattern += '*'

if not keys:
self.fail('Pattern must contain at least one placeholder')

try:
compiled_pattern = re.compile(regex_pattern)
except re.error as exc:
self.fail(f'Could not parse pattern to regex: {exc!s}')

# use glob to find candidates, regex to extract placeholder values
candidates = [os.path.realpath(candidate) for candidate in glob.glob(glob_pattern)]
matched_candidates = [compiled_pattern.match(candidate) for candidate in candidates]
matched_candidates = [re.match(regex_pattern, candidate) for candidate in candidates]

if not any(matched_candidates):
self.fail('Given pattern matches no files')
Expand Down
6 changes: 4 additions & 2 deletions terracotta/scripts/create_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@ def create_database(raster_pattern: RasterPatternType,
quiet: bool = False) -> None:
"""Create a new SQLite raster database from a collection of raster files.
First arguments is a format pattern defining paths and keys of all raster files.
First argument is a format pattern defining paths and keys of all raster files.
Example:
terracotta create-database /path/to/rasters/{name}/{date}_{band}.tif -o out.sqlite
terracotta create-database /path/to/rasters/{name}/{date}_{band}{}.tif -o out.sqlite
The empty group {} is replaced by a wildcard matching anything (similar to * in glob patterns).
This command only supports the creation of a simple SQLite database without any additional
metadata. For more sophisticated use cases use the Terracotta Python API.
Expand Down
133 changes: 133 additions & 0 deletions tests/scripts/test_create_database.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,92 @@
import pytest
from click.testing import CliRunner

import os
import shutil

# Valid raster patterns. Each case lists the files to create (relative to the
# working directory), the pattern passed to ``create-database``, and the key
# names and dataset keys expected in the resulting database.
TEST_CASES = (
    {  # basic: a single placeholder
        'filenames': ['foo.tif'],
        'input_pattern': '{name}.tif',
        'expected_keys': ['name'],
        'expected_datasets': [('foo',)]
    },
    {  # two keys
        'filenames': ['S2_20180101_B04.tif'],
        'input_pattern': '{sensor}_{date}_B04.tif',
        'expected_keys': ['sensor', 'date'],
        'expected_datasets': [('S2', '20180101')]
    },
    {  # placeholder matching a subfolder name
        'filenames': ['S2/20180101_B04.tif'],
        'input_pattern': '{sensor}/{date}_B04.tif',
        'expected_keys': ['sensor', 'date'],
        'expected_datasets': [('S2', '20180101')]
    },
    {  # anonymous {} wildcard: matches the optional "_median" suffix without adding a key
        'filenames': ['S2/20180101_B04.tif', 'S2/20180101_B05_median.tif'],
        'input_pattern': '{sensor}/{date}_{band}{}.tif',
        'expected_keys': ['sensor', 'date', 'band'],
        'expected_datasets': [('S2', '20180101', 'B04'), ('S2', '20180101', 'B05')]
    },
    {  # keys occurring more than once in the pattern are reported only once
        'filenames': ['S2/20180101/S2_20180101_B04.tif'],
        'input_pattern': '{sensor}/{date}/{sensor}_{date}_{band}.tif',
        'expected_keys': ['sensor', 'date', 'band'],
        'expected_datasets': [('S2', '20180101', 'B04')]
    },
    {  # literal braces in the file name, escaped as {{ and }} in the pattern
        'filenames': ['bar.tif', '{foo}.tif'],
        'input_pattern': '{{{name}}}.tif',
        'expected_keys': ['name'],
        'expected_datasets': [('foo',)]
    },
    {  # special and non-ASCII characters in a folder matched by the {} wildcard
        'filenames': ['$*)-?:_«}ä»/foo.tif'],
        'input_pattern': '{}/{name}.tif',
        'expected_keys': ['name'],
        'expected_datasets': [('foo',)]
    }
)

# Patterns that must make ``create-database`` fail. ``error_contains`` is the
# text expected (case-insensitively) somewhere in the command output.
INVALID_TEST_CASES = (
    {  # well-formed pattern, but no file on disk matches it
        'filenames': [],
        'input_pattern': 'notafile{key}.tif',
        'error_contains': 'matches no files'
    },
    {  # wildcard folder makes two different files yield the same key ('foo')
        'filenames': ['dir1/foo.tif', 'dir2/foo.tif'],
        'input_pattern': '{}/{name}.tif',
        'error_contains': 'duplicate keys'
    },
    {  # wildcard suffix makes two different files yield the same key ('S2')
        'filenames': ['S2_B04.tif', 'S2_20180101_B04.tif'],
        'input_pattern': '{sensor}_{}.tif',
        'error_contains': 'duplicate keys'
    },
    {  # no placeholder at all
        'filenames': [],
        'input_pattern': 'notafile.tif',
        'error_contains': 'at least one placeholder'
    },
    {  # unbalanced brace: not a valid format string
        'filenames': [],
        'input_pattern': 'notafile{.tif',
        'error_contains': 'invalid pattern'
    }
)


@pytest.fixture()
def tmpworkdir(tmpdir):
    """Run the test with ``tmpdir`` as the current working directory.

    Yields ``tmpdir`` itself. The working directory that was active before
    the test is restored afterwards, also when the test fails.
    """
    previous_cwd = os.getcwd()
    try:
        os.chdir(tmpdir)
        yield tmpdir
    finally:
        os.chdir(previous_cwd)


def test_create_database(raster_file, tmpdir):
from terracotta.scripts import cli
Expand All @@ -18,6 +105,52 @@ def test_create_database(raster_file, tmpdir):
assert driver.get_datasets() == {('img',): str(raster_file)}


@pytest.mark.parametrize('case', TEST_CASES)
@pytest.mark.parametrize('abspath', [True, False])
def test_create_database_pattern(case, abspath, raster_file, tmpworkdir):
    """Check that valid raster patterns produce the expected keys and datasets.

    Every case is run twice: once with the pattern given relative to the
    working directory and once converted to an absolute path.
    """
    from terracotta.scripts import cli

    # lay out copies of the sample raster under the file names of this case
    for relative_name in case['filenames']:
        target = tmpworkdir / relative_name
        os.makedirs(target.dirpath(), exist_ok=True)
        shutil.copy(raster_file, target)

    database_path = tmpworkdir / 'out.sqlite'

    pattern = case['input_pattern']
    if abspath:
        pattern = os.path.abspath(tmpworkdir / pattern)

    cli_args = ['create-database', pattern, '-o', str(database_path)]
    result = CliRunner().invoke(cli.cli, cli_args)
    assert result.exit_code == 0, result.output
    assert database_path.check()

    # inspect the database that the command just wrote
    from terracotta import get_driver
    driver = get_driver(str(database_path), provider='sqlite')
    assert driver.key_names == tuple(case['expected_keys'])
    assert tuple(driver.get_datasets().keys()) == tuple(case['expected_datasets'])


@pytest.mark.parametrize('case', INVALID_TEST_CASES)
def test_create_database_invalid_pattern(case, raster_file, tmpworkdir):
    """Check that invalid raster patterns abort the command with a helpful message.

    The command must exit with a non-zero code, and its output must contain
    the case's ``error_contains`` text (compared case-insensitively).
    """
    from terracotta.scripts import cli

    # lay out copies of the sample raster under the file names of this case
    for relative_name in case['filenames']:
        target = tmpworkdir / relative_name
        os.makedirs(target.dirpath(), exist_ok=True)
        shutil.copy(raster_file, target)

    database_path = tmpworkdir / 'out.sqlite'
    cli_args = ['create-database', case['input_pattern'], '-o', str(database_path)]

    result = CliRunner().invoke(cli.cli, cli_args)
    assert result.exit_code != 0
    expected_fragment = case['error_contains'].lower()
    assert expected_fragment in result.output.lower()


def test_create_database_rgb_key(raster_file, tmpdir):
from terracotta.scripts import cli

Expand Down

0 comments on commit 153c7a6

Please sign in to comment.