Skip to content

Commit

Permalink
rename create-database to ingest; enhance optimize-rasters
Browse files Browse the repository at this point in the history
  • Loading branch information
dionhaefner committed Oct 29, 2018
1 parent 8cf743e commit 5aba95f
Show file tree
Hide file tree
Showing 10 changed files with 248 additions and 108 deletions.
8 changes: 4 additions & 4 deletions terracotta/scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import click

from terracotta.scripts.click_utils import TOMLFile
from terracotta.scripts.click_types import TOMLFile
from terracotta import get_settings, update_settings, logs, __version__


Expand Down Expand Up @@ -42,7 +42,7 @@ def cli(ctx: click.Context,
def entrypoint() -> None:
try:
cli(obj={})
except Exception as exc:
except Exception:
import logging
logger = logging.getLogger(__name__)
logger.exception('Uncaught exception!', exc_info=True)
Expand All @@ -52,8 +52,8 @@ def entrypoint() -> None:
from terracotta.scripts.connect import connect
cli.add_command(connect)

from terracotta.scripts.create_database import create_database
cli.add_command(create_database)
from terracotta.scripts.ingest import ingest
cli.add_command(ingest)

from terracotta.scripts.optimize_rasters import optimize_rasters
cli.add_command(optimize_rasters)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""scripts/click_utils.py
"""scripts/click_types.py
Custom click parameter types and utilities.
"""
Expand All @@ -7,7 +7,6 @@
import pathlib
import glob
import re
import os
import string

import click
Expand All @@ -31,45 +30,69 @@ def convert(self, *args: Any) -> pathlib.Path: # type: ignore
RasterPatternType = Tuple[List[str], Dict[Tuple[str, ...], str]]


def _parse_raster_pattern(raster_pattern: str) -> Tuple[List[str], str, str]:
"""Parse a raster pattern string using Python format syntax.
Extracts names of unique placeholders, a glob pattern
and a regular expression to retrieve files matching the given pattern.
Example:
>>> _parse_raster_pattern('{key1}/{key2}_{}.tif')
(['key1', 'key2'], '*/*_*.tif', '(?P<key1>[^\\W_]+)/(?P<key2>[^\\W_]+)_.*?\\.tif')
"""

# raises ValueError on invalid patterns
parsed_value = string.Formatter().parse(raster_pattern)

keys: List[str] = []
glob_pattern: List[str] = []
regex_pattern: List[str] = []

for before_field, field_name, _, _ in parsed_value:
glob_pattern += before_field
regex_pattern += re.escape(before_field)

if field_name is None:
# no placeholder
continue

glob_pattern.append('*')

if field_name == '':
# unnamed placeholder
regex_pattern.append('.*?')
elif field_name in keys:
# duplicate placeholder
key_group_number = keys.index(field_name) + 1
regex_pattern.append(rf'\{key_group_number}')
else:
# new placeholder
keys.append(field_name)
regex_pattern += rf'(?P<{field_name}>[^\W_]+)'

return keys, ''.join(glob_pattern), ''.join(regex_pattern)


class RasterPattern(click.ParamType):
"""Expands a pattern following the Python format specification to matching files"""
name = 'raster-pattern'

def convert(self, value: str, *args: Any) -> RasterPatternType:
value = os.path.realpath(value)

try:
parsed_value = list(string.Formatter().parse(value))
keys, glob_pattern, regex_pattern = _parse_raster_pattern(value)
except ValueError as exc:
self.fail(f'Invalid pattern: {exc!s}')

# extract keys from format string and assemble glob and regex patterns matching it
keys = []
glob_pattern = ''
regex_pattern = ''
for before_field, field_name, _, _ in parsed_value:
glob_pattern += before_field
regex_pattern += re.escape(before_field)
if field_name is None: # no placeholder
continue
glob_pattern += '*'
if field_name == '': # unnamed placeholder
regex_pattern += '.*?'
elif field_name in keys: # duplicate placeholder
key_group_number = keys.index(field_name) + 1
regex_pattern += f'\\{key_group_number}'
else: # new placeholder
keys.append(field_name)
regex_pattern += f'(?P<{field_name}>[^\\W_]+)'

if not keys:
self.fail('Pattern must contain at least one placeholder')

if not all(re.match(r'\w', key) for key in keys):
self.fail('Key names must be alphanumeric')

# use glob to find candidates, regex to extract placeholder values
candidates = map(os.path.realpath, glob.glob(glob_pattern))
candidates = glob.glob(glob_pattern)
matched_candidates = [re.match(regex_pattern, candidate) for candidate in candidates]

if not any(matched_candidates):
Expand Down
2 changes: 1 addition & 1 deletion terracotta/scripts/connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import click

from terracotta.scripts.click_utils import Hostname
from terracotta.scripts.click_types import Hostname
from terracotta.scripts.http_utils import find_open_port


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""scripts/create_database.py
"""scripts/ingest.py
A convenience tool to create a Terracotta database from some raster files.
"""
Expand All @@ -10,32 +10,29 @@
import click
import tqdm

from terracotta.scripts.click_utils import RasterPattern, RasterPatternType, PathlibPath
from terracotta.scripts.click_types import RasterPattern, RasterPatternType, PathlibPath

logger = logging.getLogger(__name__)


@click.command('create-database',
short_help='Create a new SQLite raster database from a collection of raster files.')
@click.command('ingest',
short_help='Ingest a collection of raster files into a SQLite database.')
@click.argument('raster-pattern', type=RasterPattern(), required=True)
@click.option('-o', '--output-file', required=True, help='Path to output file',
type=PathlibPath(dir_okay=False, writable=True))
@click.option('--overwrite', is_flag=True, default=False,
help='Always overwrite existing database without asking')
@click.option('--skip-metadata', is_flag=True, default=False,
help='Speed up ingestion by not pre-computing metadata '
help='Speed up ingestion by skipping computation of metadata '
'(will be computed on first request instead)')
@click.option('--rgb-key', default=None,
help='Key to use for RGB compositing [default: last key in pattern]')
@click.option('-q', '--quiet', is_flag=True, default=False, show_default=True,
help='Suppress all output to stdout')
def create_database(raster_pattern: RasterPatternType,
output_file: Path,
overwrite: bool = False,
skip_metadata: bool = False,
rgb_key: str = None,
quiet: bool = False) -> None:
"""Create a new SQLite raster database from a collection of raster files.
def ingest(raster_pattern: RasterPatternType,
output_file: Path,
skip_metadata: bool = False,
rgb_key: str = None,
quiet: bool = False) -> None:
"""Ingest a collection of raster files into a SQLite database.
First argument is a format pattern defining paths and keys of all raster files.
Expand All @@ -45,16 +42,13 @@ def create_database(raster_pattern: RasterPatternType,
The empty group {} is replaced by a wildcard matching anything (similar to * in glob patterns).
Existing datasets are silently overwritten.
This command only supports the creation of a simple SQLite database without any additional
metadata. For more sophisticated use cases use the Terracotta Python API.
"""
from terracotta import get_driver

if output_file.is_file() and not overwrite:
click.confirm(f'Existing output file {output_file} will be overwritten. Continue?',
abort=True)
output_file.unlink()

keys, raster_files = raster_pattern

if rgb_key is not None:
Expand All @@ -71,7 +65,16 @@ def push_to_last(seq: Sequence[Any], index: int) -> Tuple[Any, ...]:
raster_files = {push_to_last(k, rgb_idx): v for k, v in raster_files.items()}

driver = get_driver(output_file)
driver.create(keys)

if not output_file.is_file():
driver.create(keys)

if tuple(keys) != driver.key_names:
click.echo(
f'Database file {output_file!s} has incompatible key names {driver.key_names}',
err=True
)
click.Abort()

with driver.connect():
progress = tqdm.tqdm(raster_files.items(), desc='Ingesting raster files', disable=quiet)
Expand Down
Loading

0 comments on commit 5aba95f

Please sign in to comment.