From d970cfe8f0e6593bc7df5cbf5523abcb3378cb2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20S=C3=A1nchez-Gallego?=
Date: Thu, 12 Dec 2024 19:45:16 -0800
Subject: [PATCH] Add option to process all files in a directory

The positional FILES argument of `process` now accepts directories in
addition to regular files (`dir_okay=True`). For each directory passed,
the `*.csv` and `*.parquet` files directly inside it (non-recursive
glob) are collected; regular files are used as given.

The debug message reports the number of files that will actually be
read after directories have been expanded, not the number of
command-line arguments.

The CLI update test now passes the test files directory instead of
listing the two input files individually.
---
 src/too/__main__.py | 17 +++++++++++++----
 tests/test_cli.py   |  3 +--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/too/__main__.py b/src/too/__main__.py
index 502417f..1ef45b9 100644
--- a/src/too/__main__.py
+++ b/src/too/__main__.py
@@ -61,8 +61,9 @@ def process(
         list[pathlib.Path] | None,
         typer.Argument(
             exists=True,
-            dir_okay=False,
-            help="The list of files to process.",
+            dir_okay=True,
+            help="The list of files to process. If a directory is passed, all the "
+            "files in the directory will be processed.",
         ),
     ] = None,
     verbose: Annotated[
@@ -121,9 +122,17 @@ def process(
     )
 
     if files is not None and len(files) > 0:
-        log.debug("Reading input files.")
-        targets = polars.DataFrame({}, schema=too_dtypes)
+        process_files: list[pathlib.Path] = []
         for file in files:
+            if file.is_dir():
+                process_files.extend(file.glob("*.csv"))
+                process_files.extend(file.glob("*.parquet"))
+            else:
+                process_files.append(file)
+
+        log.debug(f"Reading {len(process_files)} input file(s).")
+        targets = polars.DataFrame({}, schema=too_dtypes)
+        for file in process_files:
             targets = targets.vstack(read_too_file(file, cast=True))
 
         log.info("Loading targets into the database.")
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e60e885..02894c5 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -70,8 +70,7 @@ def test_cli_update(files_path: pathlib.Path, too_mock: polars.DataFrame):
             "sdss5db_too_test",
             "--write-log",
             str(files_path / "too.log"),
-            str(files_path / "too2.csv"),
-            str(files_path / "too3.parquet"),
+            str(files_path),
         ],
     )
 