From e98197bba771a24a8baf3f8bf24909ef9d09c35e Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Fri, 30 Aug 2024 10:15:58 +0200 Subject: [PATCH] Code styling (#27) * Update docs * Update docs and meta package * Update docs and meta package * Code styling --- README.rst | 36 ++--- setup.cfg | 5 + src/c3s_sm/cli.py | 350 +++++++++++++++++++++++++--------------- src/c3s_sm/const.py | 36 +++-- src/c3s_sm/download.py | 101 +++++++----- src/c3s_sm/interface.py | 104 +++++++----- src/c3s_sm/metadata.py | 193 +++++++++++++--------- src/c3s_sm/misc.py | 30 ++-- src/c3s_sm/reshuffle.py | 106 +++++++----- 9 files changed, 591 insertions(+), 370 deletions(-) diff --git a/README.rst b/README.rst index d1b3416..a05a59d 100644 --- a/README.rst +++ b/README.rst @@ -2,18 +2,20 @@ c3s_sm ============ +|ci| |cov| |pip| |doc| -.. image:: https://github.com/TUW-GEO/c3s_sm/workflows/Automated%20Tests/badge.svg?branch=master +.. |ci| image:: https://github.com/TUW-GEO/c3s_sm/actions/workflows/ci.yml/badge.svg?branch=master :target: https://github.com/TUW-GEO/c3s_sm/actions -.. image:: https://coveralls.io/repos/github/TUW-GEO/c3s_sm/badge.svg?branch=master - :target: https://coveralls.io/github/TUW-GEO/c3s_sm?branch=master +.. |cov| image:: https://coveralls.io/repos/TUW-GEO/c3s_sm/badge.png?branch=master + :target: https://coveralls.io/r/TUW-GEO/c3s_sm?branch=master + +.. |pip| image:: https://badge.fury.io/py/c3s_sm.svg + :target: http://badge.fury.io/py/c3s-sm + +.. |doc| image:: https://readthedocs.org/projects/c3s_sm/badge/?version=latest + :target: http://c3s-sm.readthedocs.org/ -.. image:: https://badge.fury.io/py/c3s-sm.svg - :target: https://badge.fury.io/py/c3s-sm - -.. image:: https://readthedocs.org/projects/c3s_sm/badge/?version=latest - :target: https://c3s-sm.readthedocs.io/en/latest/ Processing tools and tutorials for users of the C3S satellite soil moisture service ( https://doi.org/10.24381/cds.d7782f18 ). Written in Python. @@ -21,7 +23,7 @@ service ( https://doi.org/10.24381/cds.d7782f18 ). Written in Python. Installation ============ -The c3s_sm package can be installed via +The c3s_sm package and all required dependencies can be installed via .. code-block:: shell @@ -30,7 +32,7 @@ The c3s_sm package can be installed via Tutorials ========= -We provide (general) tutorials on using the C3S Soil Moisture data: +We provide tutorials on using the C3S Soil Moisture data: - `Tutorial 1: DataAccess from CDS & Anomaly computation `_ @@ -52,14 +54,6 @@ We are happy if you want to contribute. Please raise an issue explaining what is missing or if you find a bug. We will also gladly accept pull requests against our master branch for new features or bug fixes. -Development setup ------------------ - -For Development we also recommend a ``conda`` environment. You can create one -including test dependencies and debugger by running -``conda env create -f environment.yml``. This will create a new ``c3s_sm`` -environment which you can activate by using ``source activate c3s_sm``. - Guidelines ---------- @@ -71,11 +65,5 @@ If you want to contribute please follow these steps: - make a new feature branch from the c3s_sm master branch - Add your feature - Please include tests for your contributions in one of the test directories. - We use py.test so a simple function called test_my_feature is enough - submit a pull request to our master branch -Note -==== - -This project has been set up using PyScaffold 2.5. For details and usage -information on PyScaffold see http://pyscaffold.readthedocs.org/. diff --git a/setup.cfg b/setup.cfg index d2fc0fd..148c2b4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -141,3 +141,8 @@ version = 4.5 package = c3s_sm extensions = no_skeleton + +[yapf] +based_on_style = yapf +indent_width = 4 +column_limit = 79 \ No newline at end of file diff --git a/src/c3s_sm/cli.py b/src/c3s_sm/cli.py index beb2745..dc40e81 100644 --- a/src/c3s_sm/cli.py +++ b/src/c3s_sm/cli.py @@ -7,37 +7,71 @@ from c3s_sm.reshuffle import img2ts, extend_ts from c3s_sm.const import fntempl as _default_template, check_api_read, cds_api_url -@click.command("download", context_settings={'show_default': True}, - short_help="Download C3S SM data from Climate Data Store.") + +@click.command( + "download", + context_settings={'show_default': True}, + short_help="Download C3S SM data from Climate Data Store.") @click.argument("path", type=click.Path(writable=True)) -@click.option('--startdate', '-s', - type=click.STRING, default='1978-11-01', - help="Startdate in format YYYY-MM-DD. If not given, " - "then the first available date of the product is used.") -@click.option('--enddate', '-e', - type=click.STRING, default=str(datetime.now().date()), - help="Enddate in format YYYY-MM-DD. If not given, " - "then the current date is used.") -@click.option("-p", "--product", type=click.STRING, default='combined', - help="The C3S SM sensor product to download. Choose one of " - "combined, active, passive.") -@click.option("-f", "--freq", type=click.STRING, default="daily", - help="The C3S SM sensor product temporal sampling frequency to download. " - "Choose one of: daily, dekadal, monthly.") -@click.option("-v", '--version', type=click.STRING, default="v202212", - help="The C3S SM product version to download. " - "Choose one that is on the CDS: " - "e.g. deprecated_v20191, v201706, v201812, " - "v201912_1, v202012, v202212, v202312") -@click.option("-k", "--keep", type=click.BOOL, default=False, - help="Also keep the original, temporarily downloaded image stack " - "instead of deleting it after extracting individual images.") -@click.option("--cds_token", type=click.STRING, default=None, - help="To identify with the CDS, required if no .cdsapi file exists. " - "Consists of your UID and API Key . Both can be " - "found on your CDS User profile page.") -def cli_download(path, startdate, enddate, product, freq, version, - keep, cds_token=None): +@click.option( + '--startdate', + '-s', + type=click.STRING, + default='1978-11-01', + help="Startdate in format YYYY-MM-DD. If not given, " + "then the first available date of the product is used.") +@click.option( + '--enddate', + '-e', + type=click.STRING, + default=str(datetime.now().date()), + help="Enddate in format YYYY-MM-DD. If not given, " + "then the current date is used.") +@click.option( + "-p", + "--product", + type=click.STRING, + default='combined', + help="The C3S SM sensor product to download. Choose one of " + "combined, active, passive.") +@click.option( + "-f", + "--freq", + type=click.STRING, + default="daily", + help="The C3S SM sensor product temporal sampling frequency to download. " + "Choose one of: daily, dekadal, monthly.") +@click.option( + "-v", + '--version', + type=click.STRING, + default="v202212", + help="The C3S SM product version to download. " + "Choose one that is on the CDS: " + "e.g. deprecated_v20191, v201706, v201812, " + "v201912_1, v202012, v202212, v202312") +@click.option( + "-k", + "--keep", + type=click.BOOL, + default=False, + help="Also keep the original, temporarily downloaded image stack " + "instead of deleting it after extracting individual images.") +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS, required if no .cdsapi file exists. " + "Consists of your UID and API Key . Both can be " + "found on your CDS User profile page.") +def cli_download(path, + startdate, + enddate, + product, + freq, + version, + keep, + cds_token=None): """ Download C3S SM data within a chosen period. NOTE: Before using this program, create a CDS account and set up a `.cdsapirc` file as described @@ -54,7 +88,8 @@ def cli_download(path, startdate, enddate, product, freq, version, # The docstring above is slightly different to the normal python one to # display it properly on the command line. - url = os.environ.get('CDSAPI_URL', "https://cds.climate.copernicus.eu/api/v2") + url = os.environ.get('CDSAPI_URL', + "https://cds.climate.copernicus.eu/api/v2") os.environ['CDSAPI_URL'] = url if cds_token is not None: @@ -69,23 +104,36 @@ def cli_download(path, startdate, enddate, product, freq, version, f"from {startdate.isoformat()} to {enddate.isoformat()} " f"into {path}.") - download_and_extract(path, startdate=startdate, enddate=enddate, - product=product, freq=freq, version=version, - keep_original=keep) + download_and_extract( + path, + startdate=startdate, + enddate=enddate, + product=product, + freq=freq, + version=version, + keep_original=keep) -@click.command("update_img", context_settings={'show_default': True}, - short_help="Extend an existing record by downloading new files.") +@click.command( + "update_img", + context_settings={'show_default': True}, + short_help="Extend an existing record by downloading new files.") @click.argument("path", type=click.Path(writable=True)) -@click.option("--fntempl", type=click.STRING, default=_default_template, - help="In case files don't follow the usual naming convention, " - "a custom template can be given here. Must contain fields " - "`freq`, `prod`, `vers` and `datetime`") -@click.option("--cds_token", type=click.STRING, default=None, - help="To identify with the CDS. Required if no .cdsapi file exists. " - "In the home directory (see documentation)." - "Consists of your UID and API Key . Both can be " - "found under your CDS User profile page.") +@click.option( + "--fntempl", + type=click.STRING, + default=_default_template, + help="In case files don't follow the usual naming convention, " + "a custom template can be given here. Must contain fields " + "`freq`, `prod`, `vers` and `datetime`") +@click.option( + "--cds_token", + type=click.STRING, + default=None, + help="To identify with the CDS. Required if no .cdsapi file exists. " + "In the home directory (see documentation)." + "Consists of your UID and API Key . Both can be " + "found under your CDS User profile page.") def cli_update_img(path, fntempl, cds_token=None): """ Extend a locally existing C3S SM record by downloading new files that @@ -123,66 +171,106 @@ def cli_update_img(path, fntempl, cds_token=None): startdate = first_missing_date(props['datetime'], freq=freq) - print(f"Update C3S SM images: " - f"Fetching latest data for C3S SM CDR/ICDR {freq} {product} {version} " - f"after {startdate.isoformat()} into {path}.") + print( + f"Update C3S SM images: " + f"Fetching latest data for C3S SM CDR/ICDR {freq} {product} {version} " + f"after {startdate.isoformat()} into {path}.") + + download_and_extract( + path, + startdate=startdate, + freq=freq, + version=version, + product=product, + keep_original=False) - download_and_extract(path, startdate=startdate, freq=freq, - version=version, product=product, - keep_original=False) -@click.command("reshuffle", context_settings={'show_default': True}, - short_help="Convert C3S SM images into time series.") +@click.command( + "reshuffle", + context_settings={'show_default': True}, + short_help="Convert C3S SM images into time series.") @click.argument("img_path", type=click.Path(readable=True)) @click.argument("ts_path", type=click.Path(writable=True)) -@click.option('--startdate', '-s', - type=click.STRING, default=None, - help="Format YYYY-MM-DD | First image time stamp to include in the" - "time series. [default: Date of the first available image]") -@click.option('--enddate', '-e', - type=click.STRING, default=None, - help="Format YYYY-MM-DD | Last image time stamp to include in the" - "time series. [default: Date of the last available image]") -@click.option('--parameters', '-p', multiple=True, type=click.STRING, - default=None, - help="STRING | Data variable in images to include " - "in time series. If not specified, then all variables are " - "included. You can pass this option multiple times, " - "e.g. `... -p sm -p flag ...`! " - "[default: ALL parameters are included]") -@click.option('--land', type=click.BOOL, default=True, - help="True or False | Activating this flag will exclude grid " - "cells over water are not converted to time series. " - "Leads to faster processing and smaller files, but a varying " - "number of time series in each file.") -@click.option('--bbox', nargs=4, type=click.FLOAT, - help="4 NUMBERS | min_lon min_lat max_lon max_lat. " - "Set Bounding Box (lower left and upper right corner) " - "of area to reshuffle (WGS84). [default: -180 -90 180 90]") -@click.option('--ignore_meta', type=click.BOOL, default=False, - help="True or False | Activate to NOT transfer netcdf attributes" - " from images into time series files. E.g. for unsupported " - "data versions.") -@click.option("--fntempl", type=click.STRING, default=_default_template, - help="STRING CONTAINING {PLACEHOLDERS} | If image files don't " - "follow the usual naming convention, a custom template can " - "be given here. Must contain {placeholder} fields for " - "{freq}, {product}, {version} and {datetime}.") -@click.option("--overwrite", type=click.BOOL, default=False, - help="True or False | If this is True, then any files that already" - " exist in the `output_path`, will be replaced by the newly " - "created ones. If this is False, we try to append data to " - "existing files.") -@click.option('--imgbuffer', '-b', type=click.INT, default=250, - help="NUMBER | Number of images to read into memory at once before " - "conversion to time series. A larger buffer means faster" - " processing but requires more memory.") -@click.option('--n_proc', '-n', type=click.INT, default=1, - help="NUMBER | Number of parallel processes for reading and " - "writing data.") -def cli_reshuffle(img_path, ts_path, startdate, enddate, parameters, - land, bbox, ignore_meta, fntempl, overwrite, imgbuffer, - n_proc): +@click.option( + '--startdate', + '-s', + type=click.STRING, + default=None, + help="Format YYYY-MM-DD | First image time stamp to include in the" + "time series. [default: Date of the first available image]") +@click.option( + '--enddate', + '-e', + type=click.STRING, + default=None, + help="Format YYYY-MM-DD | Last image time stamp to include in the" + "time series. [default: Date of the last available image]") +@click.option( + '--parameters', + '-p', + multiple=True, + type=click.STRING, + default=None, + help="STRING | Data variable in images to include " + "in time series. If not specified, then all variables are " + "included. You can pass this option multiple times, " + "e.g. `... -p sm -p flag ...`! " + "[default: ALL parameters are included]") +@click.option( + '--land', + type=click.BOOL, + default=True, + help="True or False | Activating this flag will exclude grid " + "cells over water are not converted to time series. " + "Leads to faster processing and smaller files, but a varying " + "number of time series in each file.") +@click.option( + '--bbox', + nargs=4, + type=click.FLOAT, + help="4 NUMBERS | min_lon min_lat max_lon max_lat. " + "Set Bounding Box (lower left and upper right corner) " + "of area to reshuffle (WGS84). [default: -180 -90 180 90]") +@click.option( + '--ignore_meta', + type=click.BOOL, + default=False, + help="True or False | Activate to NOT transfer netcdf attributes" + " from images into time series files. E.g. for unsupported " + "data versions.") +@click.option( + "--fntempl", + type=click.STRING, + default=_default_template, + help="STRING CONTAINING {PLACEHOLDERS} | If image files don't " + "follow the usual naming convention, a custom template can " + "be given here. Must contain {placeholder} fields for " + "{freq}, {product}, {version} and {datetime}.") +@click.option( + "--overwrite", + type=click.BOOL, + default=False, + help="True or False | If this is True, then any files that already" + " exist in the `output_path`, will be replaced by the newly " + "created ones. If this is False, we try to append data to " + "existing files.") +@click.option( + '--imgbuffer', + '-b', + type=click.INT, + default=250, + help="NUMBER | Number of images to read into memory at once before " + "conversion to time series. A larger buffer means faster" + " processing but requires more memory.") +@click.option( + '--n_proc', + '-n', + type=click.INT, + default=1, + help="NUMBER | Number of parallel processes for reading and " + "writing data.") +def cli_reshuffle(img_path, ts_path, startdate, enddate, parameters, land, + bbox, ignore_meta, fntempl, overwrite, imgbuffer, n_proc): """ Convert C3S SM image data into a (5x5 degrees chunked) time series format following CF conventions for 'Orthogonal multidimensional array representation' @@ -218,32 +306,41 @@ def cli_reshuffle(img_path, ts_path, startdate, enddate, parameters, print(f"From: {startdate.isoformat()}, To: {enddate.isoformat()}") print(f"Into target directory: {ts_path}") - img2ts(img_path, - ts_path, - startdate=startdate, - enddate=enddate, - parameters=parameters, - land_points=land, - bbox=bbox, - ignore_meta=ignore_meta, - fntempl=fntempl, - overwrite=overwrite, - imgbuffer=imgbuffer, - n_proc=n_proc) - - -@click.command("update_ts", context_settings={'show_default': True}, - short_help="Extend an existing time series record with " - "available image data.") + img2ts( + img_path, + ts_path, + startdate=startdate, + enddate=enddate, + parameters=parameters, + land_points=land, + bbox=bbox, + ignore_meta=ignore_meta, + fntempl=fntempl, + overwrite=overwrite, + imgbuffer=imgbuffer, + n_proc=n_proc) + + +@click.command( + "update_ts", + context_settings={'show_default': True}, + short_help="Extend an existing time series record with " + "available image data.") @click.argument("img_path", type=click.Path(readable=True)) @click.argument("ts_path", type=click.Path(writable=True)) -@click.option("--freq", type=click.STRING, default=None, - help="This option can be used to manually specify whether DAILY," - "MONTHLY or DEKADAL data are processed.") -@click.option("--fntempl", type=click.STRING, default=_default_template, - help="In case image files don't follow the usual naming " - "convention, a custom template can be given here. Must " - "contain fields `freq`, `prod`, `vers` and `datetime`") +@click.option( + "--freq", + type=click.STRING, + default=None, + help="This option can be used to manually specify whether DAILY," + "MONTHLY or DEKADAL data are processed.") +@click.option( + "--fntempl", + type=click.STRING, + default=_default_template, + help="In case image files don't follow the usual naming " + "convention, a custom template can be given here. Must " + "contain fields `freq`, `prod`, `vers` and `datetime`") def cli_update_ts(img_path, ts_path, freq, fntempl): """ Extend a locally existing C3S SM time series record by appending new data @@ -266,6 +363,7 @@ def cli_update_ts(img_path, ts_path, freq, fntempl): print(f"Extend time series in {ts_path} with image data from {img_path}") extend_ts(img_path, ts_path, fntempl=fntempl, freq=freq) + @click.group(short_help="C3S SM Command Line Programs.") def c3s_sm(): pass @@ -274,4 +372,4 @@ def c3s_sm(): c3s_sm.add_command(cli_download) c3s_sm.add_command(cli_update_img) c3s_sm.add_command(cli_reshuffle) -c3s_sm.add_command(cli_update_ts) \ No newline at end of file +c3s_sm.add_command(cli_update_ts) diff --git a/src/c3s_sm/const.py b/src/c3s_sm/const.py index 7883841..e1dd699 100644 --- a/src/c3s_sm/const.py +++ b/src/c3s_sm/const.py @@ -8,6 +8,7 @@ # CDSAPI_RC variable must be set or we use home dir dotrc = os.environ.get('CDSAPI_RC', os.path.join(Path.home(), '.cdsapirc')) + def check_api_read() -> bool: if not os.path.isfile(dotrc): url = os.environ.get('CDSAPI_URL') @@ -21,22 +22,30 @@ def check_api_read() -> bool: 'https://cds.climate.copernicus.eu/api-how-to') api_ready = False elif ":" not in key: - raise ValueError('Your CDS token is not valid. It must be in the format ' - ':, both of which are found on your CDS' - 'profile page.') + raise ValueError( + 'Your CDS token is not valid. It must be in the format ' + ':, both of which are found on your CDS' + 'profile page.') else: api_ready = True else: api_ready = True return api_ready + variable_lut = { - 'combined': {'variable': 'volumetric_surface_soil_moisture', - 'type_of_sensor': 'combined_passive_and_active'}, - 'passive': {'variable': 'volumetric_surface_soil_moisture', - 'type_of_sensor': 'passive'}, - 'active': {'variable': 'soil_moisture_saturation', - 'type_of_sensor': 'active'} + 'combined': { + 'variable': 'volumetric_surface_soil_moisture', + 'type_of_sensor': 'combined_passive_and_active' + }, + 'passive': { + 'variable': 'volumetric_surface_soil_moisture', + 'type_of_sensor': 'passive' + }, + 'active': { + 'variable': 'soil_moisture_saturation', + 'type_of_sensor': 'active' + } } freq_lut = { @@ -45,9 +54,10 @@ def check_api_read() -> bool: 'monthly': 'month_average' } -startdates = {'combined': datetime(1978,11,1), - 'passive': datetime(1978, 11, 1), - 'active': datetime(1991, 8, 5)} - +startdates = { + 'combined': datetime(1978, 11, 1), + 'passive': datetime(1978, 11, 1), + 'active': datetime(1991, 8, 5) +} fntempl = "C3S-SOILMOISTURE-L3S-SSM{unit}-{product}-{freq}-{datetime}-{record}-{version}.{subversion}.nc" diff --git a/src/c3s_sm/download.py b/src/c3s_sm/download.py index e28df4d..8be027e 100644 --- a/src/c3s_sm/download.py +++ b/src/c3s_sm/download.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - """ Module to download c3s soil moisture data from the CDS """ @@ -24,9 +23,11 @@ def logger(fname, level=logging.DEBUG, verbose=False): for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) - logging.basicConfig(filename=fname, level=level, - format='%(levelname)s %(asctime)s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S') + logging.basicConfig( + filename=fname, + level=level, + format='%(levelname)s %(asctime)s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') logger = logging.getLogger() if verbose: logger.addHandler(logging.StreamHandler(sys.stdout)) @@ -37,9 +38,18 @@ def logger(fname, level=logging.DEBUG, verbose=False): return logger -def download_c3ssm(c, sensor, years, months, days, version, target_dir, - temp_filename, freq='daily', keep_original=False, - max_retries=5, dry_run=False): +def download_c3ssm(c, + sensor, + years, + months, + days, + version, + target_dir, + temp_filename, + freq='daily', + keep_original=False, + max_retries=5, + dry_run=False): """ Download c3s sm data for single levels of a defined time span Parameters. We will always try to download the CDR and ICDR! @@ -84,9 +94,10 @@ def download_c3ssm(c, sensor, years, months, days, version, target_dir, if not dry_run: if not check_api_read(): - raise ValueError("Cannot establish connection to CDS. Please set up" - "your CDS API key as described at " - "https://cds.climate.copernicus.eu/api-how-to") + raise ValueError( + "Cannot establish connection to CDS. Please set up" + "your CDS API key as described at " + "https://cds.climate.copernicus.eu/api-how-to") os.makedirs(target_dir, exist_ok=True) @@ -112,8 +123,7 @@ def download_c3ssm(c, sensor, years, months, days, version, target_dir, 'version': version, 'type_of_record': record }, - target=dl_file - ) + target=dl_file) queries[record] = query @@ -143,8 +153,9 @@ def download_c3ssm(c, sensor, years, months, days, version, target_dir, return success, queries + def download_and_extract(target_path, - startdate=datetime(1978,1,1), + startdate=datetime(1978, 1, 1), enddate=datetime.now(), product='combined', freq='daily', @@ -194,25 +205,35 @@ def download_and_extract(target_path, os.makedirs(os.path.join(target_path, '000_log'), exist_ok=True) - dl_logger = logger(os.path.join(target_path, '000_log', - f"download_{'{:%Y%m%d%H%M%S.%f}'.format(datetime.now())}.log")) + dl_logger = logger( + os.path.join( + target_path, '000_log', + f"download_{'{:%Y%m%d%H%M%S.%f}'.format(datetime.now())}.log")) if dry_run: c = None else: - c = cdsapi.Client(quiet=True, - url=os.environ.get('CDSAPI_URL'), - key=os.environ.get('CDSAPI_KEY'), - error_callback=dl_logger) + c = cdsapi.Client( + quiet=True, + url=os.environ.get('CDSAPI_URL'), + key=os.environ.get('CDSAPI_KEY'), + error_callback=dl_logger) STATIC_KWARGS = { - 'c': c, 'keep_original': keep_original, - 'dry_run': dry_run, 'sensor': product, - 'version': version, 'freq': freq, 'max_retries': 3 + 'c': c, + 'keep_original': keep_original, + 'dry_run': dry_run, + 'sensor': product, + 'version': version, + 'freq': freq, + 'max_retries': 3 } ITER_KWARGS = { - 'years': [], 'months': [], 'days': [], 'target_dir': [], + 'years': [], + 'months': [], + 'days': [], + 'target_dir': [], 'temp_filename': [] } @@ -221,7 +242,8 @@ def download_and_extract(target_path, # download monthly zip archives while curr_start <= enddate: sy, sm, sd = curr_start.year, curr_start.month, curr_start.day - sm_days = calendar.monthrange(sy, sm)[1] # days in the current month + sm_days = calendar.monthrange(sy, + sm)[1] # days in the current month y, m = sy, sm if (enddate.year == y) and (enddate.month == m): @@ -238,7 +260,7 @@ def download_and_extract(target_path, ITER_KWARGS['years'].append([y]) ITER_KWARGS['months'].append([m]) - ITER_KWARGS['days'].append(list(range(sd, d+1))) + ITER_KWARGS['days'].append(list(range(sd, d + 1))) ITER_KWARGS['target_dir'].append(target_dir_year) ITER_KWARGS['temp_filename'].append(fname) @@ -260,12 +282,15 @@ def download_and_extract(target_path, elif curr_year == enddate.year and curr_year != startdate.year: ms = [m for m in range(1, 13) if m <= enddate.month] elif curr_year == startdate.year and curr_year == enddate.year: - ms = [m for m in range(1, 13) if ((m >= startdate.month) and - (m <= enddate.month))] + ms = [ + m for m in range(1, 13) + if ((m >= startdate.month) and (m <= enddate.month)) + ] else: ms = list(range(1, 13)) - curr_start = datetime(curr_year, ms[0], + curr_start = datetime( + curr_year, ms[0], startdate.day if curr_year == startdate.year else ds[0]) while curr_start.day not in ds: @@ -285,12 +310,16 @@ def download_and_extract(target_path, curr_year += 1 - results = parallel_process(download_c3ssm, STATIC_KWARGS=STATIC_KWARGS, - ITER_KWARGS=ITER_KWARGS, n_proc=1, - log_path=os.path.join(target_path, '000_log'), - loglevel='INFO', backend='threading', - logger_name='dl_logger', - show_progress_bars=True) + results = parallel_process( + download_c3ssm, + STATIC_KWARGS=STATIC_KWARGS, + ITER_KWARGS=ITER_KWARGS, + n_proc=1, + log_path=os.path.join(target_path, '000_log'), + loglevel='INFO', + backend='threading', + logger_name='dl_logger', + show_progress_bars=True) try: update_image_summary_file(target_path) @@ -309,8 +338,8 @@ def download_and_extract(target_path, return queries -def first_missing_date(last_date: str, - freq: str = 'daily') -> datetime: + +def first_missing_date(last_date: str, freq: str = 'daily') -> datetime: """ For a product, based on the last available date, find the next expected one. diff --git a/src/c3s_sm/interface.py b/src/c3s_sm/interface.py index 6adb6bf..5a676ca 100644 --- a/src/c3s_sm/interface.py +++ b/src/c3s_sm/interface.py @@ -27,10 +27,12 @@ _default_fillvalues = {'sm': np.nan, 'sm_uncertainty': np.nan, 't0': np.nan} + class C3SImg(ImageBase): """ Class to read a single C3S image (for one time stamp) """ + def __init__(self, filename, parameters=None, @@ -95,11 +97,14 @@ def _read_flat_img(self) -> (dict, dict, dict, datetime): Reads a single C3S image, flat with gpi0 as first element """ with Dataset(self.filename, mode='r') as ds: - timestamp = num2date(ds['time'], ds['time'].units, - only_use_cftime_datetimes=True, - only_use_python_datetimes=False) - - assert len(timestamp) == 1, "Found more than 1 time stamps in image" + timestamp = num2date( + ds['time'], + ds['time'].units, + only_use_cftime_datetimes=True, + only_use_python_datetimes=False) + + assert len( + timestamp) == 1, "Found more than 1 time stamps in image" timestamp = timestamp[0] param_img = {} @@ -107,8 +112,10 @@ def _read_flat_img(self) -> (dict, dict, dict, datetime): if len(self.parameters) == 0: # all data vars, exclude coord vars - self.parameters = [k for k in ds.variables.keys() - if k not in ds.dimensions.keys()] + self.parameters = [ + k for k in ds.variables.keys() + if k not in ds.dimensions.keys() + ] parameters = list(self.parameters) @@ -130,8 +137,7 @@ def _read_flat_img(self) -> (dict, dict, dict, datetime): self.fillval[parameter] = data.fill_value common_dtype = np.result_type( - *([data.dtype] + [type(self.fillval[parameter])]) - ) + *([data.dtype] + [type(self.fillval[parameter])])) self.fillval[parameter] = np.array( [self.fillval[parameter]], dtype=common_dtype)[0] @@ -215,11 +221,8 @@ def read(self, timestamp=None): data = self._mask_and_reshape(data) if self.flatten: - return Image(self.subgrid.activearrlon, - self.subgrid.activearrlat, - data, - var_meta, - timestamp) + return Image(self.subgrid.activearrlon, self.subgrid.activearrlat, + data, var_meta, timestamp) else: # also cut 2d case to active area min_lat, min_lon = self.subgrid.activearrlat.min(), \ @@ -231,16 +234,16 @@ def read(self, timestamp=None): self.grid.find_nearest_gpi(min_lon, min_lat)[0], # llc self.grid.find_nearest_gpi(max_lon, min_lat)[0], # lrc self.grid.find_nearest_gpi(max_lon, max_lat)[0], # urc - ]) + ]) rows = slice(corners[0][0], corners[0][2] + 1) cols = slice(corners[1][0], corners[1][1] + 1) - return Image(self.grid.arrlon.reshape(*self.shape)[rows, cols], - np.flipud(self.grid.arrlat.reshape(*self.shape)[rows, cols]), - {k: np.flipud(v[rows, cols]) for k, v in data.items()}, - var_meta, - timestamp) + return Image( + self.grid.arrlon.reshape(*self.shape)[rows, cols], + np.flipud(self.grid.arrlat.reshape(*self.shape)[rows, cols]), { + k: np.flipud(v[rows, cols]) for k, v in data.items() + }, var_meta, timestamp) def write(self, *args, **kwargs): pass @@ -298,10 +301,12 @@ def __init__(self, """ self.data_path = data_path - ioclass_kwargs = {'parameters': parameters, - 'subgrid': subgrid, - 'flatten': flatten, - 'fillval': fillval} + ioclass_kwargs = { + 'parameters': parameters, + 'subgrid': subgrid, + 'flatten': flatten, + 'fillval': fillval + } self.fname_args = self._parse_filename(fntempl) self.solve_ambiguity = solve_ambiguity @@ -311,15 +316,18 @@ def __init__(self, fn_args['record'] = '*' filename_templ = fntempl.format(**fn_args) - super(C3S_Nc_Img_Stack, self).__init__(path=data_path, - ioclass=C3SImg, - fname_templ=filename_templ , - datetime_format="%Y%m%d%H%M%S", - subpath_templ=subpath_templ, - exact_templ=False, - ioclass_kws=ioclass_kwargs) - - def _build_filename(self, timestamp: datetime, custom_templ: str = None, + super(C3S_Nc_Img_Stack, self).__init__( + path=data_path, + ioclass=C3SImg, + fname_templ=filename_templ, + datetime_format="%Y%m%d%H%M%S", + subpath_templ=subpath_templ, + exact_templ=False, + ioclass_kws=ioclass_kwargs) + + def _build_filename(self, + timestamp: datetime, + custom_templ: str = None, str_param: dict = None): """ This function uses _search_files to find the correct @@ -339,23 +347,24 @@ def _build_filename(self, timestamp: datetime, custom_templ: str = None, the fname_templ.format(**str_param) notation before the resulting string is put into datetime.strftime. """ - filename = self._search_files(timestamp, custom_templ=custom_templ, - str_param=str_param) + filename = self._search_files( + timestamp, custom_templ=custom_templ, str_param=str_param) if len(filename) == 0: raise IOError("No file found for {:}".format(timestamp.ctime())) if len(filename) > 1: filename = sorted(filename) if self.solve_ambiguity == 'sort_last': - warnings.warn(f'Ambiguous file for {str(timestamp)} found.' - f' Sort and use last: {filename[-1]}, skipped {filename[:-1]}') + warnings.warn( + f'Ambiguous file for {str(timestamp)} found.' + f' Sort and use last: {filename[-1]}, skipped {filename[:-1]}' + ) filename = [filename[-1]] elif self.solve_ambiguity == 'sort_first': warnings.warn(f'Ambiguous file for {str(timestamp)} found.' f' Sort and use first: {filename[0]}') filename = [filename[0]] else: - raise IOError( - "File search is ambiguous {:}".format(filename)) + raise IOError("File search is ambiguous {:}".format(filename)) return filename[0] @@ -410,9 +419,11 @@ def tstamps_for_daterange(self, start_date, end_date): self.fname_args['freq'] = 'DAILY' if self.fname_args['freq'] == 'MONTHLY': - timestamps = pd.date_range(start_date, end_date, freq='MS').to_pydatetime() + timestamps = pd.date_range( + start_date, end_date, freq='MS').to_pydatetime() elif self.fname_args['freq'] == 'DAILY': - timestamps = pd.date_range(start_date, end_date, freq='D').to_pydatetime() + timestamps = pd.date_range( + start_date, end_date, freq='D').to_pydatetime() elif self.fname_args['freq'] == 'DEKADAL': timestamps = dekad_index(start_date, end_date).to_pydatetime() timestamps = [dekad_startdate_from_date(d) for d in timestamps] @@ -442,13 +453,18 @@ def read(self, timestamp, **kwargs): warnings.warn(f'Could not load image for {timestamp}.') raise IOError + class C3STs(GriddedNcOrthoMultiTs): """ Module for reading C3S time series in netcdf format. """ - def __init__(self, ts_path, grid_path=None, remove_nans=False, drop_tz=True, - **kwargs): + def __init__(self, + ts_path, + grid_path=None, + remove_nans=False, + drop_tz=True, + **kwargs): """ Class for reading C3S SM time series after reshuffling. @@ -563,4 +579,4 @@ def iter_ts(self, **kwargs): pass def write_ts(self, *args, **kwargs): - pass \ No newline at end of file + pass diff --git a/src/c3s_sm/metadata.py b/src/c3s_sm/metadata.py index de9a766..5612aad 100644 --- a/src/c3s_sm/metadata.py +++ b/src/c3s_sm/metadata.py @@ -3,8 +3,10 @@ import numpy as np from collections import OrderedDict + class C3S_SM_TS_Attrs(object): '''Default, common metadata for daily and monthly, dekadal products''' + def __init__(self, sensor_type, version): ''' Parameters @@ -18,9 +20,11 @@ def __init__(self, sensor_type, version): ''' self.version = version - self.product_datatype_str = {'active': 'SSMS', - 'passive': 'SSMV', - 'combined': 'SSMV'} + self.product_datatype_str = { + 'active': 'SSMS', + 'passive': 'SSMV', + 'combined': 'SSMV' + } self.sensor_type = sensor_type @@ -54,7 +58,8 @@ def flag(self): ('0', 'no_data_inconsistency_detected'), ('Bit0', 'snow_coverage_or_temperature_below_zero'), ('Bit1', 'dense_vegetation'), - ('Bit2', 'others_no_convergence_in_the_model_thus_no_valid_sm_estimates'), + ('Bit2', + 'others_no_convergence_in_the_model_thus_no_valid_sm_estimates'), ('Bit3', 'soil_moisture_value_exceeds_physical_boundary'), ('Bit4', 'weight_of_measurement_below_threshold'), ('Bit5', 'all_datasets_deemed_unreliable'), @@ -79,8 +84,10 @@ def freqbandID_flag(self): ('Bit7', 'K194'), ]) - self.freqbandID_flag_values = np.array(list(freqbandID_flag_dict.keys())) - self.freqbandID_flag_meanings = np.array(list(freqbandID_flag_dict.values())) + self.freqbandID_flag_values = np.array( + list(freqbandID_flag_dict.keys())) + self.freqbandID_flag_meanings = np.array( + list(freqbandID_flag_dict.values())) return self.freqbandID_flag_values, self.freqbandID_flag_meanings @@ -109,7 +116,7 @@ def mode_flag(self): ('0', 'NaN'), ('Bit0', 'ascending'), ('Bit1', 'descending'), - ]) + ]) self.mode_flag_values = np.array(list(mode_flag_dict.keys())) self.mode_flag_meanings = np.array(list(mode_flag_dict.values())) @@ -118,10 +125,7 @@ def mode_flag(self): class C3S_daily_tsatt_nc: - def __init__(self, - cdr_type:str, - sensor_type:str, - cls): + def __init__(self, cdr_type: str, sensor_type: str, cls): self.general_attrs = cls(sensor_type=sensor_type) @@ -138,43 +142,64 @@ def __init__(self, self.general_attrs.sensor_flag() self.ts_attributes = { - 'dnflag': {'full_name': 'Day / Night Flag', - 'flag_values': self.general_attrs.dn_flag_values, - 'flag_meanings': self.general_attrs.dn_flag_meanings}, - 'flag': {'full_name': 'Flag', - 'flag_values': self.general_attrs.flag_values, - 'flag_meanings': self.general_attrs.flag_meanings}, - 'freqbandID': {'full_name': 'Frequency Band Identification', - 'flag_values': self.general_attrs.freqbandID_flag_values, - 'flag_meanings': self.general_attrs.freqbandID_flag_meanings}, - 'mode': {'full_name': 'Satellite Mode', - 'flag_values': self.general_attrs.mode_flag_values, - 'flag_meanings': self.general_attrs.mode_flag_meanings}, - 'sensor': {'full_name': 'Sensor', - 'flag_values': self.general_attrs.sensor_flag_values, - 'flag_meanings': self.general_attrs.sensor_flag_meanings}, - 'sm': {'full_name': self.general_attrs.sm_full_name, - 'units': self.general_attrs.sm_units}, - 'sm_uncertainty': {'full_name': self.general_attrs.sm_uncertainty_full_name, - 'units': self.general_attrs.sm_uncertainty_units}, - 't0': {'full_name': 'Observation Timestamp', - 'units': 'days since 1970-01-01 00:00:00 UTC'}} + 'dnflag': { + 'full_name': 'Day / Night Flag', + 'flag_values': self.general_attrs.dn_flag_values, + 'flag_meanings': self.general_attrs.dn_flag_meanings + }, + 'flag': { + 'full_name': 'Flag', + 'flag_values': self.general_attrs.flag_values, + 'flag_meanings': self.general_attrs.flag_meanings + }, + 'freqbandID': { + 'full_name': 'Frequency Band Identification', + 'flag_values': self.general_attrs.freqbandID_flag_values, + 'flag_meanings': self.general_attrs.freqbandID_flag_meanings + }, + 'mode': { + 'full_name': 'Satellite Mode', + 'flag_values': self.general_attrs.mode_flag_values, + 'flag_meanings': self.general_attrs.mode_flag_meanings + }, + 'sensor': { + 'full_name': 'Sensor', + 'flag_values': self.general_attrs.sensor_flag_values, + 'flag_meanings': self.general_attrs.sensor_flag_meanings + }, + 'sm': { + 'full_name': self.general_attrs.sm_full_name, + 'units': self.general_attrs.sm_units + }, + 'sm_uncertainty': { + 'full_name': self.general_attrs.sm_uncertainty_full_name, + 'units': self.general_attrs.sm_uncertainty_units + }, + 't0': { + 'full_name': 'Observation Timestamp', + 'units': 'days since 1970-01-01 00:00:00 UTC' + } + } _prod = sensor_type.upper() _freq = self.freq.upper() _cdr = self.cdr_type.upper() _vers = self.version - product_name = " ".join(['C3S', 'SOILMOISTURE', 'L3S', - self.general_attrs.product_datatype_str[sensor_type].upper(), - _prod, _freq, _cdr, _vers]) + product_name = " ".join([ + 'C3S', 'SOILMOISTURE', 'L3S', + self.general_attrs.product_datatype_str[sensor_type].upper(), + _prod, _freq, _cdr, _vers + ]) - self.global_attr = {'product_full_name': product_name, - 'product': str(_prod), - 'temporal_sampling': str(_freq), - 'cdr': str(_cdr), - 'version': str(_vers), - 'resolution': '0.25 degree'} + self.global_attr = { + 'product_full_name': product_name, + 'product': str(_prod), + 'temporal_sampling': str(_freq), + 'cdr': str(_cdr), + 'version': str(_vers), + 'resolution': '0.25 degree' + } class C3S_dekmon_tsatt_nc(object): @@ -183,11 +208,7 @@ class C3S_dekmon_tsatt_nc(object): tcdr and icdr timeseries files. """ - def __init__(self, - freq: str, - cdr_type: str, - sensor_type: str, - cls): + def __init__(self, freq: str, cdr_type: str, sensor_type: str, cls): self.general_attrs = cls(sensor_type=sensor_type) @@ -203,15 +224,23 @@ def __init__(self, self.general_attrs.sensor_flag() self.ts_attributes = { - 'freqbandID': {'full_name': 'Frequency Band Identification', - 'flag_values': self.general_attrs.freqbandID_flag_values, - 'flag_meanings': self.general_attrs.freqbandID_flag_meanings}, - 'sensor': {'full_name': 'Sensor', - 'flag_values': self.general_attrs.sensor_flag_values, - 'flag_meanings': self.general_attrs.sensor_flag_meanings}, - 'nobs': {'full_name': 'Number of valid observation'}, - 'sm': {'full_name': self.general_attrs.sm_full_name, - 'units': self.general_attrs.sm_units} + 'freqbandID': { + 'full_name': 'Frequency Band Identification', + 'flag_values': self.general_attrs.freqbandID_flag_values, + 'flag_meanings': self.general_attrs.freqbandID_flag_meanings + }, + 'sensor': { + 'full_name': 'Sensor', + 'flag_values': self.general_attrs.sensor_flag_values, + 'flag_meanings': self.general_attrs.sensor_flag_meanings + }, + 'nobs': { + 'full_name': 'Number of valid observation' + }, + 'sm': { + 'full_name': self.general_attrs.sm_full_name, + 'units': self.general_attrs.sm_units + } } _prod = sensor_type.upper() @@ -219,16 +248,20 @@ def __init__(self, _cdr = self.cdr_type.upper() _vers = self.version - product_name = " ".join(['C3S', 'SOILMOISTURE', 'L3S', - self.general_attrs.product_datatype_str[sensor_type].upper(), - _prod, _freq, _cdr, _vers]) + product_name = " ".join([ + 'C3S', 'SOILMOISTURE', 'L3S', + self.general_attrs.product_datatype_str[sensor_type].upper(), + _prod, _freq, _cdr, _vers + ]) - self.global_attr = {'product_full_name': product_name, - 'product': str(_prod), - 'temporal_sampling': str(_freq), - 'cdr': str(_cdr), - 'version': str(_vers), - 'resolution': '0.25 degree'} + self.global_attr = { + 'product_full_name': product_name, + 'product': str(_prod), + 'temporal_sampling': str(_freq), + 'cdr': str(_cdr), + 'version': str(_vers), + 'resolution': '0.25 degree' + } class C3S_SM_TS_Attrs_v201706(C3S_SM_TS_Attrs): @@ -236,40 +269,39 @@ class C3S_SM_TS_Attrs_v201706(C3S_SM_TS_Attrs): def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v201706, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v201706, self).__init__(sensor_type, version) + class C3S_SM_TS_Attrs_v201801(C3S_SM_TS_Attrs): # Example for a version specific attribute class, last part defines version def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v201801, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v201801, self).__init__(sensor_type, version) + class C3S_SM_TS_Attrs_v201812(C3S_SM_TS_Attrs): # Example for a version specific attribute class, last part defines version def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v201812, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v201812, self).__init__(sensor_type, version) + class C3S_SM_TS_Attrs_v201912(C3S_SM_TS_Attrs): # Example for a version specific attribute class, last part defines version def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v201912, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v201912, self).__init__(sensor_type, version) + class C3S_SM_TS_Attrs_v202012(C3S_SM_TS_Attrs): # smap added to sensors (no new freq band), based on cci v5 def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v202012, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v202012, self).__init__(sensor_type, version) def sensor_flag(self): sensor_flag_dict = OrderedDict([ @@ -292,19 +324,21 @@ def sensor_flag(self): return self.sensor_flag_values, self.sensor_flag_meanings + class C3S_SM_TS_Attrs_v202212(C3S_SM_TS_Attrs): # gpm, fy3b added to sensors (no new freq band), based on cci v7 def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v202212, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v202212, self).__init__(sensor_type, version) + def flag(self): flag_dict = OrderedDict([ ('0', 'no_data_inconsistency_detected'), ('Bit0', 'snow_coverage_or_temperature_below_zero'), ('Bit1', 'dense_vegetation'), - ('Bit2', 'others_no_convergence_in_the_model_thus_no_valid_sm_estimates'), + ('Bit2', + 'others_no_convergence_in_the_model_thus_no_valid_sm_estimates'), ('Bit3', 'soil_moisture_value_exceeds_physical_boundary'), ('Bit4', 'weight_of_measurement_below_threshold'), ('Bit5', 'all_datasets_deemed_unreliable'), @@ -344,8 +378,9 @@ def sensor_flag(self): return self.sensor_flag_values, self.sensor_flag_meanings + class C3S_SM_TS_Attrs_v202312(C3S_SM_TS_Attrs): + def __init__(self, sensor_type): version = type(self).__name__.split('_')[-1] - super(C3S_SM_TS_Attrs_v202312, self).__init__(sensor_type, - version) + super(C3S_SM_TS_Attrs_v202312, self).__init__(sensor_type, version) diff --git a/src/c3s_sm/misc.py b/src/c3s_sm/misc.py index 1dbe581..0db2b50 100644 --- a/src/c3s_sm/misc.py +++ b/src/c3s_sm/misc.py @@ -8,6 +8,7 @@ import xarray as xr from repurpose.process import parallel_process + def collect_ts_cov(data_path: str, n_proc=1, progressbar=False): """ Open all time series files in a directory (slow) and detect the @@ -26,8 +27,9 @@ def collect_ts_cov(data_path: str, n_proc=1, progressbar=False): Periods coverged by the time series files. {(start, end): [cell, cell, ...], ...} """ - fl = glob(os.path.join(data_path, '**', "[0-9][0-9][0-9][0-9].nc"), - recursive=True) + fl = glob( + os.path.join(data_path, '**', "[0-9][0-9][0-9][0-9].nc"), + recursive=True) if len(fl) == 0: raise ValueError(f"No matching files found in {data_path}") @@ -41,9 +43,12 @@ def _func(f: str) -> tuple: return start, end, cell, parameters - se = parallel_process(_func, ITER_KWARGS=dict(f=fl), - show_progress_bars=progressbar, - backend='threading', n_proc=n_proc) + se = parallel_process( + _func, + ITER_KWARGS=dict(f=fl), + show_progress_bars=progressbar, + backend='threading', + n_proc=n_proc) periods = {} parameters = None @@ -82,7 +87,7 @@ def img_infer_file_props(path: str, raise NotImplementedError(f"`start_from` must be one of: " f"`first`, `last`.") for f in files: - file_args = parse(fntempl, os.path.basename(f)) + file_args = parse(fntempl, os.path.basename(f)) if file_args is None: continue return file_args.named @@ -102,6 +107,7 @@ def read_summary_yml(path: str) -> dict: return props + def get_first_image_date(path: str, fntempl: str = _default_template) -> str: """ Parse files in the given directory (or any subdir) using the passed @@ -123,8 +129,7 @@ def get_first_image_date(path: str, fntempl: str = _default_template) -> str: Parse date from the first found image file that matches `fntempl`. """ try: - props = img_infer_file_props( - path, fntempl=fntempl, start_from='first') + props = img_infer_file_props(path, fntempl=fntempl, start_from='first') startdate = props['datetime'] except ValueError: raise ValueError('Could not infer start date from image files. ' @@ -160,6 +165,7 @@ def get_last_image_date(path: str, fntempl: str) -> str: 'Please specify enddate manually.') return enddate + def update_image_summary_file(data_path: str, out_file=None, fntempl: str = _default_template): @@ -193,6 +199,7 @@ def update_image_summary_file(data_path: str, with open(out_file, 'w') as f: yaml.dump(props, f, default_flow_style=False) + def update_ts_summary_file(data_path, props=None, collect_cov=False, **kwargs): """ Create a summary yml file that contains the most relevant information @@ -229,9 +236,10 @@ def update_ts_summary_file(data_path, props=None, collect_cov=False, **kwargs): i = 1 for startend, cells in periods.items(): props[f'period{i}'] = dict( - start=str(startend[0]), end=str(startend[1]), - N=len(cells), cells=sorted(cells) - ) + start=str(startend[0]), + end=str(startend[1]), + N=len(cells), + cells=sorted(cells)) out_file = os.path.join(data_path, f"000_overview.yml") diff --git a/src/c3s_sm/reshuffle.py b/src/c3s_sm/reshuffle.py index 5ce67dc..5c2a6bf 100644 --- a/src/c3s_sm/reshuffle.py +++ b/src/c3s_sm/reshuffle.py @@ -30,12 +30,15 @@ update_image_summary_file, ) + def reshuffle(*args, **kwargs): - warnings.warn("`c3s_sm.reshuffle.reshuffle` is deprecated, " - "use `c3s_sm.reshuffle.img2ts`", - category=DeprecationWarning) + warnings.warn( + "`c3s_sm.reshuffle.reshuffle` is deprecated, " + "use `c3s_sm.reshuffle.img2ts`", + category=DeprecationWarning) return img2ts(*args, **kwargs) + def parse_filename(data_dir, fntempl=_default_template): """ Take the first file in the passed directory and use its file name to @@ -64,13 +67,18 @@ def parse_filename(data_dir, fntempl=_default_template): else: file_args = file_args.named file_args['datetime'] = '{datetime}' - file_vars = Dataset(os.path.join(curr,f)).variables.keys() + file_vars = Dataset(os.path.join(curr, f)).variables.keys() return file_args, list(file_vars) raise IOError('No file name in passed directory fits to template') -def extend_ts(img_path, ts_path, fntempl=_default_template, startdate=None, - freq=None, n_proc=1): + +def extend_ts(img_path, + ts_path, + fntempl=_default_template, + startdate=None, + freq=None, + n_proc=1): """ Append any new data from the image path to the time series data. This function is only applied to time series file that were created @@ -117,8 +125,7 @@ def extend_ts(img_path, ts_path, fntempl=_default_template, startdate=None, else: raise ValueError( f'Unexpected frequency found: {freq}. One of daily, ' - f'dekadal, monthly is expected.' - ) + f'dekadal, monthly is expected.') startdate = pd.to_datetime(kwargs['enddate']) + dt else: @@ -133,9 +140,19 @@ def extend_ts(img_path, ts_path, fntempl=_default_template, startdate=None, img2ts(ts_path=ts_path, n_proc=n_proc, **kwargs) -def img2ts(img_path, ts_path, startdate, enddate, parameters=None, - land_points=True, bbox=None, cells=None, ignore_meta=False, - fntempl=_default_template, overwrite=False, imgbuffer=250, + +def img2ts(img_path, + ts_path, + startdate, + enddate, + parameters=None, + land_points=True, + bbox=None, + cells=None, + ignore_meta=False, + fntempl=_default_template, + overwrite=False, + imgbuffer=250, n_proc=1): """ Reshuffle method applied to C3S data. @@ -189,7 +206,8 @@ def img2ts(img_path, ts_path, startdate, enddate, parameters=None, grid = SMECV_Grid_v052('land') if land_points else SMECV_Grid_v052(None) if (bbox is not None) and (cells is not None): - raise ValueError("Please either pass a bounding box or cells, not both") + raise ValueError( + "Please either pass a bounding box or cells, not both") if bbox: grid = grid.subgrid_from_bbox(*bbox) @@ -203,14 +221,19 @@ def img2ts(img_path, ts_path, startdate, enddate, parameters=None, startdate = pd.to_datetime(startdate).to_pydatetime() enddate = pd.to_datetime(enddate).to_pydatetime() - subpath_templ = ('%Y',) if os.path.isdir(os.path.join(img_path, str(startdate.year))) else None - input_dataset = C3S_Nc_Img_Stack(data_path=img_path, - parameters=parameters, - subgrid=grid, - flatten=True, - fillval={'sm': np.nan, 'flag': 2**8}, - fntempl=fntempl, - subpath_templ=subpath_templ) + subpath_templ = ('%Y',) if os.path.isdir( + os.path.join(img_path, str(startdate.year))) else None + input_dataset = C3S_Nc_Img_Stack( + data_path=img_path, + parameters=parameters, + subgrid=grid, + flatten=True, + fillval={ + 'sm': np.nan, + 'flag': 2**8 + }, + fntempl=fntempl, + subpath_templ=subpath_templ) props = {'freq': 'unknown', 'sensor_type': 'unknown', 'version': 'unknown'} @@ -261,26 +284,35 @@ def img2ts(img_path, ts_path, startdate, enddate, parameters=None, else: _cellsize = 5 - reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=ts_path, - startdate=startdate, enddate=enddate, input_grid=grid, - imgbuffer=imgbuffer, cellsize_lat=_cellsize, - cellsize_lon=_cellsize, global_attr=global_attributes, - zlib=True, unlim_chunksize=1000, - ts_attributes=ts_attributes, n_proc=n_proc, - backend='multiprocessing') + reshuffler = Img2Ts( + input_dataset=input_dataset, + outputpath=ts_path, + startdate=startdate, + enddate=enddate, + input_grid=grid, + imgbuffer=imgbuffer, + cellsize_lat=_cellsize, + cellsize_lon=_cellsize, + global_attr=global_attributes, + zlib=True, + unlim_chunksize=1000, + ts_attributes=ts_attributes, + n_proc=n_proc, + backend='multiprocessing') reshuffler.calc() - kwargs = {'parameters': list(parameters), 'land_points': land_points, - 'enddate': enddate, - 'img_path': img_path, - 'cells': None if cells is None else list(cells), - 'bbox': None if bbox is None else list(bbox), - "fntempl": fntempl, - 'ignore_meta': ignore_meta} + kwargs = { + 'parameters': list(parameters), + 'land_points': land_points, + 'enddate': enddate, + 'img_path': img_path, + 'cells': None if cells is None else list(cells), + 'bbox': None if bbox is None else list(bbox), + "fntempl": fntempl, + 'ignore_meta': ignore_meta + } props["img2ts_kwargs"] = kwargs - update_ts_summary_file(ts_path, collect_cov=False, - props=props) - + update_ts_summary_file(ts_path, collect_cov=False, props=props)