diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..254a85bd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,73 @@ +name: GLAMOD marine-processing Testing Suite +on: + push: + branches: + - "*" + pull_request: + branches: + - "*" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: py${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + # Supported python versions + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + - name: Set environment variables + run: | + echo "CONDA_ENV_FILE=ci/requirements/environment.yml" >> $GITHUB_ENV + echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV + + - name: Create conda environment + uses: mamba-org/provision-with-micromamba@main + with: + cache-downloads: true + cache-downloads-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + micromamba-version: 'latest' + environment-file: ci/requirements/environment.yml + extra-specs: | + python=${{ matrix.python-version }} + + - name: Install marine_processing toolbox + run: | + python -m pip install --no-deps -e . + + - name: Version info + run: | + conda info -a + conda list + + - name: Import marine_processing toolbox + run: | + python -c "import glamod_marine_processing" + + - name: Run tests + timeout-minutes: 10 + run: python -u -m pytest + --cov=glamod_marine_processing + --cov-report=xml + --junitxml=test-results/${{ runner.os }}-${{ matrix.python-version }}.xml + + - name: Upload code coverage to Codecov + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests + env_vars: RUNNER_OS,PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false diff --git a/CHANGES.rst b/CHANGES.rst index 4f5e6839..d25e0382 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -18,10 +18,11 @@ New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * add some information files: ``AUTHORS.rst``, ``CHANGES.rst``, ``CONTRIBUTING.rst`` and ``LICENSE`` (:pull:`3`) * make use of pre-commit (:pull:`3`) -* make use of an command-line interface to create suite PYTHON and SLURM scripts (:pull:`3`) +* make use of a command-line interface to create suite PYTHON and SLURM scripts (:pull:`3`, :pull:`5`) * add new release 7.0 configuration files (:pull:`3`) * set some default directories and SLURM settings for both HPC systems KAY and MeluXina (:pull:`3`) Internal changes ^^^^^^^^^^^^^^^^ * reduce complexity of some functions (:pull:`3`) +* add observational testing suite (:issue:`5`, :pull:`5`) diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml new file mode 100755 index 00000000..eedc0b00 --- /dev/null +++ b/ci/requirements/environment.yml @@ -0,0 +1,16 @@ +name: marine-processing-test + +channels: + - conda-forge + - nodefaults + +dependencies: + # for testing + - pytest + - pytest-cov + - pytest-env + - pip: + - git+https://github.com/glamod/cdm_reader_mapper + - pandas-validation + - Levenshtein + - simplejson diff --git a/glamod_marine_processing/cli.py b/glamod_marine_processing/cli.py index 2707d8ce..ee38b942 100755 --- a/glamod_marine_processing/cli.py +++ b/glamod_marine_processing/cli.py @@ -107,6 +107,11 @@ def build_configuration(self): if self.work_directory
is not None: config["paths"]["glamod"] = self.work_directory + try: + user = os.getlogin() + except OSError: + user = "testuser" + config["paths"]["data_directory"] = get_abs_path( config["paths"]["data_directory"] ) @@ -121,7 +126,7 @@ def build_configuration(self): scripts_directory = os.path.join(code_directory, "scripts") lotus_scripts_directory = os.path.join(code_directory, "lotus_scripts") work_directory = os.path.abspath(config["paths"]["glamod"]) - scratch_directory = os.path.join(work_directory, os.getlogin()) + scratch_directory = os.path.join(work_directory, user) release_directory = os.path.join( scratch_directory, self.release, self.dataset, self.level ) @@ -155,7 +160,10 @@ def __init__(self): """, ) self.submit_jobs = click.option( - "-submit", "--submit_jobs", is_flag=True, help="Submit job scripts" + "-submit", "--submit_jobs", is_flag=True, help="Submit job scripts." + ) + self.run_jobs = click.option( + "-run", "--run_jobs", is_flag=True, help="Run job scripts interactively." ) self.level = click.option( "-l", @@ -248,7 +256,22 @@ def __init__(self): "-qc", "--quality_control", is_flag=True, - help="Do quality control for qc_suite only", + help="Do quality control for qc_suite only.", + ) + self.source_pattern = click.option( + "-sp", + "--source_pattern", + help="User-defined input source pattern.", + ) + self.prev_file_id = click.option( + "-p_id", + "--prev_file_id", + help="fileID of input file names. Default ", + ) + self.external_qc_files = click.option( + "-ext_qc", + "--external_qc_files", + help="Path to external QC files. Default: /external_files.", + ) diff --git a/glamod_marine_processing/cli_obs.py b/glamod_marine_processing/cli_obs.py index bee7faa5..a07a57f3 100755 --- a/glamod_marine_processing/cli_obs.py +++ b/glamod_marine_processing/cli_obs.py @@ -24,10 +24,13 @@ def ObsCli( release, update, dataset, + source_pattern, + prev_file_id, data_directory, work_directory, config_file, submit_jobs, + run_jobs, overwrite, ): """Entry point for the obs_suite command line interface.""" @@ -64,8 +67,17 @@ def ObsCli( level_config = load_json(level_config_file) level_config["submit_jobs"] = submit_jobs + level_config["run_jobs"] = run_jobs level_config["level"] = level level_config["overwrite"] = overwrite + if source_pattern: + level_config["source_pattern"] = source_pattern + if prev_file_id: + if prev_file_id[0] != "*": + prev_file_id = f"*{prev_file_id}" + if prev_file_id[-1] != "*": + prev_file_id = f"{prev_file_id}*" + level_config["prev_fileID"] = prev_file_id for key, value in config.items(): level_config[key] = value diff --git a/glamod_marine_processing/cli_qc.py b/glamod_marine_processing/cli_qc.py index 161fdb65..11dc1b6d 100755 --- a/glamod_marine_processing/cli_qc.py +++ b/glamod_marine_processing/cli_qc.py @@ -26,11 +26,13 @@ def QcCli( corrections_version, data_directory, work_directory, + external_qc_files, config_file, submit_jobs, preprocessing, quality_control, high_resolution_qc, + run_jobs, overwrite, ): """Entry point for the qc/metadata_suite command line interface.""" @@ -58,8 +60,9 @@ def QcCli( ) parameter_file = os.path.join(p.config_directory, "ParametersCCI.json") icoads_version = "3.0.2" - external_files = os.path.join(p.data_directory, "external_files") - sst_files = os.path.join(external_files, "SST") + if external_qc_files is None: + external_qc_files = os.path.join(p.data_directory, "external_files") + sst_files = os.path.join(external_qc_files, "SST") sst_stdev_climatology = os.path.join(sst_files, "OSTIA_pentad_stdev_climatology.nc")
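The `try`/`except OSError` around `os.getlogin()` in `build_configuration` above matters on headless runners: the call reads the user attached to the controlling terminal, so it raises `OSError` under cron, containers and CI jobs such as the workflow at the top of this diff, and the scratch directory then falls back to `"testuser"`. A minimal sketch of the same pattern (the `get_user` helper and its `USER` environment lookup are illustrative assumptions, not code from this PR); `getpass.getuser()` is the stdlib alternative that already consults environment variables:

```python
import os


def get_user(default="testuser"):
    """Return the login name, or a fallback on headless runners."""
    try:
        # Needs a controlling terminal; raises OSError in CI/cron.
        return os.getlogin()
    except OSError:
        # Illustrative extra fallback before the hard-coded default.
        return os.environ.get("USER", default)
```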
old_sst_stdev_climatology = os.path.join( sst_files, "HadSST2_pentad_stdev_climatology.nc" @@ -73,12 +76,12 @@ def QcCli( sst_buddy_avg_sampling = os.path.join( sst_files, "OSTIA_buddy_range_sampling_error.nc" ) - ostia_background = os.path.join(external_files, "OSTIA_background") + ostia_background = os.path.join(external_qc_files, "OSTIA_background") djf_ostia_background = os.path.join(ostia_background, "DJF_bckerr_smooth.nc") jja_ostia_background = os.path.join(ostia_background, "JJA_bckerr_smooth.nc") son_ostia_background = os.path.join(ostia_background, "SON_bckerr_smooth.nc") mam_ostia_background = os.path.join(ostia_background, "MAM_bckerr_smooth.nc") - test_files = os.path.join(external_files, "TestFiles") + test_files = os.path.join(external_qc_files, "TestFiles") sst_climatology_file = os.path.join(test_files, "HadSST2_pentad_climatology.nc") mat_climatology_file = os.path.join(test_files, "HadNMAT2_pentad_climatology.nc") stdev_climatology_file = os.path.join( @@ -94,7 +97,7 @@ def QcCli( out_dir=out_dir, ICOADS_dir=icoads_dir, track_out_dir=out_dir, - external_files=external_files, + external_files=external_qc_files, key="Directories", ) @@ -135,6 +138,7 @@ def QcCli( key="TestFiles", ) config["submit_jobs"] = submit_jobs + config["run_jobs"] = run_jobs mkdir(qc_log_directory) mkdir(qc_hr_log_directory) diff --git a/glamod_marine_processing/obs_suite/configuration_files/release_7.0/000000/ICOADS_R3.0.2T/level2_cmd_add.json b/glamod_marine_processing/obs_suite/configuration_files/release_7.0/000000/ICOADS_R3.0.2T/level2_cmd_add.json index 3d68af5b..27151831 100755 --- a/glamod_marine_processing/obs_suite/configuration_files/release_7.0/000000/ICOADS_R3.0.2T/level2_cmd_add.json +++ b/glamod_marine_processing/obs_suite/configuration_files/release_7.0/000000/ICOADS_R3.0.2T/level2_cmd_add.json @@ -1,67 +1,67 @@ { "103-792": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "103-793": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "103-794": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "103-795": { - "year_init": 2015, + "year_init": 2022, "year_end": 2022, "exclude": false, "params_exclude": [] }, "103-797": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "114-992": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "114-993": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "114-994": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "114-995": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "172-798": { - "year_init": 2015, - "year_end": 2021, + "year_init": 2022, + "year_end": 2022, "exclude": false, "params_exclude": [] }, "params_exclude": [ "observations-wbt" ], - "year_init": 2015, - "year_end": 2021 + "year_init": 2022, + "year_end": 2022 } diff --git a/glamod_marine_processing/obs_suite/lotus_scripts/config_array.py b/glamod_marine_processing/obs_suite/lotus_scripts/config_array.py index ce58e50a..ebf4a582 100755 --- 
a/glamod_marine_processing/obs_suite/lotus_scripts/config_array.py +++ b/glamod_marine_processing/obs_suite/lotus_scripts/config_array.py @@ -18,7 +18,13 @@ # FUNCTIONS ------------------------------------------------------------------- def config_element( - sid_dck_log_dir, ai, script_config, sid_dck, yyyy, mm, filename=None + sid_dck_log_dir, + ai, + script_config, + sid_dck, + yyyy, + mm, + filename=None, ): """Update configuration script.""" script_config.update({"sid_dck": sid_dck}) @@ -60,10 +66,8 @@ def clean_ok_logs( for x in ok_files: os.remove(x) for source_file in source_files: - # print(source_file) yyyy, mm = get_yyyymm(source_file) if int(yyyy) >= year_init and int(yyyy) <= year_end: - # config_element(sid_dck_log_dir,ai,script_config,sid_dck,yyyy,mm, source_file) config_element(sid_dck_log_dir, ai, config, sid_dck, yyyy, mm, source_file) ai += 1 elif (int(yyyy) == year_init - 1 and int(mm) == 12) or ( @@ -103,7 +107,6 @@ def clean_failed_logs_only( source_file = re.sub("[?]{2}", mm, source_file) source_file = os.path.join(source_dir, sid_dck, source_file) if int(yyyy) >= year_init and int(yyyy) <= year_end: - # config_element(sid_dck_log_dir,ai,script_config,sid_dck,yyyy,mm, source_file) config_element(sid_dck_log_dir, ai, config, sid_dck, yyyy, mm, source_file) ai += 1 elif (int(yyyy) == year_init - 1 and int(mm) == 12) or ( diff --git a/glamod_marine_processing/obs_suite/lotus_scripts/level_slurm.py b/glamod_marine_processing/obs_suite/lotus_scripts/level_slurm.py index 767e9ef8..24dac5ac 100755 --- a/glamod_marine_processing/obs_suite/lotus_scripts/level_slurm.py +++ b/glamod_marine_processing/obs_suite/lotus_scripts/level_slurm.py @@ -78,7 +78,10 @@ def source_dataset(level, release): LEVEL = script_config["level"] LEVEL_SOURCE = slurm_preferences.level_source[LEVEL] -SOURCE_PATTERN = slurm_preferences.source_pattern[LEVEL] +if "source_pattern" in script_config.keys(): + SOURCE_PATTERN = script_config["source_pattern"] +else: + SOURCE_PATTERN = slurm_preferences.source_pattern[LEVEL] PYSCRIPT = f"{LEVEL}.py" MACHINE = script_config["scripts"]["machine"].lower() overwrite = script_config["overwrite"] @@ -117,6 +120,11 @@ def source_dataset(level, release): process_list = read_txt(process_list_file) release_periods = load_json(release_periods_file) +# Optionally, add CMD add file +add_file = os.path.join(config_files_path, f"{LEVEL}_cmd_add.json") +if os.path.isfile(add_file): + script_config["cmd_add_file"] = add_file + # Build array input files ----------------------------------------------------- logging.info("CONFIGURING JOB ARRAYS...") status = config_array.main( @@ -134,12 +142,7 @@ def source_dataset(level, release): # Build jobs ------------------------------------------------------------------ py_path = os.path.join(scripts_dir, PYSCRIPT) -add_file = os.path.join(config_files_path, f"{LEVEL}_cmd_add.json") -if os.path.isfile(add_file): - addition = add_file -else: - addition = "" -pycommand = f"python {py_path} {data_dir} {release} {update} {dataset} {addition}" +pycommand = f"python {py_path} {data_dir} {release} {update} {dataset}" # Set default job params mem = script_config["job_memo_mb"] @@ -226,4 +229,9 @@ def source_dataset(level, release): jid = launch_process(process) else: logging.info(f"{sid_dck}: create script") - logging.info(f"Script {job_file} was created.") + logging.info(f"Script {taskfarm_file} was created.") + if script_config["run_jobs"] is True: + logging.info("Run interactively.") + os.system(f"chmod u+x {taskfarm_file}") + 
os.system(f"{taskfarm_file}") + logging.info(f"Check whether jobs was successful: {log_diri}") diff --git a/glamod_marine_processing/obs_suite/scripts/_utilities.py b/glamod_marine_processing/obs_suite/scripts/_utilities.py new file mode 100755 index 00000000..1b87db89 --- /dev/null +++ b/glamod_marine_processing/obs_suite/scripts/_utilities.py @@ -0,0 +1,70 @@ +"""Utility functions/classes for level scripts.""" + +from __future__ import annotations + +import datetime +import json +import logging +import sys + + +# Functions-------------------------------------------------------------------- +class script_setup: + """Create script.""" + + def __init__(self, process_options, inargs): + self.data_path = inargs[1] + self.release = inargs[2] + self.update = inargs[3] + self.dataset = inargs[4] + self.configfile = inargs[5] + + try: + with open(self.configfile) as fileObj: + config = json.load(fileObj) + except Exception: + logging.error( + f"Opening configuration file :{self.configfile}", exc_info=True + ) + self.flag = False + return + + if len(sys.argv) >= 8: + logging.warning( + "Removed option to provide sid_dck, year and month as arguments. Use config file instead" + ) + if len(sys.argv) > 6: + self.sid_dck = inargs[6] + self.year = inargs[7] + self.month = inargs[8] + else: + self.sid_dck = config.get("sid_dck") + self.year = config.get("yyyy") + self.month = config.get("mm") + + self.dck = self.sid_dck.split("-")[1] + self.corrections = config.get("corrections") + + try: + for opt in process_options: + if not config.get(self.sid_dck, {}).get(opt): + setattr(self, opt, config.get(opt)) + else: + setattr(self, opt, config.get(self.sid_dck).get(opt)) + self.flag = True + except Exception: + logging.error( + f"Parsing configuration from file :{self.configfile}", exc_info=True + ) + self.flag = False + + self.filename = config.get("filename") + self.level2_list = config.get("cmd_add_file") + self.prev_fileID = config.get("prev_fileID") + + +# This is for json to handle dates +def date_handler(obj): + """Handle date.""" + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() diff --git a/glamod_marine_processing/obs_suite/scripts/level1a.py b/glamod_marine_processing/obs_suite/scripts/level1a.py index e01f18b7..9fc21949 100755 --- a/glamod_marine_processing/obs_suite/scripts/level1a.py +++ b/glamod_marine_processing/obs_suite/scripts/level1a.py @@ -57,7 +57,6 @@ import datetime import glob -import json import logging import os import sys @@ -67,6 +66,7 @@ import numpy as np import pandas as pd import simplejson +from _utilities import date_handler, script_setup from cdm_reader_mapper import cdm_mapper as cdm from cdm_reader_mapper import mdf_reader, metmetpy from cdm_reader_mapper.common import pandas_TextParser_hdlr @@ -79,67 +79,6 @@ # FUNCTIONS ------------------------------------------------------------------- -class script_setup: - """Setup LEVEL1a script.""" - - def __init__(self, inargs): - self.data_path = inargs[1] - self.release = inargs[2] - self.update = inargs[3] - self.dataset = inargs[4] - self.configfile = inargs[5] - - try: - with open(self.configfile) as fileObj: - config = json.load(fileObj) - except Exception: - logging.error( - f"Opening configuration file :{self.configfile}", exc_info=True - ) - self.flag = False - return - - if len(sys.argv) > 6: - self.sid_dck = inargs[6] - self.year = inargs[7] - self.month = inargs[8] - self.filename = inargs[9] - else: - self.sid_dck = config.get("sid_dck") - self.year = config.get("yyyy") - self.month = 
config.get("mm") - self.filename = config.get("filename") - - self.dck = self.sid_dck.split("-")[1] - self.corrections = config.get("corrections") - - process_options = [ - "data_model", - "read_sections", - "filter_reports_by", - "cdm_map", - ] - try: - for opt in process_options: - if not config.get(self.sid_dck, {}).get(opt): - setattr(self, opt, config.get(opt)) - else: - setattr(self, opt, config.get(self.sid_dck).get(opt)) - self.flag = True - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", exc_info=True - ) - self.flag = False - - -# This is for json to handle dates -def date_handler(obj): - """Handle date.""" - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - def clean_L1a(L1a_id): """Clean previous LEVEL1a files.""" L1a_prods = glob.glob(os.path.join(L1a_path, "*" + FFS + L1a_id + ".psv")) @@ -181,7 +120,13 @@ def write_out_junk(dataObj, filename): logging.error("Need arguments to run!") sys.exit(1) -params = script_setup(args) +process_options = [ + "data_model", + "read_sections", + "filter_reports_by", + "cdm_map", +] +params = script_setup(process_options, args) if not params.flag: logging.error("Error parsing initial configuration") @@ -217,7 +162,7 @@ def write_out_junk(dataObj, filename): L1a_id = FFS.join([str(params.year), str(params.month).zfill(2), release_id]) # CLEAN PREVIOUS L1A PRODUCTS AND SIDE FILES ---------------------------------- -# clean_L1a(L1a_id) +clean_L1a(L1a_id) # DO THE DATA PROCESSING ------------------------------------------------------ data_model = params.data_model @@ -372,24 +317,7 @@ def write_out_junk(dataObj, filename): process = False logging.warning("No data to map to CDM after selection and cleaning") -# 2.9. Let's save reports with no observations... -# OBS_VALUES = [ ('core',x) for x in ['AT','SST','SLP','D','W','WBT','DPT'] ] -# no_obs_file = os.path.join(L1a_path,'no_obs',str(params.year) + FFS + str(params.month) + '.psv') -# c = 0 -# for data_obs in data_in.data: -# if len(data_obs[data_obs[OBS_VALUES].isna().all(axis=1)]) > 0: -# if not os.path.isdir(os.path.join(L1a_path,'no_obs')): -# os.mkdir(os.path.join(L1a_path,'no_obs')) -# wmode = 'a' if c > 0 else 'w' -# header = False if c > 0 else True -# data_obs[data_obs[OBS_VALUES].isna().all(axis=1)].to_csv(no_obs_file, sep = '|', mode = wmode, header = header) -# c += 1 -# %% extract NOC_corrections/duplicates -# data_in.data = TextParser_hdlr.restore(data_in.data) -# logging.info('Generate files needed in level1b for corrections') -# corrections.corrections(data_in.data, dataset, correction_path, params.year, params.month) - -# data_in.data = TextParser_hdlr.restore(data_in.data) + # 3. 
Map to common data model and output files if process: logging.info("Mapping to CDM") diff --git a/glamod_marine_processing/obs_suite/scripts/level1b.py b/glamod_marine_processing/obs_suite/scripts/level1b.py index 64649a36..b5f94965 100755 --- a/glamod_marine_processing/obs_suite/scripts/level1b.py +++ b/glamod_marine_processing/obs_suite/scripts/level1b.py @@ -51,7 +51,6 @@ import datetime import glob -import json import logging import os import sys @@ -60,64 +59,13 @@ import numpy as np import pandas as pd import simplejson +from _utilities import date_handler, script_setup from cdm_reader_mapper import cdm_mapper as cdm from cdm_reader_mapper.operations import replace reload(logging) # This is to override potential previous config of logging -# Functions-------------------------------------------------------------------- -class script_setup: - """Create script.""" - - def __init__(self, inargs): - self.data_path = inargs[1] - self.release = inargs[2] - self.update = inargs[3] - self.dataset = inargs[4] - self.configfile = inargs[5] - - try: - with open(self.configfile) as fileObj: - config = json.load(fileObj) - except Exception: - logging.error( - f"Opening configuration file :{self.configfile}", exc_info=True - ) - self.flag = False - return - - if len(sys.argv) > 6: - self.sid_dck = inargs[6] - self.year = inargs[7] - self.month = inargs[8] - else: - self.sid_dck = config.get("sid_dck") - self.year = config.get("yyyy") - self.month = config.get("mm") - - process_options = ["correction_version", "corrections", "histories"] - try: - for opt in process_options: - if not config.get(self.sid_dck, {}).get(opt): - setattr(self, opt, config.get(opt)) - else: - setattr(self, opt, config.get(self.sid_dck).get(opt)) - self.flag = True - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", exc_info=True - ) - self.flag = False - - -# This is for json to handle dates -def date_handler(obj): - """Handle date.""" - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - def clean_L1b(L1b_id): """Clean level 1b.""" L1b_prods = glob.glob(os.path.join(L1b_path, "*-" + L1b_id + ".psv")) @@ -153,7 +101,8 @@ def clean_L1b(L1b_id): logging.error("Need arguments to run!") sys.exit(1) -params = script_setup(args) +process_options = ["correction_version", "corrections", "histories"] +params = script_setup(process_options, args) filename_field_sep = "-" delimiter = "|" @@ -165,6 +114,8 @@ def clean_L1b(L1b_id): [str(params.year), str(params.month).zfill(2), release_id] ) fileID_date = filename_field_sep.join([str(params.year), str(params.month)]) +if params.prev_fileID is None: + params.prev_fileID = fileID L1a_path = os.path.join(release_path, "level1a", params.sid_dck) L1b_path = os.path.join(release_path, "level1b", params.sid_dck) @@ -185,7 +136,7 @@ def clean_L1b(L1b_id): ) sys.exit(1) -L1a_filename = os.path.join(L1a_path, "header-" + fileID + ".psv") +L1a_filename = params.filename if not os.path.isfile(L1a_filename): logging.error(f"L1a header file not found: {L1a_filename}") sys.exit(1) @@ -203,9 +154,10 @@ def clean_L1b(L1b_id): history_tstmp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") for table in cdm.properties.cdm_tables: datetime_col = "report_timestamp" if table == "header" else "date_time" - logging.info(f"TABLE {table}") - table_df = pd.DataFrame() - table_df = cdm.read_tables(L1a_path, fileID, cdm_subset=[table]) + logging.info(L1a_path) + logging.info(params.prev_fileID) + logging.info(table) + table_df = 
cdm.read_tables(L1a_path, params.prev_fileID, cdm_subset=[table]) if len(table_df) == 0: logging.warning(f"Empty or non-existing table {table}") diff --git a/glamod_marine_processing/obs_suite/scripts/level1c.py b/glamod_marine_processing/obs_suite/scripts/level1c.py index 3a983c51..a8a74ed1 100755 --- a/glamod_marine_processing/obs_suite/scripts/level1c.py +++ b/glamod_marine_processing/obs_suite/scripts/level1c.py @@ -105,58 +105,12 @@ import numpy as np import pandas as pd import simplejson +from _utilities import date_handler, script_setup from cdm_reader_mapper import cdm_mapper as cdm reload(logging) # This is to override potential previous config of logging -# FUNCTIONS ------------------------------------------------------------------- -class script_setup: - """Set up script.""" - - def __init__(self, inargs): - self.data_path = inargs[1] - self.release = inargs[2] - self.update = inargs[3] - self.dataset = inargs[4] - self.configfile = inargs[5] - - try: - with open(self.configfile) as fileObj: - config = json.load(fileObj) - except Exception: - logging.error( - f"Opening configuration file :{self.configfile}", exc_info=True - ) - self.flag = False - return - - if len(sys.argv) > 6: - self.sid_dck = inargs[6] - self.year = inargs[7] - self.month = inargs[8] - else: - try: - self.sid_dck = config.get("sid_dck") - self.year = config.get("yyyy") - self.month = config.get("mm") - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", exc_info=True - ) - self.flag = False - - self.dck = self.sid_dck.split("-")[1] - self.flag = True - - -# This is for json to handle dates -def date_handler(obj): - """Handle date.""" - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - def validate_id(idSeries): """Validate ID.""" json_file = os.path.join(id_validation_path, "dck" + params.dck + ".json") @@ -195,9 +149,10 @@ def read_table_files(table): # First read the master file, if any, then append leaks # If no yyyy-mm master file, can still have reports from datetime leaks # On reading 'header' read null as NaN so that we can validate null ids as NaN easily - table_df = cdm.read_tables( - prev_level_path, fileID, cdm_subset=[table], na_values="null" - ) + # table_df = cdm.read_tables( + # prev_level_path, fileID, cdm_subset=[table], na_values="null" + # ) + table_df = cdm.read_tables(prev_level_path, cdm_subset=[table], na_values="null") try: len(table_df) except Exception: @@ -296,7 +251,7 @@ def clean_level(file_id): logging.error("Need arguments to run!") sys.exit(1) -params = script_setup(args) +params = script_setup([], args) FFS = "-" delimiter = "|" @@ -309,6 +264,8 @@ def clean_level(file_id): release_id = FFS.join([params.release, params.update]) fileID = FFS.join([str(params.year), str(params.month).zfill(2), release_id]) fileID_date = FFS.join([str(params.year), str(params.month)]) +if params.prev_fileID is None: + params.prev_fileID = fileID prev_level_path = os.path.join(release_path, level_prev, params.sid_dck) level_path = os.path.join(release_path, level, params.sid_dck) @@ -317,7 +274,7 @@ def clean_level(file_id): id_validation_path = os.path.join( params.data_path, params.release, "NOC_ANC_INFO", "json_files" -) # os.path.join(params.data_path,'datasets',params.dataset,'NOC_ANC_INFO','json_files') +) data_paths = [ prev_level_path, @@ -334,11 +291,7 @@ def clean_level(file_id): ) sys.exit(1) - -prev_level_filename = os.path.join( - prev_level_path, - "header-*" + "-".join([str(params.year), str(params.month)]) 
+ "*.psv", -) +prev_level_filename = params.filename if len(glob.glob(prev_level_filename)) == 0: logging.error(f"L1b header files not found: {prev_level_filename}") sys.exit(1) @@ -454,7 +407,7 @@ def clean_level(file_id): process_table(table_df, table) obs_tables = [x for x in cdm_tables.keys() if x != "header"] for table in obs_tables: - table_pattern = FFS.join([table, fileID]) + "*.psv" + table_pattern = FFS.join([table, params.prev_fileID]) + "*.psv" table_files = glob.glob(os.path.join(prev_level_path, table_pattern)) if len(table_files) > 0: logging.info(f"Cleaning table {table}") diff --git a/glamod_marine_processing/obs_suite/scripts/level1d.py b/glamod_marine_processing/obs_suite/scripts/level1d.py index b689de39..6b185d43 100755 --- a/glamod_marine_processing/obs_suite/scripts/level1d.py +++ b/glamod_marine_processing/obs_suite/scripts/level1d.py @@ -55,7 +55,6 @@ import datetime import glob -import json import logging import os import subprocess @@ -65,79 +64,12 @@ import pandas as pd import simplejson +from _utilities import date_handler, script_setup from cdm_reader_mapper import cdm_mapper as cdm reload(logging) # This is to override potential previous config of logging -# %% FUNCTIONS ------------------------------------------------------------------- -class script_setup: - """Set up script.""" - - def __init__(self, inargs): - self.data_path = inargs[1] - self.release = inargs[2] - self.update = inargs[3] - self.dataset = inargs[4] - self.configfile = inargs[5] - - try: - with open(self.configfile) as fileObj: - config = json.load(fileObj) - except Exception: - logging.error( - f"Opening configuration file :{self.configfile}", exc_info=True - ) - self.flag = False - return - - if len(sys.argv) >= 8: - logging.warning( - "Removed option to provide sid_dck, year and month as arguments. Use config file instead" - ) - try: - self.sid_dck = config.get("sid_dck") - self.year = config.get("yyyy") - self.month = config.get("mm") - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", exc_info=True - ) - self.flag = False - - self.dck = self.sid_dck.split("-")[1] - - # However md_subdir is then nested in monthly....and inside monthly files - # Other MD sources would stick to this? Force it otherwise? 
- process_options = [ - "md_model", - "md_subdir", - "history_explain", - "md_first_yr_avail", - "md_last_yr_avail", - "md_not_avail", - ] - try: - for opt in process_options: - if not config.get(self.sid_dck, {}).get(opt): - setattr(self, opt, config.get(opt)) - else: - setattr(self, opt, config.get(self.sid_dck).get(opt)) - self.flag = True - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", exc_info=True - ) - self.flag = False - - -# This is for json to handle dates -def date_handler(obj): - """Handle date.""" - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - def map_to_cdm(md_model, meta_df, log_level="INFO"): """Map to CDM.""" # Atts is a minimum info on vars the cdm mocule requires @@ -161,9 +93,13 @@ def process_table(table_df, table_name): # Open table and reindex table_df = pd.DataFrame() if local: - table_df = cdm.read_tables(scratch_path, fileID, cdm_subset=[table_name]) + table_df = cdm.read_tables( + scratch_path, params.prev_fileID, cdm_subset=[table_name] + ) else: - table_df = cdm.read_tables(prev_level_path, fileID, cdm_subset=[table_name]) + table_df = cdm.read_tables( + prev_level_path, params.prev_fileID, cdm_subset=[table_name] + ) if table_df is None or len(table_df) == 0: logging.warning(f"Empty or non existing table {table_name}") return @@ -257,7 +193,15 @@ def clean_level(file_id): logging.error("Need arguments to run!") sys.exit(1) -params = script_setup(args) +process_options = [ + "md_model", + "md_subdir", + "history_explain", + "md_first_yr_avail", + "md_last_yr_avail", + "md_not_avail", +] +params = script_setup(process_options, args) # %% FFS = "-" delimiter = "|" @@ -273,6 +217,8 @@ def clean_level(file_id): release_id = FFS.join([params.release, params.update]) fileID = FFS.join([str(params.year), str(params.month).zfill(2), release_id]) fileID_date = FFS.join([str(params.year), str(params.month)]) +if params.prev_fileID is None: + params.prev_fileID = fileID prev_level_path = os.path.join(release_path, level_prev, params.sid_dck) level_path = os.path.join(release_path, level, params.sid_dck) @@ -291,7 +237,7 @@ def clean_level(file_id): ) sys.exit(1) -prev_level_filename = os.path.join(prev_level_path, "header-" + fileID + ".psv") +prev_level_filename = params.filename if not os.path.isfile(prev_level_filename): logging.error(f"L1c header file not found: {prev_level_filename}") sys.exit(1) @@ -345,17 +291,21 @@ def clean_level(file_id): # Read the header table table = "header" if local: - logging.info(f"cp -L {prev_level_path}/*{fileID}.psv {scratch_path}") - subprocess.call(f"cp -L {prev_level_path}/*{fileID}.psv {scratch_path}", shell=True) + # logging.info(f"cp -L {prev_level_path}/*{fileID}.psv {scratch_path}") + # subprocess.call(f"cp -L {prev_level_path}/*{fileID}.psv {scratch_path}", shell=True) + logging.info(f"cp -L {prev_level_path}/*.psv {scratch_path}") + subprocess.call(f"cp -L {prev_level_path}/*.psv {scratch_path}", shell=True) header_df = pd.DataFrame() if local: - header_df = cdm.read_tables( - scratch_path, fileID, cdm_subset=[table], na_values="null" - ) + # header_df = cdm.read_tables( + # scratch_path, fileID, cdm_subset=[table], na_values="null" + # ) + header_df = cdm.read_tables(scratch_path, cdm_subset=[table], na_values="null") else: - header_df = cdm.read_tables( - prev_level_path, fileID, cdm_subset=[table], na_values="null" - ) + # header_df = cdm.read_tables( + # prev_level_path, fileID, cdm_subset=[table], na_values="null" + # ) + header_df = 
cdm.read_tables(prev_level_path, cdm_subset=[table], na_values="null") if len(header_df) == 0: logging.error("Empty or non-existing header table") diff --git a/glamod_marine_processing/obs_suite/scripts/level1e.py b/glamod_marine_processing/obs_suite/scripts/level1e.py index 5ab3aeac..d20b425f 100755 --- a/glamod_marine_processing/obs_suite/scripts/level1e.py +++ b/glamod_marine_processing/obs_suite/scripts/level1e.py @@ -122,7 +122,6 @@ import datetime import glob -import json import logging import os import sys @@ -131,79 +130,13 @@ import numpy as np import pandas as pd import simplejson +from _utilities import date_handler, script_setup from cdm_reader_mapper import cdm_mapper as cdm reload(logging) # This is to override potential previous config of logging # Functions-------------------------------------------------------------------- -class script_setup: - """Set up script.""" - - def __init__(self, inargs): - self.data_path = inargs[1] - self.release = inargs[2] - self.update = inargs[3] - self.dataset = inargs[4] - self.configfile = inargs[5] - - try: - with open(self.configfile) as fileObj: - config = json.load(fileObj) - except Exception: - logging.error( - f"Opening configuration file :{self.configfile}", exc_info=True - ) - self.flag = False - return - - if len(sys.argv) > 6: - self.sid_dck = inargs[6] - self.year = inargs[7] - self.month = inargs[8] - else: - try: - self.sid_dck = config.get("sid_dck") - self.year = config.get("yyyy") - self.month = config.get("mm") - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", - exc_info=True, - ) - self.flag = False - - self.dck = self.sid_dck.split("-")[1] - - # However md_subdir is then nested in monthly....and inside monthly files - # Other MD sources would stick to this? Force it otherwise? 
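All level scripts from level1b onwards now follow the same input-id convention: build the canonical `fileID` from year, month and release, and fall back to it whenever no `prev_fileID` reached the job config (the CLI pads a user-supplied `-p_id` value such as `subset` with `*` wildcards). A condensed sketch of that convention with example values (the helper name is illustrative):

```python
def build_file_ids(year, month, release, update, prev_file_id=None):
    """Return (fileID, prev_fileID) the way the level scripts derive them."""
    release_id = "-".join([release, update])
    file_id = "-".join([str(year), str(month).zfill(2), release_id])
    if prev_file_id is None:
        # No -p_id given: previous-level files carry the canonical id.
        prev_file_id = file_id
    else:
        # cli_obs.py pads a user-supplied id with glob wildcards.
        if not prev_file_id.startswith("*"):
            prev_file_id = f"*{prev_file_id}"
        if not prev_file_id.endswith("*"):
            prev_file_id = f"{prev_file_id}*"
    return file_id, prev_file_id


print(build_file_ids(2022, 1, "release_7.0", "000000"))
# ('2022-01-release_7.0-000000', '2022-01-release_7.0-000000')
print(build_file_ids(2022, 1, "release_7.0", "000000", "subset"))
# ('2022-01-release_7.0-000000', '*subset*')
```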
- process_options = [ - "history_explain", - "qc_first_date_avail", - "qc_last_date_avail", - ] - try: - for opt in process_options: - if not config.get(self.sid_dck, {}).get(opt): - setattr(self, opt, config.get(opt)) - else: - setattr(self, opt, config.get(self.sid_dck).get(opt)) - self.flag = True - except Exception: - logging.error( - f"Parsing configuration from file :{self.configfile}", - exc_info=True, - ) - self.flag = False - - -# This is for json to handle dates -def date_handler(obj): - """Handle date.""" - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - # This is to get the unique flag per parameter def get_qc_flags(qc, qc_df_full): """Get QC flag.""" @@ -297,7 +230,9 @@ def process_table(table_df, table_name): # Assume 'header' and in a DF in table_df otherwise # Open table and reindex table_df = pd.DataFrame() - table_df = cdm.read_tables(prev_level_path, fileID, cdm_subset=[table_name]) + table_df = cdm.read_tables( + prev_level_path, params.prev_fileID, cdm_subset=[table_name] + ) if table_df is None or len(table_df) == 0: logging.warning(f"Empty or non existing table {table_name}") @@ -464,7 +399,12 @@ def clean_level(file_id): logging.error("Need arguments to run!") sys.exit(1) -params = script_setup(args) +process_options = [ + "history_explain", + "qc_first_date_avail", + "qc_last_date_avail", +] +params = script_setup(process_options, args) release_path = os.path.join(params.data_path, params.release, params.dataset) release_id = filename_field_sep.join([params.release, params.update]) @@ -472,6 +412,8 @@ def clean_level(file_id): [str(params.year), str(params.month).zfill(2), release_id] ) fileID_date = filename_field_sep.join([str(params.year), str(params.month)]) +if params.prev_fileID is None: + params.prev_fileID = fileID prev_level_path = os.path.join(release_path, level_prev, params.sid_dck) level_path = os.path.join(release_path, level, params.sid_dck) @@ -519,17 +461,17 @@ def clean_level(file_id): sys.exit(1) # Do some additional checks before clicking go, do we have a valid header? 
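In the hunk below (and in the matching level1b/level1d hunks), the hard-coded `header-<fileID>.psv` input path gives way to `params.filename`, which, as read from this diff, `config_array.config_element` records in each job's JSON configuration after expanding the `-sp`/`--source_pattern` glob. A sketch of that resolution step under an assumed directory layout:

```python
import glob
import os

# Assumed layout: previous-level output for one source/deck directory.
prev_level_path = "./Tlevel1e/release_7.0/ICOADS_R3.0.2T/level1d/114-992"
source_pattern = "header-???-???_????-??-??_subset.psv"

# One job per matching file; the path surfaces in the level scripts
# as params.filename via the per-job JSON configuration.
for filename in sorted(glob.glob(os.path.join(prev_level_path, source_pattern))):
    job_config = {"filename": filename, "yyyy": "2022", "mm": "01"}
    print(job_config)
```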
-header_filename = os.path.join( - prev_level_path, filename_field_sep.join(["header", fileID]) + ".psv" -) +header_filename = params.filename if not os.path.isfile(header_filename): logging.error(f"Header table file not found: {header_filename}") sys.exit(1) header_df = pd.DataFrame() -header_df = cdm.read_tables( - prev_level_path, fileID, cdm_subset=["header"], na_values="null" -) +# header_df = cdm.read_tables( +# prev_level_path, fileID, cdm_subset=["header"], na_values="null" +# ) +header_df = cdm.read_tables(prev_level_path, cdm_subset=["header"], na_values="null") + if len(header_df) == 0: logging.error("Empty or non-existing header table") sys.exit(1) diff --git a/glamod_marine_processing/obs_suite/scripts/level2.py b/glamod_marine_processing/obs_suite/scripts/level2.py index 0eba06fc..259bdc41 100755 --- a/glamod_marine_processing/obs_suite/scripts/level2.py +++ b/glamod_marine_processing/obs_suite/scripts/level2.py @@ -41,7 +41,6 @@ """ from __future__ import annotations -import datetime import glob import json import logging @@ -49,57 +48,35 @@ import shutil import sys from importlib import reload -from subprocess import call +from pathlib import Path +from _utilities import script_setup from cdm_reader_mapper import cdm_mapper as cdm reload(logging) # This is to override potential previous config of logging # FUNCTIONS ------------------------------------------------------------------- -class script_setup: - """Set up script.""" - - def __init__(self, inargs): - self.data_path = inargs[1] - self.release = inargs[2] - self.update = inargs[3] - self.dataset = inargs[4] - self.level2_list = inargs[5] - self.configfile = inargs[6] - - try: - with open(self.configfile) as fileObj: - config = json.load(fileObj) - except Exception: - logging.error( - f"Opening configuration file: {self.configfile}", exc_info=True - ) - self.flag = False - return - - self.sid_dck = config.get("sid_dck") - self.dck = self.sid_dck.split("-")[1] - - -# This is for json to handle dates -def date_handler(obj): - """Handle date.""" - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - def clean_level(): """Clean level.""" for dirname in [L2_path, L2_reports_path, L2_excluded_path]: try: if os.path.isdir(dirname): logging.info(f"Removing directory {dirname}") - shutil.rmtree(dirname) + for file_ in glob.glob(os.path.join(dirname, "*.psv")): + os.remove(file_) except Exception: pass +def copyfiles(pattern, dest, mode="excluded"): + """Copy file pattern to dest.""" + file_list = glob.glob(pattern) + for file_ in file_list: + file_name = Path(file_).name + shutil.copyfile(file_, os.path.join(dest, file_name)) + logging.info(f"{file_name} {mode} from level2 in {dest}") + + # MAIN ------------------------------------------------------------------------ # Process input and set up some things and make sure we can do something------- @@ -114,11 +91,9 @@ def clean_level(): args = sys.argv else: logging.error("Need arguments to run!") - sys.exit(1) -params = script_setup(args) +params = script_setup([], args) -FFS = "-" level = "level2" header = True wmode = "w" left_min_period = 1600 right_max_period = 2100 release_path = os.path.join(params.data_path, params.release, params.dataset) -release_id = FFS.join([params.release, params.update]) +release_id = "-".join([params.release, params.update]) L1e_path = os.path.join(release_path, "level1e", params.sid_dck) L2_path = os.path.join(release_path, level, params.sid_dck) L2_excluded_path = os.path.join(release_path, level, "excluded", params.sid_dck) @@ -195,41
+170,29 @@ def clean_level(): try: include_param_list.append("header") if exclude_sid_dck: - logging.info(f"Full dataset {params.sid_dck} excluded from level2") for table in cdm_tables: - files = os.path.join(L1e_path, table + "*.psv") - call(" ".join(["cp", files, L2_excluded_path, "2>/dev/null"]), shell=True) + pattern = os.path.join(L1e_path, table + "*.psv") + copyfiles(pattern, L2_excluded_path) else: - period_brace = "{" + str(year_init) + ".." + str(year_end) + "}" - left_period_brace = "{" + str(left_min_period) + ".." + str(year_init - 1) + "}" - right_period_brace = ( - "{" + str(year_end + 1) + ".." + str(right_max_period) + "}" - ) for table in exclude_param_list: - logging.info(f"{table} excluded from level2") - files = os.path.join(L1e_path, table + "*.psv") - file_list = glob.glob(files) - if len(file_list) > 0: - call( - " ".join(["cp", files, L2_excluded_path, "2>/dev/null"]), - shell=True, - ) + pattern = os.path.join(L1e_path, table + "*.psv") + copyfiles(pattern, L2_excluded_path) for table in include_param_list: - logging.info(f"{table} included in level2") - files = os.path.join(L1e_path, table + FFS + period_brace + FFS + "*.psv") - call(" ".join(["cp", files, L2_path, "2>/dev/null"]), shell=True) + for year in range(year_init, year_end + 1): + pattern = os.path.join(L1e_path, f"{table}-*{year}-??-*.psv") + logging.info(f"Copying files matching {pattern}") + copyfiles(pattern, L2_path, mode="included") + # Send out of release period to excluded - files = os.path.join( - L1e_path, FFS.join(["*", left_period_brace, "??", "*.psv"]) - ) - call(" ".join(["cp", files, L2_excluded_path, "2>/dev/null"]), shell=True) - files = os.path.join( - L1e_path, FFS.join(["*", right_period_brace, "??", "*.psv"]) - ) - call(" ".join(["cp", files, L2_excluded_path, "2>/dev/null"]), shell=True) + for year in range(left_min_period, year_init): + pattern = os.path.join(L1e_path, f"*{year}-??-*.psv") + copyfiles(pattern, L2_excluded_path) + for year in range(year_end + 1, right_max_period + 1): + pattern = os.path.join(L1e_path, f"*{year}-??-*.psv") + copyfiles(pattern, L2_excluded_path) + logging.info("Level2 data successfully created") except Exception: logging.error("Error creating level2 data", exc_info=True) logging.info(f"Level2 data {params.sid_dck} removed") clean_level() - sys.exit(1) diff --git a/glamod_marine_processing/qc_suite/configuration_files/ParametersCCI.json b/glamod_marine_processing/qc_suite/configuration_files/ParametersCCI.json index 4408a6da..a766e1b9 100755 --- a/glamod_marine_processing/qc_suite/configuration_files/ParametersCCI.json +++ b/glamod_marine_processing/qc_suite/configuration_files/ParametersCCI.json @@ -3,12 +3,8 @@ "icoads_filenames": [ "YYYY-MM.psv" ], - "background_dir": [ - "SST_daily_CCI/gws-access.jasmin.ac.uk/ICDR_v2/Analysis/L4/v2.1/YYYY/MMMM/DDDD/" - ], - "background_filenames": [ - "YYYYMMMMDDDD120000-ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR2.1-v02.0-fv01.0.nc" - ], + "background_dir": "SST_daily_CCI/gws-access.jasmin.ac.uk/ICDR_v2/Analysis/L4/v2.1/YYYY/MMMM/DDDD/", + "background_filenames": "YYYYMMMMDDDD120000-ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR2.1-v02.0-fv01.0.nc", "climatologies": [ [ "SST", diff --git a/glamod_marine_processing/qc_suite/lotus_scripts/qc_slurm.py b/glamod_marine_processing/qc_suite/lotus_scripts/qc_slurm.py index 93d36afc..3db8f533 100755 --- a/glamod_marine_processing/qc_suite/lotus_scripts/qc_slurm.py +++ b/glamod_marine_processing/qc_suite/lotus_scripts/qc_slurm.py @@ -135,3 +135,8 @@ def launch_process(process): else:
logging.info(f"{taskfile}: create script") logging.info(f"Script {slurmfile} was created.") + if script_config["run_jobs"] is True: + logging.info("Run interactively.") + os.system(f"chmod u+x {taskfile}") + os.system(f"{taskfile}") + logging.info(f"Check whether jobs was successful: {logdir}") diff --git a/glamod_marine_processing/qc_suite/modules/BackgroundField.py b/glamod_marine_processing/qc_suite/modules/BackgroundField.py index 0c4c2584..2cdccade 100755 --- a/glamod_marine_processing/qc_suite/modules/BackgroundField.py +++ b/glamod_marine_processing/qc_suite/modules/BackgroundField.py @@ -206,7 +206,10 @@ def get_background_filename(dirstubs, filenamestubs, year, month, day): """ if year is None or month is None or day is None: return None - + if isinstance(dirstubs, str): + dirstubs = [dirstubs] + if isinstance(filenamestubs, str): + filenamestubs = [filenamestubs] assert len(dirstubs) == len( filenamestubs ), "dirstubs and filename stubs have different numbers of members" diff --git a/glamod_marine_processing/qc_suite/modules/Extended_IMMA_sb.py b/glamod_marine_processing/qc_suite/modules/Extended_IMMA_sb.py index 95fbd35d..4f4bd9ba 100755 --- a/glamod_marine_processing/qc_suite/modules/Extended_IMMA_sb.py +++ b/glamod_marine_processing/qc_suite/modules/Extended_IMMA_sb.py @@ -3385,6 +3385,10 @@ def write_qc(self, runid, icoads_dir, year, month, allvarnames, test=False): syr = str(year) smn = f"{month:02}" + if len(self.reps) == 0: + print("wrote no output") + return + for var in allvarnames: outfilename = var + "_qc_" + syr + smn + "_" + runid + ".csv" if test: @@ -3405,7 +3409,6 @@ def write_qc(self, runid, icoads_dir, year, month, allvarnames, test=False): outfile.close() print(f"wrote out {count_write} obs") - return def write_output(self, runid, icoads_dir, year, month, test=False): """Write out the contents of the class`.Deck`.""" diff --git a/glamod_marine_processing/qc_suite/scripts/marine_qc.py b/glamod_marine_processing/qc_suite/scripts/marine_qc.py index c250b0fc..ed50ef96 100755 --- a/glamod_marine_processing/qc_suite/scripts/marine_qc.py +++ b/glamod_marine_processing/qc_suite/scripts/marine_qc.py @@ -108,7 +108,7 @@ def read_icoads_file( ) # replace ' ' in ID field with '' (corrections introduce bug) - imma_obj["ID"].replace(" ", "", inplace=True) + imma_obj["ID"] = imma_obj["ID"].replace(" ", "") imma_obj = imma_obj.sort_values( ["YR", "MO", "DY", "HR", "ID"], axis=0, ascending=True ) @@ -149,7 +149,6 @@ def read_icoads_file( readyear, readmonth, lastday ) - # ofname = ostia_filename(ostia_dir, y_year, y_month, y_day) ofname = bf.get_background_filename( os.path.join(external_dir, parameters["background_dir"]), parameters["background_filenames"], @@ -323,7 +322,6 @@ def main(argv): sst_stdev_3 = clim.Climatology.from_filename( config.get("Climatologies").get("SST_buddy_avg_sampling"), "sst" ) - with open(config.get("Files").get("parameter_file")) as f: parameters = json.load(f) diff --git a/tests/_load_data.py b/tests/_load_data.py new file mode 100755 index 00000000..b1449844 --- /dev/null +++ b/tests/_load_data.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from _settings import level_input, prev_level, table_names +from cdm_reader_mapper.common.getting_files import load_file + + +def load_NOC_corrections(**kwargs): + """Load NOC correction data from cdm-testdata.""" + for sub in [ + "duplicate_flags", + "duplicates", + "id", + "latitude", + "longitude", + "timestamp", + ]: + load_file( + f"NOC_corrections/v1x2023/{sub}/2022-01.txt.gz", + **kwargs, + ) 
+ + +def load_NOC_ANC_INFO(**kwargs): + """Load NOC ANC INFO data from cdm-testdata.""" + load_file( + "NOC_ANC_INFO/json_files/dck992.json", + **kwargs, + ) + + +def load_Pub47(**kwargs): + """Load Pub47 data from cdm-testdata.""" + load_file( + "Pub47/monthly/2022-01-01.csv", + **kwargs, + ) + + +def load_metoffice_qc(**kwargs): + """Load metoffice QC data from cdm-testdata.""" + for qc_file in [ + "AT_qc_202201_CCIrun.csv", + "DPT_qc_202201_CCIrun.csv", + "POS_qc_202201_CCIrun.csv", + "SLP_qc_202201_CCIrun.csv", + "SST_qc_202201_CCIrun.csv", + "SST_qc_202201_hires_CCIrun.csv", + "Variables_202201_CCIrun.csv", + "W_qc_202201_CCIrun.csv", + ]: + load_file(f"metoffice_qc/base/2022/01/{qc_file}", **kwargs) + + +def load_input(level): + """Load level input data from cdm-testdata.""" + p_level = prev_level[level] + leveli = level_input[level] + if level == "level1a": + load_imma(level, leveli, p_level) + else: + load_cdms(level, leveli, p_level) + + +def load_cdms(level, leveli, p_level): + """Load level CDM input data from cdm-testdata.""" + for table_name in table_names: + load_file( + f"imma1_992/cdm_tables/{table_name}-114-992_2022-01-01_subset.psv", + cache_dir=f"./T{level}/{leveli}/ICOADS_R3.0.2T/{p_level}/114-992", + within_drs=False, + ) + + +def load_imma(level, leveli, p_level): + """Load level IMMA input data from cdm-testdata.""" + load_file( + "imma1_992/input/114-992_2022-01-01_subset.imma", + cache_dir=f"./T{level}/{leveli}/ICOADS_R3.0.2T/{p_level}/114-992", + within_drs=False, + ) diff --git a/tests/_settings.py b/tests/_settings.py new file mode 100755 index 00000000..6cad807f --- /dev/null +++ b/tests/_settings.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import numpy as np + +table_names = [ + "header", + "observations-at", + "observations-dpt", + "observations-slp", + "observations-sst", + "observations-wbt", + "observations-wd", + "observations-ws", +] + +table_names_1b = [ + "header", + "observations-at", + "observations-dpt", + "observations-slp", + "observations-sst", + "observations-wd", + "observations-ws", +] + +prev_level = { + "level1a": "level0", + "level1b": "level1a", + "level1c": "level1b", + "level1d": "level1c", + "level1e": "level1d", + "level2": "level1e", +} + +level_input = { + "level1a": "datasets", + "level1b": "release_7.0", + "level1c": "release_7.0", + "level1d": "release_7.0", + "level1e": "release_7.0", + "level2": "release_7.0", +} + +which_tables = { + "level1a": table_names, + "level1b": table_names_1b, + "level1c": table_names_1b, + "level1d": table_names_1b, + "level1e": table_names_1b, + "level2": table_names_1b, +} + +pattern = { + "level1a": "???-???_????-??-??_subset.imma", + "level1b": "header-???-???_????-??-??_subset.psv", + "level1c": "header-???-???_????-??-??_subset.psv", + "level1d": "header-???-???_????-??-??_subset.psv", + "level1e": "header-???-???_????-??-??_subset.psv", + "level2": "header-???-???_????-??-??_subset.psv", +} + +manipulation = { + "level1a": {}, + "level1b": {}, + "level1c": {}, + "level1d": { + ("header", "station_name"): [ + "null", + "FF HELMER HANSEN", + "WAVERIDER TFSTD", + "NORNE", + "WAVERIDER TFDRN", + ], + ("header", "platform_sub_type"): ["null", "RV", "OT", "MI", "OT"], + ("header", "station_record_number"): ["1", "1", "0", "13", "0"], + ("header", "report_duration"): ["11", "HLY", "11", "HLY", "11"], + ("observations-at", "sensor_id"): ["null", "AT", np.nan, "null", np.nan], + ("observations-dpt", "sensor_id"): [np.nan, "HUM", np.nan, "null", np.nan], + ("observations-slp",
"sensor_id"): ["null", "SLP", np.nan, "null", np.nan], + ("observations-sst", "sensor_id"): ["null", "SST", np.nan, np.nan, np.nan], + ("observations-wd", "sensor_id"): ["null", "WSPD", np.nan, "null", np.nan], + ("observations-ws", "sensor_id"): ["null", "WSPD", np.nan, "null", np.nan], + ("observations-at", "sensor_automation_status"): [ + "5", + "3", + np.nan, + "5", + np.nan, + ], + ("observations-dpt", "sensor_automation_status"): [ + np.nan, + "3", + np.nan, + "5", + np.nan, + ], + ("observations-slp", "sensor_automation_status"): [ + "5", + "3", + np.nan, + "5", + np.nan, + ], + ("observations-sst", "sensor_automation_status"): [ + "5", + "3", + np.nan, + np.nan, + np.nan, + ], + ("observations-wd", "sensor_automation_status"): [ + "5", + "3", + np.nan, + "5", + np.nan, + ], + ("observations-ws", "sensor_automation_status"): [ + "5", + "3", + np.nan, + "5", + np.nan, + ], + }, + "level1e": { + ("header", "report_quality"): ["2", "2", "2", "2", "2"], + }, + "level2": {}, +} diff --git a/tests/_testing_suite.py b/tests/_testing_suite.py new file mode 100755 index 00000000..74b775ff --- /dev/null +++ b/tests/_testing_suite.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import os + +import _load_data +import _settings +import pandas as pd +from cdm_reader_mapper.cdm_mapper import read_tables +from cdm_reader_mapper.common.getting_files import load_file + +add_data = { + "level1a": None, + "level1b": _load_data.load_NOC_corrections, + "level1c": _load_data.load_NOC_ANC_INFO, + "level1d": _load_data.load_Pub47, + "level1e": _load_data.load_metoffice_qc, + "level2": None, +} + + +def manipulate_expected(expected, level): + """Manipulate expected result data.""" + for index, values in _settings.manipulation[level].items(): + expected[index] = values + return expected + + +def _obs_testing(level, capsys): + """Observational testing suite.""" + tables = _settings.which_tables[level] + if add_data[level] is not None: + add_data[level]( + cache_dir=f"./T{level}/release_7.0", + branch="marine_processing_testing", + ) + + _load_data.load_input(level) + + s = ( + "obs_suite " + f"-l {level} " + f"-data_dir ./T{level} " + f"-work_dir ./T{level} " + f"-sp {_settings.pattern[level]} " + "-p_id subset " + "-o " + "-run" + ) + os.system(s) + captured = capsys.readouterr() + assert captured.out == "" + + results = read_tables( + f"./T{level}/release_7.0/ICOADS_R3.0.2T/{level}/114-992", cdm_subset=tables + ) + for table_name in tables: + load_file( + f"imma1_992/cdm_tables/{table_name}-114-992_2022-01-01_subset.psv", + cache_dir=f"./E{level}/ICOADS_R3.0.2T/{level}/114-992", + within_drs=False, + ) + expected = read_tables( + f"./E{level}/ICOADS_R3.0.2T/{level}/114-992", cdm_subset=tables + ) + + expected = manipulate_expected(expected, level) + + for deletion in [("header", "record_timestamp"), ("header", "history")]: + del results[deletion] + del expected[deletion] + + pd.testing.assert_frame_equal(results, expected) diff --git a/tests/test_marine_processing.py b/tests/test_marine_processing.py new file mode 100755 index 00000000..7cf2babd --- /dev/null +++ b/tests/test_marine_processing.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import pytest # noqa + +import glamod_marine_processing + + +def test_marine_processing(): + """Sample pytest test.""" diff --git a/tests/test_obssuite.py b/tests/test_obssuite.py new file mode 100755 index 00000000..9efabeee --- /dev/null +++ b/tests/test_obssuite.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +import pytest +from 
_testing_suite import _obs_testing + + +@pytest.mark.parametrize( + "level", ["level1a", "level1b", "level1c", "level1d", "level1e", "level2"] +) +def test_levels(capsys, level): + """Run the observational testing suite for the given processing level.""" + _obs_testing(level, capsys)
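`test_levels` drives the full chain: `_obs_testing` composes an `obs_suite` call per level (`-sp` with the level's source pattern, `-p_id subset`, `-o` to overwrite, `-run` to execute interactively), runs it via `os.system`, and compares the produced CDM tables against the expected ones from cdm-testdata. To iterate on a single level locally, the parametrized case can be selected by keyword; a sketch using pytest's public API, assuming the repository root as working directory:

```python
import pytest

# -k filters on the parametrized id, so only the level1b case runs.
exit_code = pytest.main(["tests/test_obssuite.py", "-k", "level1b"])
raise SystemExit(exit_code)
```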