From ffe9438cf353eee0bc18346976bc0b4dbead920e Mon Sep 17 00:00:00 2001 From: allibco Date: Fri, 17 Jun 2022 12:15:09 -0600 Subject: [PATCH] Moving the statistical_ensemble_test files to CESM repo and pyCECT to a CESM external. --- tools/statistical_ensemble_test/README | 116 - .../statistical_ensemble_test/addmetadata.sh | 64 - tools/statistical_ensemble_test/ensemble.py | 200 -- .../pyCECT/.gitignore | 90 - .../pyCECT/CHANGES.rst | 89 - tools/statistical_ensemble_test/pyCECT/EET.py | 71 - .../pyCECT/LICENSE.txt | 201 -- .../pyCECT/README.rst | 45 - .../pyCECT/docs/Makefile | 20 - .../pyCECT/docs/conf.py | 57 - .../pyCECT/docs/index.rst | 26 - .../pyCECT/docs/make.bat | 35 - .../pyCECT/docs/requirements.txt | 5 - .../pyCECT/docs/source/installation.rst | 5 - .../pyCECT/docs/source/pyCECT.rst | 213 -- .../pyCECT/docs/source/pyEnsSum.rst | 161 -- .../pyCECT/docs/source/pyEnsSumPop.rst | 150 -- .../pyCECT/docs/source/readme.rst | 46 - .../pyCECT/empty_excluded.json | 3 - .../pyCECT/excluded_varlist.json | 3 - .../pyCECT/included_varlist.json | 17 - .../pyCECT/pop_ensemble.json | 4 - .../pyCECT/pyCECT.py | 544 ---- .../pyCECT/pyEnsLib.py | 2232 ----------------- .../pyCECT/pyEnsSum.py | 617 ----- .../pyCECT/pyEnsSumPop.py | 371 --- .../pyCECT/pyPlots.py | 171 -- .../pyCECT/readthedocs.yml | 8 - .../pyCECT/test_pop_CECT.sh | 17 - .../pyCECT/test_pyEnsSum.sh | 13 - .../pyCECT/test_pyEnsSumPop.sh | 11 - .../pyCECT/test_uf_cam_ect.sh | 13 - tools/statistical_ensemble_test/single_run.py | 434 ---- .../user_nl_cam_LENS | 24 - 34 files changed, 6076 deletions(-) delete mode 100644 tools/statistical_ensemble_test/README delete mode 100755 tools/statistical_ensemble_test/addmetadata.sh delete mode 100644 tools/statistical_ensemble_test/ensemble.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/.gitignore delete mode 100644 tools/statistical_ensemble_test/pyCECT/CHANGES.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/EET.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/LICENSE.txt delete mode 100644 tools/statistical_ensemble_test/pyCECT/README.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/Makefile delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/conf.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/index.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/make.bat delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/requirements.txt delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/source/installation.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/source/pyCECT.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSum.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSumPop.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/docs/source/readme.rst delete mode 100644 tools/statistical_ensemble_test/pyCECT/empty_excluded.json delete mode 100644 tools/statistical_ensemble_test/pyCECT/excluded_varlist.json delete mode 100644 tools/statistical_ensemble_test/pyCECT/included_varlist.json delete mode 100644 tools/statistical_ensemble_test/pyCECT/pop_ensemble.json delete mode 100755 tools/statistical_ensemble_test/pyCECT/pyCECT.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/pyEnsLib.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/pyEnsSum.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/pyEnsSumPop.py delete mode 100644 
tools/statistical_ensemble_test/pyCECT/pyPlots.py delete mode 100644 tools/statistical_ensemble_test/pyCECT/readthedocs.yml delete mode 100644 tools/statistical_ensemble_test/pyCECT/test_pop_CECT.sh delete mode 100644 tools/statistical_ensemble_test/pyCECT/test_pyEnsSum.sh delete mode 100644 tools/statistical_ensemble_test/pyCECT/test_pyEnsSumPop.sh delete mode 100644 tools/statistical_ensemble_test/pyCECT/test_uf_cam_ect.sh delete mode 100644 tools/statistical_ensemble_test/single_run.py delete mode 100644 tools/statistical_ensemble_test/user_nl_cam_LENS diff --git a/tools/statistical_ensemble_test/README b/tools/statistical_ensemble_test/README deleted file mode 100644 index 0439177cdcf..00000000000 --- a/tools/statistical_ensemble_test/README +++ /dev/null @@ -1,116 +0,0 @@ ------------------------------------------- -CESM-ECT (CESM Ensemble Consistency Test) ------------------------------------------- - -CESM-ECT is a suite of tests to determine whether a new -simulation setup (new machine, compiler, etc.) is statistically -distinguishable from an accepted ensemble. The verification tools in -the CESM-ECT suite are: - -CAM-ECT - detects issues in CAM and CLM (12 month runs) -UF-CAM-ECT - detects issues in CAM and CLM (9 time step runs) -POP-ECT - detects issues in POP and CICE (12 month runs) - -The ECT process involves comparing runs generated with -the new scenario (3 for CAM-ECT and UF-CAM-ECT, and 1 for POP-ECT) -to an ensemble built on a trusted machine (currently -cheyenne). The Python ECT tools are located in the pyCECT -subdirectory or at https://github.com/NCAR/PyCECT/releases. - --OR- - -We now provide a web server for CAM-ECT and UF-CAM-ECT, where -you can upload the (3) generated runs for comparison to our ensemble. -Please see the webpage at http://www.cesm.ucar.edu/models/cesm2/verification/ -for further instructions. - ------------------------------------ -Creating or obtaining a summary file: ------------------------------------ - -Before the test can be run, a summary file of the ensemble -runs to which the comparison will be made is needed. Ensemble summary files -(NetCDF) for existing tags for CAM-ECT, UF-CAM-ECT, and POP-ECT that -were created by CSEG are located (respectively) in the CESM input data -directories: - -$CESMDATAROOT/inputdata/validation/ensembles -$CESMDATAROOT/inputdata/validation/uf_ensembles -$CESMDATAROOT/inputdata/validation/pop_ensembles - -If none of our ensembles are suitable for your needs, then you may create -your own ensemble (and summary file) using the following instructions: - -(1) To create a new ensemble, use the ensemble.py script in this directory. -This script creates and compiles a case, then creates clones of the -original case, where the initial temperature perturbation is slightly modified -for each ensemble member. At this time, CIME includes functionality -to create ensembles for CAM-ECT, UF-CAM-ECT, and POP-ECT. - -(2) Use --ect to specify whether the ensemble is for CAM or POP. -(See 'python ensemble.py -h' for additional details). - -(3) Use --ensemble to specify the ensemble size.
-Recommended ensemble sizes: -CAM-ECT: 151 -UF-CAM-ECT: 350 -POP-ECT: 40 - -(4) Examples: - -CAM-ECT: - -python ensemble.py --case /glade/scratch/cesm_user/cesm_tag/ensemble/ensemble.cesm_tag.000 --mach cheyenne --ensemble 151 --ect cam --project P99999999 - - -UF-CAM-ECT: - -python ensemble.py --case /glade/scratch/cesm_user/cesm_tag/uf_ensemble/ensemble.cesm_tag.uf.000 --mach cheyenne --ensemble 350 --uf --ect cam --project P99999999 - -POP-ECT: - -python ensemble.py --case /glade/scratch/cesm_user/cesm_tag/uf_ensemble/ensemble.cesm_tag.000 --mach cheyenne --ensemble 40 --ect pop --project P99999999 - -Notes: - (a) ensemble.py accepts (most of) the arguments of create_newcase - - (b) case name must end in ".000" and include the full path - - (c) ensemble size must be specified, and suggested defaults are listed - above. Note that for CAM-ECT and UF-CAM-ECT, the ensemble size - needs to be larger than the number of variables that ECT will evaluate. - - -(5) Once all ensemble simulations have run successfully, copy every cam history -file (*.cam.h0.*) (for CAM-ECT and UF-CAM-ECT) or monthly pop history file -(*.pop.h.*) (for POP-ECT) from each ensemble run directory into a separate directory. -Next create the ensemble summary using the pyCECT tool pyEnsSum.py (for CAM-ECT and -UF-CAM-ECT) or pyEnsSumPop.py (for POP-ECT). For details see README_pyEnsSum.rst -and README_pyEnsSumPop.rst with the pyCECT tools. - -------------------- -Creating test runs: -------------------- - -(1) Once an ensemble summary file has been created or chosen to -use from $CESMDATAROOT/inputdata/validation, the simulation -run(s) to be verified by ECT must be created via the ensemble.py script. - -NOTE: It is important that the **same** resolution and compset be used in the -individual runs as in the ensemble. The NetCDF ensemble summary file global -attributes give this information. - -(2) For example, for CAM-ECT: - -python ensemble.py --case /glade/scratch/cesm_user/cesm_tag/camcase.cesm_tag.000 --ect cam --mach cheyenne --project P99999999 --compset F2000climo --res f19_f19_mg17 - -For example, for UF-CAM-ECT: - -python ensemble.py --case /glade/scratch/cesm_user/cesm_tag/uf.camcase.cesm_tag.000 --ect cam --uf --mach cheyenne --project P99999999 --compset F2000climo --res f19_f19_mg17 - -For example, for POP-ECT: - -python ensemble.py --case /glade/scratch/cesm_user/cesm_tag/popcase.cesm_tag.000 --ect pop --mach cheyenne --project P99999999 --compset G --res T62_g17 - -(3) Next verify the new simulation(s) with the pyCECT tool pyCECT.py (see -README_pyCECT.rst with the pyCECT tools). diff --git a/tools/statistical_ensemble_test/addmetadata.sh b/tools/statistical_ensemble_test/addmetadata.sh deleted file mode 100755 index 9f3139531d2..00000000000 --- a/tools/statistical_ensemble_test/addmetadata.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env bash -# -# Adds metadata to netcdf statistical ensemble test files. -# -Args=("$@") -i=0 -while [ $i -le ${#Args[@]} ]; do - case ${Args[$i]} in - --caseroot ) - i=$((i+1)) - caseroot=${Args[$i]} - if [ ! -d ${caseroot} ]; then - echo "ERROR: caseroot not found: $caseroot" - exit 2 - fi - if [ ! -f ${caseroot}/xmlquery ]; then - echo "ERROR: Directory $caseroot does not appear to be a cesm case directory" - exit 3 - fi - ;; - --histfile ) - i=$((i+1)) - histfile=${Args[$i]} - if [ !
-f ${histfile} ]; then - echo "ERROR: file not found $histfile" - exit 4 - fi - ;; - --help ) - echo "usage: addmetadata --caseroot CASEROOT --histfile HISTFILE [--help] - " - echo "Script to add metadata to validation files. - " - echo "Arguments: - --help show this help message and exit - --caseroot Full pathname to the CASE directory. - --histfile Full filename of the history file to add the metadata." - exit - ;; - esac - i=$((i+1)) -done - -if [ "$caseroot" = "" ] || [ "$histfile" = "" ]; then - echo "Please run ./addmetadata.sh --help for correct usage." - exit -fi - -cd $caseroot - -stop_option=`./xmlquery --value STOP_OPTION` -test_type="UF-ECT" -if [ "$stop_option" = "nmonths" ]; then - test_type="ECT" -elif [ "$stop_option" = "nyears" ]; then - test_type="POP-ECT" -fi - -if hash ncks 2>/dev/null; then - ncks --glb compset=`./xmlquery --value COMPSET` --glb grid=`./xmlquery --value GRID` --glb testtype="$test_type" --glb compiler=`./xmlquery --value COMPILER` --glb machineid=`./xmlquery --value MACH` --glb model_version=`./xmlquery --value MODEL_VERSION` $histfile $histfile.tmp - mv $histfile.tmp $histfile -else - echo "This script requires the ncks tool" -fi diff --git a/tools/statistical_ensemble_test/ensemble.py b/tools/statistical_ensemble_test/ensemble.py deleted file mode 100644 index fc5ff8072f1..00000000000 --- a/tools/statistical_ensemble_test/ensemble.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/python -from __future__ import print_function -import os, sys, getopt -import random -from single_run import process_args_dict, single_case - -# ============================================================================== -# set up and submit 12-month (original) or 9-time step (uf) run. then create -# clones for a complete ensemble or a set of (3) test cases -# ============================================================================== - -# generate distinct non-negative random integers in [0, end-1] -# can't have any duplicates -def random_pick(num_pick, end): - ar = range(0, end) - rand_list = random.sample(ar, num_pick) - # for i in rand_list: - # print i - return rand_list - - -# get the pertlim corresponding to the random int -def get_pertlim_uf(rand_num): - i = rand_num - if i == 0: - ptlim = 0 - else: - j = 2 * int((i - 1) / 100) + 101 - k = (i - 1) % 100 - if i % 2 != 0: - ll = j + int(k / 2) * 18 - ippt = str(ll).zfill(3) - ptlim = "0." + ippt + "d-13" - else: - ll = j + int((k - 1) / 2) * 18 - ippt = str(ll).zfill(3) - ptlim = "-0."
+ ippt + "d-13" - return ptlim - - -def main(argv): - - caller = "ensemble.py" - - # directory with single_run.py and ensemble.py - stat_dir = os.path.dirname(os.path.realpath(__file__)) - print("STATUS: stat_dir = " + stat_dir) - - opts_dict, case_flags = process_args_dict(caller, argv) - - # default is verification mode (3 runs) - run_type = "verify" - if opts_dict["ect"] == "pop": - clone_count = 0 - else: - clone_count = 2 - - uf = opts_dict["uf"] - - # check for run_type change (i.e., if doing ensemble instead of verify) - ens_size = opts_dict["ensemble"] - if ens_size > 0: - run_type = "ensemble" - clone_count = ens_size - 1 - if ens_size > 999: - print("Error: cannot have an ensemble size greater than 999.") - sys.exit() - print("STATUS: ensemble size = " + str(ens_size)) - - # generate random pertlim(s) for verify - if run_type == "verify": - if opts_dict["ect"] == "pop": - rand_ints = random_pick(1, 40) - else: # cam - if uf: - end_range = 350 - else: - end_range = 150 - rand_ints = random_pick(3, end_range) - - # now create cases - print("STATUS: creating first case ...") - - # create first case - then clone - if run_type == "verify": - opts_dict["pertlim"] = get_pertlim_uf(rand_ints[0]) - else: # full ensemble - opts_dict["pertlim"] = "0" - - # first case - single_case(opts_dict, case_flags, stat_dir) - - # clone? - if clone_count > 0: - - # now clone - print("STATUS: cloning additional cases ...") - - # scripts dir - print("STATUS: stat_dir = " + stat_dir) - ret = os.chdir(stat_dir) - ret = os.chdir("../../scripts") - scripts_dir = os.getcwd() - print("STATUS: scripts dir = " + scripts_dir) - - # we know case name ends in '.000' (already checked) - clone_case = opts_dict["case"] - case_pfx = clone_case[:-4] - - for i in range(1, clone_count + 1): # 1: clone_count - if run_type == "verify": - this_pertlim = get_pertlim_uf(rand_ints[i]) - else: # full ensemble - this_pertlim = get_pertlim_uf(i) - - iens = "{0:03d}".format(i) - new_case = case_pfx + "." + iens - - os.chdir(scripts_dir) - print("STATUS: creating new cloned case: " + new_case) - - clone_args = " --keepexe --case " + new_case + " --clone " + clone_case - print(" with args: " + clone_args) - - command = scripts_dir + "/create_clone" + clone_args - ret = os.system(command) - - print("STATUS: running setup for new cloned case: " + new_case) - os.chdir(new_case) - command = "./case.setup" - ret = os.system(command) - - # adjust perturbation - if opts_dict["ect"] == "pop": - if run_type == "verify": # remove old init_ts_perturb - f = open("user_nl_pop", "r+") - all_lines = f.readlines() - f.seek(0) - for line in all_lines: - if line.find("init_ts_perturb") == -1: - f.write(line) - f.truncate() - f.close() - text = "init_ts_perturb = " + this_pertlim - else: - text = "\ninit_ts_perturb = " + this_pertlim - - # now append new pertlim - with open("user_nl_pop", "a") as f: - f.write(text) - - else: - if run_type == "verify": # remove old pertlim first - f = open("user_nl_cam", "r+") - all_lines = f.readlines() - f.seek(0) - for line in all_lines: - if line.find("pertlim") == -1: - f.write(line) - f.truncate() - f.close() - text = "pertlim = " + this_pertlim - else: - text = "\npertlim = " + this_pertlim - - # now append new pertlim - with open("user_nl_cam", "a") as f: - f.write(text) - - # preview namelists - command = "./preview_namelists" - ret = os.system(command) - - # submit? 
- if opts_dict["ns"] == False: - command = "./case.submit" - ret = os.system(command) - - # Final output - if run_type == "verify": - if opts_dict["ect"] == "pop": - print("STATUS: ---POP-ECT VERIFICATION CASE COMPLETE---") - print("Set up one case using the following init_ts_perturb value:") - print(get_pertlim_uf(rand_ints[0])) - else: - print("STATUS: ---CAM-ECT VERIFICATION CASES COMPLETE---") - print("Set up three cases using the following pertlim values:") - print( - get_pertlim_uf(rand_ints[0]) - + " " - + get_pertlim_uf(rand_ints[1]) - + " " - + get_pertlim_uf(rand_ints[2]) - ) - else: - print("STATUS: --ENSEMBLE CASES COMPLETE---") - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/tools/statistical_ensemble_test/pyCECT/.gitignore b/tools/statistical_ensemble_test/pyCECT/.gitignore deleted file mode 100644 index 37fc9d40817..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/.gitignore +++ /dev/null @@ -1,90 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# IPython Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# dotenv -.env - -# virtualenv -.venv/ -venv/ -ENV/ - -# Spyder project settings -.spyderproject - -# Rope project settings -.ropeproject diff --git a/tools/statistical_ensemble_test/pyCECT/CHANGES.rst b/tools/statistical_ensemble_test/pyCECT/CHANGES.rst deleted file mode 100644 index 02472a02d3a..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/CHANGES.rst +++ /dev/null @@ -1,89 +0,0 @@ -PyCECT Change Log -===================== - -Copyright 2020 University Corporation for Atmospheric Research - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - - -VERSION 3.2.0 -------------- - -- Migrated from Python 2 to Python 3. - -- Added improved documentation via ReadtheDocs. - - -VERSION 3.1.1 --------------- - -- Minor bug fixes and I/O update for web_enabled interface. - -VERSION 3.1.0 --------------- - -- Minor bug fixes. - -- Removed pyNIO and pyNGL dependencies. - -- Modified CAM variable exclusion process to potentially exclude more variables (via larger tolerance in rank calculation and identification of variables taking only a few constant values). 
- -- Updated CAM option to print plots. - - -VERSION 3.0.7 ------------- - -- Added web_enabled mode and pbs submission script. - - - -VERSION 3.0.5 ------------- - -- Minor release to address data cast problem in area_avg. - -VERSION 3.0.2 ------------- - -- Minor release to remove tabs. - - -VERSION 3.0.1 ------------- - -- Minor release that can generate an ensemble summary on 3D variables having dimension ilev, create boxplots of ensemble members, and process with an excluded or included variable list. - -VERSION 3.0.0 -------------- - -- "Ultra-fast", UF-CAM-ECT, tool released. - - -VERSIONS 2.0.1 - 2.0.3 --------------------- - -- Bug fixes. - -VERSION 2.0.0 ------------- - -- Tools for POP (Ocean component) are released. - - -VERSION 1.0.0 ------------- - -- Initial release. - -- Includes CAM (atmosphere component) tools: CECT and PyEnsSum. diff --git a/tools/statistical_ensemble_test/pyCECT/EET.py b/tools/statistical_ensemble_test/pyCECT/EET.py deleted file mode 100644 index 1fab1ba2b86..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/EET.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/python -from __future__ import print_function -import re -import argparse -import itertools - - -class exhaustive_test(object): - def __init__(self): - super(exhaustive_test, self).__init__() - - def file_to_sets(self, compfile): - set_dict = {} - with open(compfile, "r") as f: - for line in f: - line = line.strip() - key, failset = line.replace(" ", "").split(";", 1) - - try: - failset = list(map(int, failset.split(","))) - failset = set(failset) - - except ValueError: - failset = set() - - set_dict[key] = failset - - return set_dict - - def test_combinations(self, dictionary, runsPerTest=3, nRunFails=2): - sims = list(dictionary.keys()) - - passed = failed = 0 - for compset in itertools.combinations(sims, runsPerTest): - # This block is slightly slower than manually - # specifying the pairs, but it generalizes - # easily. - failsets = [dictionary[s] for s in compset] - # The following three lines are adapted from - # user doug's answer in - # http://stackoverflow.com/questions/27369373/pairwise-set-intersection-in-python - pairs = itertools.combinations(failsets, 2) - isect = lambda a, b: a.intersection(b) - isect_list = [isect(t[0], t[1]) for t in pairs] - isect_tot = set() - [isect_tot.update(x) for x in isect_list] - - if len(isect_tot) > nRunFails: - # print statements for debugging - # print("this set failed") - # print(compset) - failed += 1 - else: - # print("this set passed") - # print(compset) - passed += 1 - - return passed, failed - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="script to calculate all combinations of ensemble tests" - ) - parser.add_argument("-f", dest="compfile", help="compfile location", metavar="PATH") - - args = parser.parse_args() - - eet = exhaustive_test() - compare_dict = eet.file_to_sets(args.compfile) - print(("(passed, failed) counts are %s" % (eet.test_combinations(compare_dict),))) diff --git a/tools/statistical_ensemble_test/pyCECT/LICENSE.txt b/tools/statistical_ensemble_test/pyCECT/LICENSE.txt deleted file mode 100644 index 261eeb9e9f8..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/LICENSE.txt +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions.
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
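(An aside on EET.py, shown above before the license text: its pass/fail decision unions all pairwise intersections of the per-run failure sets and checks the size of that union against a threshold. Below is a minimal, self-contained sketch of that logic; the run names and failure sets are made up for illustration and are not from the deleted file.)

    import itertools

    # Hypothetical per-run failure sets: run name -> set of failed PC indices.
    fail_sets = {"run_a": {1, 5}, "run_b": {5, 9}, "run_c": {2}}

    passed = failed = 0
    for combo in itertools.combinations(fail_sets, 3):  # 3 runs per test
        # Union of all pairwise intersections, as in EET.test_combinations.
        common = set()
        for s, t in itertools.combinations([fail_sets[r] for r in combo], 2):
            common |= s & t
        # Fail when more than nRunFails (here 2) PCs fail in two or more runs.
        if len(common) > 2:
            failed += 1
        else:
            passed += 1

    print(passed, failed)  # expected: 1 0 -- only PC 5 fails in two runs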
diff --git a/tools/statistical_ensemble_test/pyCECT/README.rst b/tools/statistical_ensemble_test/pyCECT/README.rst deleted file mode 100644 index 32a55abfc45..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/README.rst +++ /dev/null @@ -1,45 +0,0 @@ - -pyCECT: Tools to support and run the CESM Ensemble Consistency Test -============================================================================= - -Overview: --------- - -The Community Earth System Model Ensemble -Consistency Test (CESM-ECT or CECT) suite was developed as an -alternative to requiring bitwise identical output for quality -assurance. This objective test provides a statistical measurement -of consistency between an accepted ensemble created -by small initial temperature perturbations and a test set of -CESM simulations. - - -:AUTHORS: Haiying Xu, Allison Baker, Daniel Milroy, Dorit Hammerling -:COPYRIGHT: 2015-2020 University Corporation for Atmospheric Research -:LICENSE: Apache 2.0 - - -Obtaining the code: ----------------- - -Currently, the most up-to-date development source code is available via git from the site: - -https://github.com/NCAR/PyCECT - -You may then check out the most recent stable tag. The source is available in read-only mode to everyone. Developers are welcome to update the source and submit Pull Requests via GitHub. - - -Other resources: ----------------- - -See full documentation_ for more information. - -.. _documentation: https://pycect.readthedocs.io/en/latest/ - -See CESM information on PyCECT_. - -.. _PyCECT: http://www.cesm.ucar.edu/models/cesm2/python-tools/ - -The CESM web-based interface to this tool is available here_. - -.. _here: http://www.cesm.ucar.edu/models/cesm2/verification/ diff --git a/tools/statistical_ensemble_test/pyCECT/docs/Makefile b/tools/statistical_ensemble_test/pyCECT/docs/Makefile deleted file mode 100644 index d4bb2cbb9ed..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/tools/statistical_ensemble_test/pyCECT/docs/conf.py b/tools/statistical_ensemble_test/pyCECT/docs/conf.py deleted file mode 100644 index d5ec2d0ded5..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/conf.py +++ /dev/null @@ -1,57 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -import os -import sys - -sys.path.insert(0, os.path.abspath(".")) -sys.path.insert(0, os.path.abspath("../")) -# sys.path.insert(0, os.path.abspath('../../')) - - -# -- Project information ----------------------------------------------------- - -project = "pyCECT" -copyright = u"2015-2020, University Corporation for Atmospheric Research" -author = u"Haiying Xu, Allison Baker, Dorit Hammerling, Daniel Milroy" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -# nbsphinx_execute = 'never' -# extensions = ['nbsphinx', 'sphinx.ext.autodoc', 'sphinx.ext.napoleon'] - -# Add any paths that contain templates here, relative to this directory. -# templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "alabaster" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] - -master_doc = "index" diff --git a/tools/statistical_ensemble_test/pyCECT/docs/index.rst b/tools/statistical_ensemble_test/pyCECT/docs/index.rst deleted file mode 100644 index 124f0e94e14..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. pyCECT documentation master file, created by - sphinx-quickstart on Wed Sep 16 12:48:10 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to pyCECT's documentation! -================================== - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - source/readme.rst - source/installation.rst - source/pyCECT.rst - source/pyEnsSum.rst - source/pyEnsSumPop.rst - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/tools/statistical_ensemble_test/pyCECT/docs/make.bat b/tools/statistical_ensemble_test/pyCECT/docs/make.bat deleted file mode 100644 index 2119f51099b..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo.
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/tools/statistical_ensemble_test/pyCECT/docs/requirements.txt b/tools/statistical_ensemble_test/pyCECT/docs/requirements.txt deleted file mode 100644 index de513ac01db..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -sphinx -sphinx_rtd_theme -nbsphinx -sphinx-reload -pillow==9.0.1 diff --git a/tools/statistical_ensemble_test/pyCECT/docs/source/installation.rst b/tools/statistical_ensemble_test/pyCECT/docs/source/installation.rst deleted file mode 100644 index cd71b36a806..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/source/installation.rst +++ /dev/null @@ -1,5 +0,0 @@ -============ -Installation -============ - -*COMING SOON* diff --git a/tools/statistical_ensemble_test/pyCECT/docs/source/pyCECT.rst b/tools/statistical_ensemble_test/pyCECT/docs/source/pyCECT.rst deleted file mode 100644 index 31b36a3a666..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/source/pyCECT.rst +++ /dev/null @@ -1,213 +0,0 @@ - -pyCECT test -============================================== - - -The CESM-ECT suite contains multiple tests that compare the results of a set of new (modified) -CESM simulations against the accepted ensemble. An overall pass or fail is designated. -Current functionality in the CESM-ECT suite includes: - -*Atmosphere component (CAM):* - - * CAM-ECT: examines yearly-average files from CAM - * UF-CAM-ECT: examines history files from CAM - - Both CAM-ECT and UF-CAM-ECT require a summary file generated by - pyEnsSum.py. UF-CAM-ECT uses simulations of nine time-steps in length, while - CAM-ECT uses yearly averages. Starting with the faster UF-CAM-ECT is always - suggested. (CAM-ECT is typically only used in the case of an unexpected - UF-CAM-ECT failure.) Three simulation runs from the new test environment are - recommended for both of these tests. More information is available in: - - Daniel J. Milroy, Allison H. Baker, Dorit M. Hammerling, and - Elizabeth R. Jessup, “Nine time steps: ultra-fast statistical - consistency testing of the Community Earth System Model (pyCECT - v3.0)”, Geoscientific Model Development, 11, pp. 697-711, 2018. - - https://gmd.copernicus.org/articles/11/697/2018/ - - - -*Ocean Component (POP):* - - * POP-ECT: examines monthly-average files from POP - - - POP-ECT requires a summary file generated by pyEnsSumPop.py and uses - monthly output, typically from a single year. One simulation run from - the new test environment is needed. More information is available in: - - A.H. Baker, Y. Hu, D.M. Hammerling, Y. Tseng, X. Hu, X. Huang, - F.O. Bryan, and G. Yang, “Evaluating Statistical Consistency in the - Ocean Model Component of the Community Earth System Model - (pyCECT v2.0).” Geoscientific Model Development, 9, pp. 2391-2406, 2016. - - https://gmd.copernicus.org/articles/9/2391/2016/ - - - -To use pyCECT: ---------------- -*Note: compatible with Python 3* - -1. On NCAR's Cheyenne machine: - - ``module load python`` - - ``ncar_pylib`` - - ``qsub test_pyEnsSum.sh`` - - -2. Otherwise you need these packages: - - * numpy - * scipy - * future - * configparser - * sys - * getopt - * os - * netCDF4 - * time - * re - * json - * random - * asaptools - * fnmatch - * glob - * itertools - * datetime - - - -3.
To see all options (and defaults): - - ``python pyCECT.py -h`` - - -Notes and examples: -------------------------------------------- - -1. Options for all CESM-ECT approaches: - - Required: - - * To specify the summary file generated by pyEnsSum.py - - ``--sumfile ens.summary.nc`` - - * To specify the directory path that contains the run(s) to be evaluated: - - ``--indir /glade/u/tdd/asap/verification/cesm1_3_beta11/mira`` - - Optional: - - * For verbose information: - - ``--verbose`` - -2. CAM-ECT and UF-CAM-ECT specific options (and summary file generated by pyEnsSum.py) - - * Note that CAM-ECT is the default test. - - * The parameters setting the pass/fail criteria are all set by - default (i.e., sigMul, minPCFail, minRunFail, numRunFile, and nPC). - - * If the specified indir contains more files than the number specified by - - ``--numRunFile <num>`` - - (default = 3), then <num> files will be chosen at random - from that directory. - - * The Ensemble Exhaustive Test (EET) is specified by - - ``--eet <num>`` - - This tool computes the failure rate of <num> tests taken numRunFile at a time. - Therefore, when specifying ``--eet <num>``, <num> must be greater than or equal to - numRunFile. - - * To enable printing of extra variable information: - - ``--printVars`` - - * By default, CAM-ECT looks at annual averages, which is indicated by - - ``--tslice 1`` - - (For monthly files, use ``--tslice 0``. Note that this - should correspond to what has been collected in the summary file.) - - * To enable printing out the sum of the standardized mean of all variables and associated box plots - (requires the Python seaborn package): - - ``--printStdMean`` - - - * To save a netcdf file with scores and std global means from the test runs (savefile.nc): - - ``--saveResults`` - - * *Example:* - - ``python pyCECT.py --sumfile /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/summary_files/uf.ens.c1.2.2.1_fc5.ne30.nc --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_test_files --tslice 1`` - - * *Example using EET* (note that EET takes longer to run - especially for a large number of tests): - - ``python pyCECT.py --sumfile /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/summary_files/uf.ens.c1.2.2.1_fc5.ne30.nc --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_test_files --tslice 1 --eet 10`` - - -3. POP-ECT specific options (and summary file generated by pyEnsSumPop.py) - - * To use POP-ECT, you MUST add the following to enable this test - (which disables UF-CAM-ECT and CAM-ECT): - - ``--popens`` - - * Be sure to use a POP-ECT summary file: - - ``--sumfile /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/summary_files/pop.cesm2.0.b10.nc`` - - * Directory path that contains the run(s) to be evaluated: - - ``--indir /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/pop_test_files/C96`` - - * The above directory may contain many POP history files that follow the standard - CESM-POP naming convention. To specify which file or files you wish to test, - simply specify the test case file prefix (like a wildcard expansion).
- - * To compare against all months in year 2 from the input directory above: - - ``--input_glob C96.pop.000.pop.h.0002`` - - * To compare only against month 12 in year 1: - - ``--input_glob C96.pop.000.pop.h.0001-12`` - - * (Note: if input_glob is not specified, all files in --indir will be compared) - - * (Note: the recommendation is to just compare year 1, month 12) - - - * Be sure to specify the json file that lists the variables on which the test will be run: - - ``--jsonfile pop_ensemble.json`` - - * The parameters setting the pass/fail criteria are all set by - default (i.e., pop_tol, pop_threshold) but may be modified: - - * Specifying the test tolerance (the minimum Z-score - threshold): - - ``--pop_tol 3.0`` - - * Specifying the pop threshold (fraction of points that must satisfy the Z-score tolerance): - - ``--pop_threshold 0.9`` - - - * *Example:* - - ``python pyCECT.py --popens --sumfile /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/summary_files/pop.cesm2.0.b10.nc --indir /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/pop_test_files/C96 --jsonfile pop_ensemble.json --input_glob C96.pop.000.pop.h.0001-12`` diff --git a/tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSum.rst b/tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSum.rst deleted file mode 100644 index 2c598e4e835..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSum.rst +++ /dev/null @@ -1,161 +0,0 @@ - -pyEnsSum -============== - -The verification tools in the CESM-ECT suite all require an *ensemble -summary file*, which contains statistics describing the ensemble distribution. -pyEnsSum can be used to create a CAM (atmospheric component) ensemble summary file. - -Note that ensemble summary files for existing CESM tags for CAM-ECT and UF-CAM-ECT -that were created by CSEG (CESM Software Engineering Group) -are located (respectively) in the CESM input data directories: - -$CESMDATAROOT/inputdata/validation/ensembles -$CESMDATAROOT/inputdata/validation/uf_ensembles - -Alternatively, pyEnsSum.py can be used to create a summary file for CAM-ECT or -UF-CAM-ECT, given the location of appropriate ensemble history files (which should -be generated via CIME, https://github.com/ESMCI/cime) - -(Note: to generate a summary file for POP-ECT, you must use pyEnsSumPop.py, -which has its own corresponding instructions) - -To use pyEnsSum: -------------------- -*Note: compatible with Python 3* - -1. On NCAR's Cheyenne machine: - - ``module load python`` - - ``ncar_pylib`` - - ``qsub test_pyEnsSum.sh`` - - -2. Otherwise you need these packages: - - * numpy - * scipy - * future - * configparser - * sys - * getopt - * os - * netCDF4 - * time - * re - * json - * random - * asaptools - * fnmatch - * glob - * itertools - * datetime - -3. To see all options (and defaults): - - ``python pyEnsSum.py -h``:: - - Creates the summary file for an ensemble of CAM data.
- - Args for pyEnsSum : - - pyEnsSum.py - -h : prints out this usage message - --verbose : prints out in verbose mode (off by default) - --sumfile : the output summary data file (default = ens.summary.nc) - --indir : directory containing all of the ensemble runs (default = ./) - --esize : Number of ensemble members (default = 350) - --tag : Tag name used in metadata (default = cesm2_0) - --compset : Compset used in metadata (default = F2000climo) - --res : Resolution used in metadata (default = f19_f19) - --tslice : the index into the time dimension (default = 1) - --mach : Machine name used in the metadata (default = cheyenne) - --jsonfile : Json file that provides a list of variables that will be excluded - or included (default = exclude_empty.json) - --mpi_disable : Disable mpi mode to run in serial (off by default) - --fIndex : Use this to start at ensemble member <num> instead of 000 (so - ensembles with numbers less than <num> are excluded from the summary file) - - -Notes: ------------------ - -1. CAM-ECT uses yearly average files, which by default (in the ensemble.py - generation script in CIME) also contain the initial conditions. Therefore, - one typically needs to set ``--tslice 1`` to use the yearly average (because - slice 0 is the initial conditions.) - -2. UF-CAM-ECT uses timestep nine. By default (in the ensemble.py - generation script in CIME) the output file also contains the initial conditions. - Therefore, one typically needs to set ``--tslice 1`` to use time step nine (because - slice 0 is the initial conditions.) - -3. There is no need to indicate UF-CAM-ECT vs. CAM-ECT to this routine. It - simply creates statistics for the supplied history files at the specified - time slice. For example, if you want to look at monthly files, simply - supply their location. Monthly files typically do not contain an initial - condition and would require ``--tslice 0``. - -4. The ``--esize`` (the ensemble size) can be less than or equal to the number of files - in ``--indir``. Ensembles numbered 000-(esize-1) will be included unless ``--fIndex`` - is specified. UF-CAM-ECT typically uses at least 350 members (the default), - whereas CAM-ECT does not require as many. - -5. Note that the ``--res``, ``--tag``, ``--compset``, and ``--mach`` - parameters only affect the metadata in the summary file. - -6. When running in parallel, the recommended number of cores to use is one - for each 3D variable. The default is to run in parallel (recommended). - -7. You must specify a json file (via ``--jsonfile``) that indicates - the variables in the ensemble - output files that you want to include or exclude from the summary file - statistics (see the example json files). We recommend excluding variables, as - this is typically less work and pyEnsSum will let you know if you have not - listed variables that need to be excluded (see next note). Keep in mind that - you must have *fewer* variables included than ensemble members. - -8. *IMPORTANT:* If there are variables that need to be excluded (that are not in - the .json file already), pyEnsSum will exit early and provide a list of the - variables to exclude in the output. These should be added to your exclude - variable list (or removed from an include list), and then pyEnsSum can - be re-run. Note that additional problematic variables may be found by - pyEnsSum as variables are detected in three stages. (First any variables that - are constant across the ensemble are identified. Once these are removed, - linearly dependent variables are identified for removal.
Finally, variables - that are not constant but have very few unique values are identified.) - - -Example: --------------------------------------- -(Note: This example is in test_pyEnsSum.sh) - -*To generate a summary file for 350 UF-CAM-ECT simulation runs (time step nine):* - -* We specify the size (this is optional since 350 is the default) and data location: - - ``--esize 350`` - - ``--indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files`` - -* We also specify the name of the file to create for the summary: - - ``--sumfile uf.ens.c1.2.2.1_fc5.ne30.nc`` - -* Since the ensemble files contain the initial conditions as well as the values at time step 9 (this is optional as 1 is the default), we set: - - ``--tslice 1`` - -* We also specify the CESM tag, compset, resolution, and machine of our ensemble data so that it can be written to the metadata of the summary file: - - ``--tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --mach cheyenne`` - -* We can exclude or include some variables from the analysis by specifying them in a json file: - - ``--jsonfile excluded_varlist.json`` - -* This yields the following command for your job submission script: - - ``python pyEnsSum.py --esize 350 --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files --sumfile uf.ens.c1.2.2.1_fc5.ne30.nc --tslice 1 --tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --jsonfile excluded_varlist.json`` diff --git a/tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSumPop.rst b/tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSumPop.rst deleted file mode 100644 index f4f46dea24c..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/source/pyEnsSumPop.rst +++ /dev/null @@ -1,150 +0,0 @@ - -pyEnsSumPop -================== - -The verification tools in the CESM-ECT suite all require an *ensemble -summary file*, which contains statistics describing the ensemble distribution. -pyEnsSumPop can be used to create a POP (ocean component) ensemble summary file. - - -Note that ensemble summary files for existing CESM tags for POP-ECT -that were created by CSEG (CESM Software Engineering Group) -are located in the CESM input data directory: - -$CESMDATAROOT/inputdata/validation/pop_ensembles - -Alternatively, pyEnsSumPop can be used to create a summary file for POP-ECT -given the location of appropriate ensemble history files (which should -be generated in CIME via $CIME/tools/statistical_ensemble_test/ensemble.py). - -(Note: to generate a summary file for UF-CAM-ECT or CAM-ECT, you must use -pyEnsSum.py, which has its own corresponding instructions.) - - -To use pyEnsSumPop: -------------------------- - -*Note: compatible with Python 3* - -1. On NCAR's Cheyenne machine: - - ``module load python`` - - ``ncar_pylib`` - - ``qsub test_pyEnsSumPop.sh`` - - -2. Otherwise you need these packages: - - * numpy - * scipy - * future - * configparser - * sys - * getopt - * os - * netCDF4 - * time - * re - * json - * random - * asaptools - * fnmatch - * glob - * itertools - * datetime - -3. To see all options (and defaults): - - ``python pyEnsSumPop.py -h``:: - - Creates the summary file for an ensemble of POP data.
- - - Args for pyEnsSumPop : - - pyEnsSumPop.py - -h : prints out this usage message - --verbose : prints out in verbose mode (off by default) - --sumfile : the output summary data file (default = pop.ens.summary.nc) - --indir : directory containing all of the ensemble runs (default = ./) - --esize : Number of ensemble members (default = 40) - --tag : Tag name used in metadata (default = cesm2_0_0) - --compset : Compset used in metadata (default = G) - --res : Resolution (used in metadata) (default = T62_g17) - --mach : Machine name used in the metadata (default = cheyenne) - --tslice : the time slice of the variable that we will use (default = 0) - --nyear : Number of years (default = 1) - --nmonth : Number of months (default = 12) - --jsonfile : Json file that provides a list of variables that will be included - (RECOMMENDED: default = pop_ensemble.json) - --mpi_disable : Disable mpi mode to run in serial (off by default) - - - -Notes: ----------------- - -1. POP-ECT uses monthly average files. Therefore, one typically needs - to set ``--tslice 0`` (which is the default). - -2. Note that ``--res``, ``--tag``, ``--compset``, and ``--mach`` only affect the - metadata in the summary file. - -3. The sample script test_pyEnsSumPop.sh gives a recommended parallel - configuration for Cheyenne. We recommend one core per month (and make - sure each core has sufficient memory). - -4. The json file indicates variables from the output files that you want - to include in the summary file statistics. We recommend using the - default pop_ensemble.json, which contains only 5 variables. - - - -Example: ----------------------------------------- -(Note: this example is in test_pyEnsSumPop.sh) - -*To generate a summary file for 40 POP-ECT simulation runs (1 year of monthly output):* - -* We specify the size (this is optional since 40 is the default) and data location: - - ``--esize 40`` - - ``--indir /glade/p/cisl/iowa/pop_verification/cesm2_0_beta10/ensembles`` - -* We also specify the name of the file to create for the summary: - - ``--sumfile pop.ens.sum.cesm2.0.nc`` - -* Since these are monthly average files, we set (optional as 0 is the default): - - ``--tslice 0`` - -* We also specify the number of years and the number of months (optional, as 1 and 12 are the defaults): - - ``--nyear 1`` - - ``--nmonth 12`` - -* We also can specify the tag, resolution, machine, and compset - information (that will be written to the metadata of the summary file): - - ``--tag cesm2_0_beta10`` - - ``--res T62_g17`` - - ``--mach cheyenne`` - - ``--compset G`` - -* We include a recommended subset of variables (5) for the - analysis by specifying them in a json file (optional, as - this is the default): - - ``--jsonfile pop_ensemble.json`` - - * This yields the following command for your job submission script: - - ``python pyEnsSumPop.py --indir /glade/p/cisl/asap/pycect_sample_data/pop_c2.0.b10/pop_ens_files --sumfile pop.cesm2.0.b10.nc --tslice 0 --nyear 1 --nmonth 12 --esize 40 --jsonfile pop_ensemble.json --mach cheyenne --compset G --tag cesm2_0_beta10 --res T62_g17`` diff --git a/tools/statistical_ensemble_test/pyCECT/docs/source/readme.rst b/tools/statistical_ensemble_test/pyCECT/docs/source/readme.rst deleted file mode 100644 index cf2936769cd..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/docs/source/readme.rst +++ /dev/null @@ -1,46 +0,0 @@ - -Overview -============================== - -The Community Earth System Model Ensemble -Consistency Test (CESM-ECT or CECT) suite was developed as an -alternative to
requiring bitwise identical output for quality -assurance. This objective test provides a statistical measurement -of consistency between an accepted ensemble created -by small initial temperature perturbations and a test set of -CESM simulations. - -The pyCECT package, or *python CESM Ensemble Consistency Test* -package contains the necessary tools to to compare the results of a set of new (modified) -CESM simulations against the accepted ensemble (pyCECT) as well as the tools to -create the ensemble summary files (pyEnsSum and pyEnsSumPop). These -three modules will be explained in more detail. - -CESM/CIME notes: ---------------------- -1. The pyCECT package is also included in CIME (Common Infrastructure for - Modeling the Earth). See: - - https://github.com/ESMCI/cime - -2. Creating the ensemble summaries (via pyEnsSum and pyEnsSumPop) is - typically done by the CESM software developers. See: - - http://www.cesm.ucar.edu/models/cesm2/python-tools/ - -3. A web-based interface to this tool is available here: - - http://www.cesm.ucar.edu/models/cesm2/verification/ - - -Relevant publications: ----------------------- - -Daniel J. Milroy, Allison H. Baker, Dorit M. Hammerling, and Elizabeth R. Jessup, “Nine time steps: ultra-fast statistical consistency testing of the Community Earth System Model (pyCECT v3.0)”, Geoscientific Model Development, 11, pp. 697-711, 2018. -https://gmd.copernicus.org/articles/11/697/2018/ - -A.H. Baker, Y. Hu, D.M. Hammerling, Y. Tseng, X. Hu, X. Huang, F.O. Bryan, and G. Yang, “Evaluating Statistical Consistency in the Ocean Model Component of the Community Earth System Model (pyCECT v2.0).” Geoscientific Model Development, 9, pp. 2391-2406, 2016. -https://gmd.copernicus.org/articles/9/2391/2016/ - -A.H. Baker, D.M. Hammerling, M.N. Levy, H. Xu, J.M. Dennis, B.E. Eaton, J. Edwards, C. Hannay, S.A. Mickelson, R.B. Neale, D. Nychka, J. Shollenberger, J. Tribbia, M. Vertenstein, and D. Williamson, “A new ensemble-based consistency test for the community earth system model (pyCECT v1.0).” Geoscientific Model Development, 8, pp. 2829–2840, 2015. 
-https://gmd.copernicus.org/articles/8/2829/2015/
diff --git a/tools/statistical_ensemble_test/pyCECT/empty_excluded.json b/tools/statistical_ensemble_test/pyCECT/empty_excluded.json
deleted file mode 100644
index 7cac3ca0a77..00000000000
--- a/tools/statistical_ensemble_test/pyCECT/empty_excluded.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-"ExcludedVar": []
-}
diff --git a/tools/statistical_ensemble_test/pyCECT/excluded_varlist.json b/tools/statistical_ensemble_test/pyCECT/excluded_varlist.json
deleted file mode 100644
index e900eedc4c0..00000000000
--- a/tools/statistical_ensemble_test/pyCECT/excluded_varlist.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-"ExcludedVar": ["EMISCLD","ICEFRAC","ORO","PHIS","FSNTC","FSUTOA","FLNT","LWCF","SWCF","TGCLDCWP","dst_a3SF","TSMX","BURDEN2","TSMN","AODDUST1","BURDENSOA","BURDEN3","SNOWHICE","BURDENSO4","BURDEN1","OCNFRAC","LANDFRAC","BURDENPOM","BURDENSEASALT","AODDUST3","BURDENDUST","AODVIS","BURDENBC","SOLIN","AEROD_v"]
-}
diff --git a/tools/statistical_ensemble_test/pyCECT/included_varlist.json b/tools/statistical_ensemble_test/pyCECT/included_varlist.json
deleted file mode 100644
index 76f91a8b6c7..00000000000
--- a/tools/statistical_ensemble_test/pyCECT/included_varlist.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-"IncludedVar": ["ANRAIN", "ANSNOW", "AQRAIN", "AQSNOW", "AREI", "AREL", "AWNC", "AWNI",
-"CCN3", "CLDICE", "CLDLIQ", "CLOUD", "DCQ", "DTCOND", "DTV", "FICE",
-"FREQI", "FREQL", "FREQR", "FREQS", "ICIMR", "ICWMR", "IWC", "NUMICE",
-"NUMLIQ", "OMEGA", "OMEGAT", "Q", "QRL", "QRS", "RELHUM", "T",
-"U", "UU", "V", "VQ", "VT", "VU", "VV", "WSUB",
-"Z3", "AODDUST1", "AODDUST3", "AODVIS", "BURDENBC", "BURDENDUST", "BURDENPOM",
-"BURDENSEASALT", "BURDENSO4", "BURDENSOA", "CDNUMC", "CLDHGH", "CLDLOW",
-"CLDMED", "CLDTOT", "DMS_SRF", "FLDS", "FLNS", "FLNSC", "FLNT", "FLNTC",
-"FLUT", "FLUTC", "FSDS", "FSDSC", "FSNS", "FSNSC", "FSNT", "FSNTC", "FSNTOA",
-"FSNTOAC", "H2O2_SRF", "LHFLX", "PBLH", "PRECC", "PRECL", "PRECSC", "PRECSL",
-"PS", "PSL", "QFLX", "QREFHT", "SHFLX", "SNOWHLND", "SO2_SRF", "SOAG_SRF",
-"TAUGWX", "TAUGWY", "TAUX", "TAUY", "TGCLDIWP", "TGCLDLWP", "TMQ", "TREFHT",
-"TS", "U10", "bc_a1_SRF", "dst_a1_SRF", "dst_a3_SRF", "ncl_a1_SRF", "ncl_a3_SRF",
-"num_a1_SRF", "num_a2_SRF", "num_a3_SRF", "pom_a1_SRF", "so4_a1_SRF", "so4_a2_SRF",
-"so4_a3_SRF", "soa_a1_SRF", "soa_a2_SRF"]
-}
diff --git a/tools/statistical_ensemble_test/pyCECT/pop_ensemble.json b/tools/statistical_ensemble_test/pyCECT/pop_ensemble.json
deleted file mode 100644
index d7bd80271cb..00000000000
--- a/tools/statistical_ensemble_test/pyCECT/pop_ensemble.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-"Var3d": ["UVEL","VVEL","TEMP","SALT"],
-"Var2d": ["SSH"]
-}
diff --git a/tools/statistical_ensemble_test/pyCECT/pyCECT.py b/tools/statistical_ensemble_test/pyCECT/pyCECT.py
deleted file mode 100755
index ce6e72164b3..00000000000
--- a/tools/statistical_ensemble_test/pyCECT/pyCECT.py
+++ /dev/null
@@ -1,544 +0,0 @@
-#! /usr/bin/env python
-from __future__ import print_function
-import sys, getopt, os
-import numpy as np
-import netCDF4 as nc
-import time
-import pyEnsLib
-import json
-import random
-import glob
-import re
-from datetime import datetime
-from asaptools.partition import EqualStride, Duplicate
-import asaptools.simplecomm as simplecomm
-
-# This routine compares the results of several (default=3) new CAM tests
-# or a POP test against the accepted ensemble (generated by pyEnsSum or
-# pyEnsSumPop).
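# A minimal usage sketch (for illustration only, not part of the original
# file): the option names below are among those parsed in main(), but the
# summary-file name and test-run directory are hypothetical placeholders.
#
#   python pyCECT.py --sumfile cam.ens.summary.nc \
#       --indir /path/to/three/test/runs --tslice 1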
- - -def main(argv): - - # Get command line stuff and store in a dictionary - s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= - minPCFail= minRunFail= numRunFile= printVars popens - jsonfile= mpi_enable nbin= minrange= maxrange= outfile= - casejson= npick= pepsi_gm pop_tol= web_enabled - pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """ - optkeys = s.split() - try: - opts, args = getopt.getopt(argv, "h", optkeys) - except getopt.GetoptError: - pyEnsLib.CECT_usage() - sys.exit(2) - - # Set the default value for options - opts_dict = {} - opts_dict["input_globs"] = "" - opts_dict["indir"] = "" - opts_dict["tslice"] = 1 - opts_dict["nPC"] = 50 - opts_dict["sigMul"] = 2 - opts_dict["verbose"] = False - opts_dict["minPCFail"] = 3 - opts_dict["minRunFail"] = 2 - opts_dict["numRunFile"] = 3 - opts_dict["printVars"] = False - opts_dict["popens"] = False - opts_dict["jsonfile"] = "" - opts_dict["mpi_enable"] = False - opts_dict["nbin"] = 40 - opts_dict["minrange"] = 0.0 - opts_dict["maxrange"] = 4.0 - opts_dict["outfile"] = "testcase.result" - opts_dict["casejson"] = "" - opts_dict["npick"] = 10 - opts_dict["pepsi_gm"] = False - opts_dict["test_failure"] = True - opts_dict["pop_tol"] = 3.0 - opts_dict["pop_threshold"] = 0.90 - opts_dict["printStdMean"] = False - opts_dict["lev"] = 0 - opts_dict["eet"] = 0 - opts_dict["json_case"] = "" - opts_dict["sumfile"] = "" - opts_dict["web_enabled"] = False - opts_dict["saveResults"] = False - - # Call utility library getopt_parseconfig to parse the option keys - # and save to the dictionary - caller = "CECT" - opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict) - popens = opts_dict["popens"] - - # some mods for POP-ECT - if popens == True: - opts_dict["tslice"] = 0 - opts_dict["numRunFile"] = 1 - opts_dict["eet"] = 0 - opts_dict["mpi_enable"] = False - - # Create a mpi simplecomm object - if opts_dict["mpi_enable"]: - me = simplecomm.create_comm() - else: - me = simplecomm.create_comm(not opts_dict["mpi_enable"]) - - # Print out timestamp, input ensemble file and new run directory - dt = datetime.now() - verbose = opts_dict["verbose"] - if me.get_rank() == 0: - print(" ") - print("--------pyCECT--------") - print(" ") - print(dt.strftime("%A, %d. 
%B %Y %I:%M%p")) - print(" ") - if not opts_dict["web_enabled"]: - print("Ensemble summary file = " + opts_dict["sumfile"]) - print(" ") - print("Testcase file directory = " + opts_dict["indir"]) - print(" ") - print(" ") - - # make sure these are valid - if ( - opts_dict["web_enabled"] == False - and os.path.isfile(opts_dict["sumfile"]) == False - ): - print("ERROR: Summary file name is not valid.") - sys.exit() - if os.path.exists(opts_dict["indir"]) == False: - print("ERROR: --indir path is not valid.") - sys.exit() - - # Ensure sensible EET value - if opts_dict["eet"] and opts_dict["numRunFile"] > opts_dict["eet"]: - pyEnsLib.CECT_usage() - sys.exit(2) - - ifiles = [] - in_files = [] - # Random pick pop files from not_pick_files list - if opts_dict["casejson"]: - with open(opts_dict["casejson"]) as fin: - result = json.load(fin) - in_files_first = result["not_pick_files"] - in_files = random.sample(in_files_first, opts_dict["npick"]) - print("Testcase files:") - print("\n".join(in_files)) - - elif opts_dict["json_case"]: - json_file = opts_dict["json_case"] - if os.path.exists(json_file): - fd = open(json_file) - metainfo = json.load(fd) - if "CaseName" in metainfo: - casename = metainfo["CaseName"] - if os.path.exists(opts_dict["indir"]): - for name in casename: - wildname = "*." + name + ".*" - full_glob_str = os.path.join(opts_dict["indir"], wildname) - glob_file = glob.glob(full_glob_str) - in_files.extend(glob_file) - else: - print("ERROR: " + opts_dict["json_case"] + " does not exist.") - sys.exit() - print("in_files=", in_files) - else: - wildname = "*" + str(opts_dict["input_globs"]) + "*" - # Open all input files - if os.path.exists(opts_dict["indir"]): - full_glob_str = os.path.join(opts_dict["indir"], wildname) - glob_files = glob.glob(full_glob_str) - in_files.extend(glob_files) - num_file = len(in_files) - if num_file == 0: - print( - "ERROR: no matching files for wildcard=" - + wildname - + " found in specified --indir" - ) - sys.exit() - else: - print("Found " + str(num_file) + " matching files in specified --indir") - if opts_dict["numRunFile"] > num_file: - print( - "ERROR: more files needed (" - + str(opts_dict["numRunFile"]) - + ") than available in the indir (" - + str(num_file) - + ")." 
- ) - sys.exit() - - in_files.sort() - # print in_files - - if popens: - # Partition the input file list - in_files_list = me.partition(in_files, func=EqualStride(), involved=True) - - else: - # Random pick cam files - in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict) - - for frun_file in in_files_list: - if frun_file.find(opts_dict["indir"]) != -1: - frun_temp = frun_file - else: - frun_temp = opts_dict["indir"] + "/" + frun_file - if os.path.isfile(frun_temp): - ifiles.append(frun_temp) - else: - print("ERROR: COULD NOT LOCATE FILE " + frun_temp) - sys.exit() - - if opts_dict["web_enabled"]: - if len(opts_dict["sumfile"]) == 0: - opts_dict["sumfile"] = "/glade/p/cesmdata/cseg/inputdata/validation/" - # need to open ifiles - - opts_dict["sumfile"], machineid, compiler = pyEnsLib.search_sumfile( - opts_dict, ifiles - ) - if len(machineid) != 0 and len(compiler) != 0: - print(" ") - print( - "Validation file : machineid = " - + machineid - + ", compiler = " - + compiler - ) - print("Found summary file : " + opts_dict["sumfile"]) - print(" ") - else: - print("Warning: machine and compiler are unknown") - - if popens: - - # Read in the included var list - if not os.path.exists(opts_dict["jsonfile"]): - print( - "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile." - ) - sys.exit() - Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ESP") - print(" ") - print("Z-score tolerance = " + "{:3.2f}".format(opts_dict["pop_tol"])) - print("ZPR = " + "{:.2%}".format(opts_dict["pop_threshold"])) - zmall, n_timeslice = pyEnsLib.pop_compare_raw_score( - opts_dict, ifiles, me.get_rank(), Var3d, Var2d - ) - - np.set_printoptions(threshold=sys.maxsize) - - if opts_dict["mpi_enable"]: - zmall = pyEnsLib.gather_npArray_pop( - zmall, - me, - ( - me.get_size(), - len(Var3d) + len(Var2d), - len(ifiles), - opts_dict["nbin"], - ), - ) - if me.get_rank() == 0: - fout = open(opts_dict["outfile"], "w") - for i in range(me.get_size()): - for j in zmall[i]: - np.savetxt(fout, j, fmt="%-7.2e") - # cam - else: - # Read all variables from the ensemble summary file - ( - ens_var_name, - ens_avg, - ens_stddev, - ens_rmsz, - ens_gm, - num_3d, - mu_gm, - sigma_gm, - loadings_gm, - sigma_scores_gm, - is_SE_sum, - std_gm, - std_gm_array, - str_size, - ) = pyEnsLib.read_ensemble_summary(opts_dict["sumfile"]) - - # Only doing gm - - # Add ensemble rmsz and global mean to the dictionary "variables" - variables = {} - - for k, v in ens_gm.items(): - pyEnsLib.addvariables(variables, k, "gmRange", v) - - # Get 3d variable name list and 2d variable name list separately - var_name3d = [] - var_name2d = [] - for vcount, v in enumerate(ens_var_name): - if vcount < num_3d: - var_name3d.append(v) - else: - var_name2d.append(v) - - # Get ncol and nlev value - npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0]) - - if is_SE ^ is_SE_sum: - print( - "Warning: please note the ensemble summary file is different from the testing files: they use different grids" - ) - - # Compare the new run and the ensemble summary file - results = {} - countgm = np.zeros(len(ifiles), dtype=np.int32) - - # Calculate the new run global mean - mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary( - ifiles, var_name3d, var_name2d, is_SE, opts_dict["pepsi_gm"], opts_dict - ) - means = np.concatenate((mean3d, mean2d), axis=0) - - # Add the new run global mean to the dictionary "results" - for i in range(means.shape[1]): - for j in range(means.shape[0]): - pyEnsLib.addresults( - results, "means", 
means[j][i], ens_var_name[j], "f" + str(i) - ) - - # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range - for fcount, fid in enumerate(ifiles): - countgm[fcount] = pyEnsLib.evaluatestatus( - "means", "gmRange", variables, "gm", results, "f" + str(fcount) - ) - - # Calculate the PCA scores of the new run - new_scores, var_list, comp_std_gm = pyEnsLib.standardized( - means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict, ens_avg, me - ) - run_index, decision = pyEnsLib.comparePCAscores( - ifiles, new_scores, sigma_scores_gm, opts_dict, me - ) - - # If there is failure, plot out standardized mean and compared standardized mean in box plots - # if opts_dict['printStdMean'] and decision == 'FAILED': - if opts_dict["printStdMean"]: - - import seaborn as sns - import matplotlib - - matplotlib.use("Agg") # don't display figures - import matplotlib.pyplot as plt - - print(" ") - print( - "***************************************************************************** " - ) - print( - "Test run variable standardized means (for reference only - not used to determine pass/fail)" - ) - print( - "***************************************************************************** " - ) - print(" ") - - category = { - "all_outside99": [], - "two_outside99": [], - "one_outside99": [], - "all_oneside_outside1QR": [], - } - b = list(pyEnsLib.chunk(ens_var_name, 10)) - for f, alist in enumerate(b): - for fc, avar in enumerate(alist): - dist_995 = np.percentile(std_gm[avar], 99.5) - dist_75 = np.percentile(std_gm[avar], 75) - dist_25 = np.percentile(std_gm[avar], 25) - dist_05 = np.percentile(std_gm[avar], 0.5) - c = 0 - d = 0 - p = 0 - q = 0 - for i in range(comp_std_gm[f + fc].size): - if comp_std_gm[f + fc][i] > dist_995: - c = c + 1 - elif comp_std_gm[f + fc][i] < dist_05: - d = d + 1 - elif ( - comp_std_gm[f + fc][i] < dist_995 - and comp_std_gm[f + fc][i] > dist_75 - ): - p = p + 1 - elif ( - comp_std_gm[f + fc][i] > dist_05 - and comp_std_gm[f + fc][i] < dist_25 - ): - q = q + 1 - if c == 3 or d == 3: - category["all_outside99"].append((avar, f + fc)) - elif c == 2 or d == 2: - category["two_outside99"].append((avar, f + fc)) - elif c == 1 or d == 1: - category["one_outside99"].append((avar, f + fc)) - if p == 3 or q == 3: - category["all_oneside_outside1QR"].append((avar, f + fc)) - part_name = opts_dict["indir"].split("/")[-1] - if not part_name: - part_name = opts_dict["indir"].split("/")[-2] - for key in sorted(category): - list_array = [] - list_array2 = [] - list_var = [] - value = category[key] - - if key == "all_outside99": - print( - "*** ", - len(value), - " variables have 3 test run global means outside of the 99th percentile.", - ) - elif key == "two_outside99": - print( - "*** ", - len(value), - " variables have 2 test run global means outside of the 99th percentile.", - ) - elif key == "one_outside99": - print( - "*** ", - len(value), - " variables have 1 test run global mean outside of the 99th percentile.", - ) - elif key == "all_oneside_outside1QR": - print( - "*** ", - len(value), - " variables have all test run global means outside of the first quartile (but not outside the 99th percentile).", - ) - - if len(value) > 0: - print(" => generating plot ...") - if len(value) > 20: - print( - " NOTE: truncating to only plot the first 20 variables." 
- ) - value = value[0:20] - - for each_var in value: - list_array.append(std_gm[each_var[0]]) - list_array2.append(comp_std_gm[each_var[1]]) - name = each_var[0] - if isinstance(name, str) == False: - name = name.decode("utf-8") - - list_var.append(name) - - if len(value) != 0: - ax = sns.boxplot(data=list_array, whis=[0.5, 99.5], fliersize=0.0) - sns.stripplot(data=list_array2, jitter=True, color="r") - plt.xticks( - list(range(len(list_array))), list_var, fontsize=8, rotation=-45 - ) - - if decision == "FAILED": - plt.savefig(part_name + "_" + key + "_fail.png") - else: - plt.savefig(part_name + "_" + key + "_pass.png") - plt.close() - - ## - # Print file with info about new test runs....to a netcdf file - ## - if opts_dict["saveResults"]: - - num_vars = comp_std_gm.shape[0] - tsize = comp_std_gm.shape[1] - esize = std_gm_array.shape[1] - this_savefile = "savefile.nc" - if verbose == True: - print("VERBOSE: Creating ", this_savefile, " ...") - - if os.path.exists(this_savefile): - os.unlink(this_savefile) - nc_savefile = nc.Dataset(this_savefile, "w", format="NETCDF4_CLASSIC") - nc_savefile.createDimension("ens_size", esize) - nc_savefile.createDimension("test_size", tsize) - nc_savefile.createDimension("nvars", num_vars) - nc_savefile.createDimension("str_size", str_size) - - # Set global attributes - now = time.strftime("%c") - nc_savefile.creation_date = now - nc_savefile.title = "PyCECT compare results file" - nc_savefile.summaryfile = opts_dict["sumfile"] - # nc_savefile.testfiles = in_files - - # variables - v_vars = nc_savefile.createVariable("vars", "S1", ("nvars", "str_size")) - v_std_gm = nc_savefile.createVariable( - "std_gm", "f8", ("nvars", "test_size") - ) - v_scores = nc_savefile.createVariable( - "scores", "f8", ("nvars", "test_size") - ) - v_ens_sigma_scores = nc_savefile.createVariable( - "ens_sigma_scores", "f8", ("nvars",) - ) - v_ens_std_gm = nc_savefile.createVariable( - "ens_std_gm", "f8", ("nvars", "ens_size") - ) - - # hard-coded size - str_out = nc.stringtochar(np.array(ens_var_name, "S10")) - - v_vars[:] = str_out - v_std_gm[:, :] = comp_std_gm[:, :] - v_scores[:, :] = new_scores[:, :] - v_ens_sigma_scores[:] = sigma_scores_gm[:] - v_ens_std_gm[:, :] = std_gm_array[:, :] - - nc_savefile.close() - - # Print variables (optional) - if opts_dict["printVars"]: - print(" ") - print( - "***************************************************************************** " - ) - print( - "Variable global mean information (for reference only - not used to determine pass/fail)" - ) - print( - "***************************************************************************** " - ) - for fcount, fid in enumerate(ifiles): - print(" ") - print("Run " + str(fcount + 1) + ":") - print(" ") - print( - "***" + str(countgm[fcount]), - " of " - + str(len(ens_var_name)) - + " variables are outside of ensemble global mean distribution***", - ) - pyEnsLib.printsummary( - results, "gm", "means", "gmRange", fcount, variables, "global mean" - ) - print(" ") - print( - "----------------------------------------------------------------------------" - ) - - if me.get_rank() == 0: - print(" ") - print("Testing complete.") - print(" ") - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/tools/statistical_ensemble_test/pyCECT/pyEnsLib.py b/tools/statistical_ensemble_test/pyCECT/pyEnsLib.py deleted file mode 100644 index 688e42e6a6c..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/pyEnsLib.py +++ /dev/null @@ -1,2232 +0,0 @@ -#!/usr/bin/env python -from __future__ import 
print_function -import configparser -import sys, getopt, os -import numpy as np -import netCDF4 as nc -import time -import re -import json -import random -import asaptools.simplecomm as simplecomm -from asaptools.partition import Duplicate -import fnmatch -import glob -import itertools -from itertools import islice -from EET import exhaustive_test -from scipy import linalg as sla - -# import pdb - -# -# Parse header file of a netcdf to get the variable 3d/2d/1d list -# -def parse_header_file(filename): - command = "ncdump -h " + filename - print(command) - - retvalue = os.popen(command).readline() - print(retvalue) - - -# -# Create RMSZ zscores for ensemble file sets -# o_files are not open -# this is used for POP -def calc_rmsz(o_files, var_name3d, var_name2d, opts_dict): - - threshold = 1e-12 - popens = opts_dict["popens"] - tslice = opts_dict["tslice"] - nbin = opts_dict["nbin"] - minrange = opts_dict["minrange"] - maxrange = opts_dict["maxrange"] - - if not popens: - print("ERROR: should not be calculating rmsz for CAM => EXITING") - sys.exit(2) - - first_file = nc.Dataset(o_files[0], "r") - input_dims = first_file.dimensions - - # Create array variables - nlev = len(input_dims["z_t"]) - if "nlon" in input_dims: - nlon = len(input_dims["nlon"]) - nlat = len(input_dims["nlat"]) - elif "lon" in input_dims: - nlon = len(input_dims["lon"]) - nlat = len(input_dims["lat"]) - - # npts2d=nlat*nlon - # npts3d=nlev*nlat*nlon - - # print("calc_rmsz: nlev,nlat,nlon,o_files, var3dv var2d ..." , nlev, nlat, nlon, len(o_files), len(var_name3d), len(var_name2d)) - - output3d = np.zeros((len(o_files), nlev, nlat, nlon), dtype=np.float32) - output2d = np.zeros((len(o_files), nlat, nlon), dtype=np.float32) - - ens_avg3d = np.zeros((len(var_name3d), nlev, nlat, nlon), dtype=np.float32) - ens_stddev3d = np.zeros((len(var_name3d), nlev, nlat, nlon), dtype=np.float32) - ens_avg2d = np.zeros((len(var_name2d), nlat, nlon), dtype=np.float32) - ens_stddev2d = np.zeros((len(var_name2d), nlat, nlon), dtype=np.float32) - - Zscore3d = np.zeros((len(var_name3d), len(o_files), (nbin)), dtype=np.float32) - Zscore2d = np.zeros((len(var_name2d), len(o_files), (nbin)), dtype=np.float32) - - first_file.close() - - # open all of the files at once - # (not too many for pop - and no longer doing this for cam) - handle_o_files = [] - for fname in o_files: - handle_o_files.append(nc.Dataset(fname, "r")) - - # Now lOOP THROUGH 3D - for vcount, vname in enumerate(var_name3d): - # print(vcount, " ", vname) - # Read in vname's data from all ens. 
files - for fcount, this_file in enumerate(handle_o_files): - data = this_file.variables[vname] - output3d[fcount, :, :, :] = data[tslice, :, :, :] - - # for this variable, Generate ens_avg and ens_stddev to store in the ensemble summary file - moutput3d = np.ma.masked_values(output3d, data._FillValue) - ens_avg3d[vcount] = np.ma.average(moutput3d, axis=0) - ens_stddev3d[vcount] = np.ma.std(moutput3d, axis=0, dtype=np.float32) - - # Generate avg, stddev and zscore for this 3d variable - for fcount, this_file in enumerate(handle_o_files): - data = this_file.variables[vname] - # rmask contains a number for each grid point indicating it's region - rmask = this_file.variables["REGION_MASK"] - Zscore = pop_zpdf( - output3d[fcount], - nbin, - (minrange, maxrange), - ens_avg3d[vcount], - ens_stddev3d[vcount], - data._FillValue, - threshold, - rmask, - opts_dict, - ) - Zscore3d[vcount, fcount, :] = Zscore[:] - - # LOOP THROUGH 2D - for vcount, vname in enumerate(var_name2d): - # Read in vname's data of all files - for fcount, this_file in enumerate(handle_o_files): - data = this_file.variables[vname] - output2d[fcount, :, :] = data[tslice, :, :] - - # Generate ens_avg and esn_stddev to store in the ensemble summary file - moutput2d = np.ma.masked_values(output2d, data._FillValue) - ens_avg2d[vcount] = np.ma.average(moutput2d, axis=0) - ens_stddev2d[vcount] = np.ma.std(moutput2d, axis=0, dtype=np.float32) - - # Generate avg, stddev and zscore for 3d variable - for fcount, this_file in enumerate(handle_o_files): - data = this_file.variables[vname] - - rmask = this_file.variables["REGION_MASK"] - Zscore = pop_zpdf( - output2d[fcount], - nbin, - (minrange, maxrange), - ens_avg2d[vcount], - ens_stddev2d[vcount], - data._FillValue, - threshold, - rmask, - opts_dict, - ) - Zscore2d[vcount, fcount, :] = Zscore[:] - - # close files - for this_file in handle_o_files: - this_file.close() - - return Zscore3d, Zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d - - -# -# Calculate pop zscore pass rate (ZPR) or pop zpdf values -# -def pop_zpdf( - input_array, - nbin, - zrange, - ens_avg, - ens_stddev, - FillValue, - threshold, - rmask, - opts_dict, -): - - if "test_failure" in opts_dict: - test_failure = opts_dict["test_failure"] - else: - test_failure = False - - # print("input_array.ndim = ", input_array.ndim) - - # Masked out the missing values (land) - moutput = np.ma.masked_values(input_array, FillValue) - if input_array.ndim == 3: - rmask3d = np.zeros(input_array.shape, dtype=np.int32) - for i in rmask3d: - i[:, :] = rmask[:, :] - rmask_array = rmask3d - elif input_array.ndim == 2: - rmask_array = np.zeros(input_array.shape, dtype=np.int32) - rmask_array[:, :] = rmask[:, :] - - # Now we just want the open oceans (not marginal seas) - # - so for g1xv7, those are 1,2,3,4,6 - # in the region mask - so we don't want rmask<1 or rmask>6 - moutput2 = np.ma.masked_where((rmask_array < 1) | (rmask_array > 6), moutput) - - # Use the masked array moutput2 to calculate Zscore_temp=(data-avg)/stddev - Zscore_temp = np.fabs( - (moutput2.astype(np.float64) - ens_avg) - / np.where(ens_stddev <= threshold, FillValue, ens_stddev) - ) - - # To retrieve only the valid entries of Zscore_temp - Zscore_nomask = Zscore_temp[~Zscore_temp.mask] - - # If just test failure, calculate ZPR only (DEFAULT - not chnagable via cmd line - if test_failure: - # Zpr=the count of Zscore_nomask is less than pop_tol (3.0)/ the total count of Zscore_nomask - Zpr = np.where(Zscore_nomask <= opts_dict["pop_tol"])[0].size / float( - 
-            Zscore_temp.count()
-        )
-        return Zpr
-
-    # Else calculate zpdf and return as zscore
-    # Count the unmasked value
-    count = Zscore_temp.count()
-
-    Zscore, bins = np.histogram(Zscore_temp.compressed(), bins=nbin, range=zrange)
-
-    # Normalize the number by dividing the count
-    if count != 0:
-        Zscore = Zscore.astype(np.float32) / count
-    else:
-        print(("count=0,sum=", np.sum(Zscore)))
-    return Zscore
-
-
-#
-# Calculate rmsz score by comparing the run file with the ensemble summary file
-#
-def calculate_raw_score(
-    k,
-    v,
-    npts3d,
-    npts2d,
-    ens_avg,
-    ens_stddev,
-    is_SE,
-    opts_dict,
-    FillValue,
-    timeslice,
-    rmask,
-):
-    count = 0
-    Zscore = 0
-    threshold = 1.0e-12
-    has_zscore = True
-    popens = opts_dict["popens"]
-    if popens:  # POP
-        minrange = opts_dict["minrange"]
-        maxrange = opts_dict["maxrange"]
-        Zscore = pop_zpdf(
-            v,
-            opts_dict["nbin"],
-            (minrange, maxrange),
-            ens_avg,
-            ens_stddev,
-            FillValue,
-            threshold,
-            rmask,
-            opts_dict,
-        )
-    else:  # CAM
-        if k in ens_avg:
-            if is_SE:
-                if ens_avg[k].ndim == 1:
-                    npts = npts2d
-                else:
-                    npts = npts3d
-            else:
-                if ens_avg[k].ndim == 2:
-                    npts = npts2d
-                else:
-                    npts = npts3d
-
-            count, return_val = calc_Z(
-                v,
-                ens_avg[k].astype(np.float64),
-                ens_stddev[k].astype(np.float64),
-                count,
-                False,
-            )
-            Zscore = np.sum(np.square(return_val.astype(np.float64)))
-            if npts == count:
-                Zscore = 0
-            else:
-                Zscore = np.sqrt(Zscore / (npts - count))
-        else:
-            has_zscore = False
-
-    return Zscore, has_zscore
-
-
-#
-# Find the corresponding ensemble summary file from directory
-# /glade/p/cesmdata/cseg/inputdata/validation/ when three
-# validation files are input from the web server
-#
-# ifiles are not open
-def search_sumfile(opts_dict, ifiles):
-
-    sumfile_dir = opts_dict["sumfile"]
-    first_file = nc.Dataset(ifiles[0], "r")
-    machineid = ""
-    compiler = ""
-
-    global_att = first_file.ncattrs()
-    for attr_name in global_att:
-        val = getattr(first_file, attr_name)
-        if attr_name == "model_version":
-            if val.find("-") != -1:
-                model_version = val[0 : val.find("-")]
-            else:
-                model_version = val
-        elif attr_name == "compset":
-            compset = val
-        elif attr_name == "testtype":
-            testtype = val
-            if val == "UF-ECT":
-                testtype = "uf_ensembles"
-                opts_dict["eet"] = len(ifiles)
-            elif val == "ECT":
-                testtype = "ensembles"
-            elif val == "POP":
-                testtype = val + "_ensembles"
-        elif attr_name == "machineid":
-            machineid = val
-        elif attr_name == "compiler":
-            compiler = val
-        elif attr_name == "grid":
-            grid = val
-
-    if "testtype" in global_att:
-        sumfile_dir = sumfile_dir + "/" + testtype + "/"
-    else:
-        print(
-            "ERROR: No global attribute testtype in your validation file => EXITING...."
-        )
-        sys.exit(2)
-
-    if "model_version" in global_att:
-        sumfile_dir = sumfile_dir + "/" + model_version + "/"
-    else:
-        print(
-            "ERROR: No global attribute model_version in your validation file => EXITING...."
-        )
-        sys.exit(2)
-
-    first_file.close()
-
-    if os.path.exists(sumfile_dir):
-        thefile_id = 0
-        for i in os.listdir(sumfile_dir):
-            if os.path.isfile(sumfile_dir + i):
-                sumfile_id = nc.Dataset(sumfile_dir + i, "r")
-                sumfile_gatt = sumfile_id.ncattrs()
-                if "grid" not in sumfile_gatt and "resolution" not in sumfile_gatt:
-                    print(
-                        "ERROR: No global attribute grid or resolution in the summary file => EXITING...."
-                    )
-                    sys.exit(2)
-                if "compset" not in sumfile_gatt:
-                    print("ERROR: No global attribute compset in the summary file")
-                    sys.exit(2)
-                if (
-                    getattr(sumfile_id, "resolution") == grid
-                    and getattr(sumfile_id, "compset") == compset
-                ):
-                    thefile_id = sumfile_id
-                sumfile_id.close()
-        if thefile_id == 0:
-            print(
-                "ERROR: The verification files don't have a matching ensemble summary file to compare => EXITING...."
-            )
-            sys.exit(2)
-    else:
-        print(("ERROR: Could not locate directory " + sumfile_dir + " => EXITING...."))
-        sys.exit(2)
-
-    return sumfile_dir + i, machineid, compiler
-
-
-#
-# Create some variables and call a function to calculate PCA
-# now gm comes in at 64 bits...
-
-
-def pre_PCA(gm_orig, all_var_names, whole_list, me):
-
-    # initialize
-    b_exit = False
-    gm_len = gm_orig.shape
-    nvar = gm_len[0]
-    nfile = gm_len[1]
-    if gm_orig.dtype == np.float32:
-        gm = gm_orig.astype(np.float64)
-    else:
-        gm = gm_orig[:]
-
-    mu_gm = np.average(gm, axis=1)
-    sigma_gm = np.std(gm, axis=1, ddof=1)
-
-    standardized_global_mean = np.zeros(gm.shape, dtype=np.float64)
-    scores_gm = np.zeros(gm.shape, dtype=np.float64)
-
-    # AB: 4/19: whole_list contains variables to be removed due to very small global means (calc elsewhere), but this is not currently needed
-    # and whole_list will be len = 0
-    orig_len = len(whole_list)
-    if orig_len > 0:
-        if me.get_rank() == 0:
-            print("\n")
-            print(
-                "***************************************************************************************"
-            )
-            print(
-                (
-                    "Warning: these ",
-                    orig_len,
-                    " variables have ~0 means (< O(e-15)) for each ensemble member, please exclude them via the json file (--jsonfile) :",
-                )
-            )
-            print((",".join(['"{0}"'.format(item) for item in whole_list])))
-            print(
-                "***************************************************************************************"
-            )
-            print("\n")
-
-    # check for constants across ensemble
-    for var in range(nvar):
-        for file in range(nfile):
-            if np.any(sigma_gm[var] == 0.0) and all_var_names[var] not in set(
-                whole_list
-            ):
-                # keep track of zero standard deviations
-                whole_list.append(all_var_names[var])
-
-    # print list
-    new_len = len(whole_list)
-    if new_len > orig_len:
-        sub_list = whole_list[orig_len:]
-        if me.get_rank() == 0:
-            print("\n")
-            print(
-                "*************************************************************************************"
-            )
-            print(
-                (
-                    "Warning: these ",
-                    new_len - orig_len,
-                    " variables are constant across ensemble members, please exclude them via the json file (--jsonfile): ",
-                )
-            )
-            print("\n")
-            print((",".join(['"{0}"'.format(item) for item in sub_list])))
-            print(
-                "*************************************************************************************"
-            )
-            print("\n")
-
-    # exit if non-zero length whole_list
-    if new_len > 0:
-        print("=> Exiting ...")
-        b_exit = True
-
-    # check for linearly dependent vars
-    if not b_exit:
-
-        for var in range(nvar):
-            for file in range(nfile):
-                standardized_global_mean[var, file] = (
-                    gm[var, file] - mu_gm[var]
-                ) / sigma_gm[var]
-
-        eps = np.finfo(np.float32).eps
-        norm = np.linalg.norm(standardized_global_mean, ord=2)
-        sh = max(standardized_global_mean.shape)
-        mytol = sh * norm * eps
-
-        standardized_rank = np.linalg.matrix_rank(standardized_global_mean, mytol)
-        print("STATUS: checking for dependent vars using QR...")
-        print(("STATUS: standardized_global_mean rank = ", standardized_rank))
-
-        dep_var_list = get_dependent_vars_index(
-            standardized_global_mean, standardized_rank
-        )
-        num_dep = len(dep_var_list)
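        # (A hedged aside, for reference only: the tolerance above follows the
        # usual numerical-rank convention, tol = max(m, n) * ||A||_2 * eps, so
        # any variable whose standardized means are numerically a linear
        # combination of the others lowers the matrix rank and is reported as
        # dependent. A tiny illustration with made-up data:
        #   a = np.random.rand(5, 8)
        #   a[4, :] = a[0, :] + 2.0 * a[1, :]   # row 4 is linearly dependent
        #   np.linalg.matrix_rank(a)            # -> 4, not 5
        # )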
orig_len = len(whole_list) - - for i in dep_var_list: - whole_list.append(all_var_names[i]) - - if num_dep > 0: - sub_list = whole_list[orig_len:] - - print("\n") - print( - "********************************************************************************************" - ) - print( - ( - "Warning: these ", - num_dep, - " variables are linearly dependent, please exclude them via the json file (--jsonfile): ", - ) - ) - print("\n") - print((",".join(['"{0}"'.format(item) for item in sub_list]))) - print( - "********************************************************************************************" - ) - print("\n") - print("=> EXITING....") - - # need to exit - b_exit = True - - # now check for any variables that have less than 3% (of the ensemble size) unique values - if not b_exit: - print("STATUS: checking for unique values across ensemble") - - cts = np.count_nonzero(np.diff(np.sort(standardized_global_mean)), axis=1) + 1 - # thresh = .02* standardized_global_mean.shape[1] - thresh = 0.03 * standardized_global_mean.shape[1] - result = np.where(cts < thresh) - indices = result[0] - if len(indices) > 0: - nu_list = [] - for i in indices: - nu_list.append(all_var_names[i]) - - print("\n") - print( - "********************************************************************************************" - ) - print( - ( - "Warning: these ", - len(indices), - " variables contain fewer than 3% unique values across the ensemble, please exclude them via the json file (--jsonfile): ", - ) - ) - print("\n") - print((",".join(['"{0}"'.format(item) for item in nu_list]))) - print( - "********************************************************************************************" - ) - print("\n") - print("=> EXITING....") - - # need to exit - b_exit = True - - if not b_exit: - # find principal components - loadings_gm = princomp(standardized_global_mean) - # now do coord transformation on the standardized means to get the scores - scores_gm = np.dot(loadings_gm.T, standardized_global_mean) - sigma_scores_gm = np.std(scores_gm, axis=1, ddof=1) - else: - loadings_gm = np.zeros(gm.shape, dtype=np.float64) - sigma_scores_gm = np.zeros(gm.shape, dtype=np.float64) - - # return mu_gm.astype(np.float32),sigma_gm.astype(np.float32),standardized_global_mean.astype(np.float32),loadings_gm.astype(np.float32),sigma_scores_gm.astype(np.float32),b_exit - - return ( - mu_gm, - sigma_gm, - standardized_global_mean, - loadings_gm, - sigma_scores_gm, - b_exit, - ) - - -# -# Performs principal components analysis (PCA) on the p-by-n data matrix A -# rows of A correspond to (p) variables AND cols of A correspond to the (n) tests -# assume already standardized -# -# Returns the loadings: p-by-p matrix, each column containing coefficients -# for one principal component. 
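# (For reference, a compact sketch equivalent to the computation in princomp
# below, using made-up data; since the input is already standardized, np.cov
# gives its correlation matrix, and the eigenvectors sorted by decreasing
# eigenvalue are the PCA loadings:
#   x = np.random.randn(10, 30)            # 10 variables, 30 ensemble members
#   evals, evecs = np.linalg.eig(np.cov(x))
#   order = np.argsort(evals)[::-1]        # largest eigenvalue first
#   loadings = evecs[:, order]             # same result as princomp(x)
# )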
-# -def princomp(standardized_global_mean): - # find covariance matrix (will be pxp) - co_mat = np.cov(standardized_global_mean) - # Calculate evals and evecs of covariance matrix (evecs are also pxp) - [evals, evecs] = np.linalg.eig(co_mat) - # Above may not be sorted - sort largest first - new_index = np.argsort(evals)[::-1] - evecs = evecs[:, new_index] - evals = evals[new_index] - - return evecs - - -# -# Calculate (val-avg)/stddev and exclude zero value -# -def calc_Z(val, avg, stddev, count, flag): - return_val = np.empty(val.shape, dtype=np.float32, order="C") - tol = 1e-12 - if stddev[(stddev > tol)].size == 0: - if flag: - print("WARNING: ALL standard dev are < 1e-12") - flag = False - count = count + stddev[(stddev <= tol)].size - return_val = np.zeros(val.shape, dtype=np.float32, order="C") - else: - if stddev[(stddev <= tol)].size > 0: - if flag: - print("WARNING: some standard dev are < 1e-12") - flag = False - count = count + stddev[(stddev <= tol)].size - return_val[np.where(stddev <= tol)] = 0.0 - return_val[np.where(stddev > tol)] = ( - val[np.where(stddev > tol)] - avg[np.where(stddev > tol)] - ) / stddev[np.where(stddev > tol)] - else: - return_val = (val - avg) / stddev - return count, return_val - - -# -# Read a json file for the excluded/included list of variables -# -def read_jsonlist(metajson, method_name): - - if not os.path.exists(metajson): - print("\n") - print( - "*************************************************************************************" - ) - print("Warning: Specified json file does not exist: ", metajson) - print( - "*************************************************************************************" - ) - print("\n") - varList = [] - exclude = True - return varList, exclude - else: - fd = open(metajson) - metainfo = json.load(fd) - if method_name == "ES": - exclude = False - # varList = metainfo['ExcludedVar'] - if "ExcludedVar" in metainfo: - exclude = True - varList = metainfo["ExcludedVar"] - elif "IncludedVar" in metainfo: - varList = metainfo["IncludedVar"] - return varList, exclude - elif method_name == "ESP": - var2d = metainfo["Var2d"] - var3d = metainfo["Var3d"] - return var2d, var3d - - -# -# Calculate Normalized RMSE metric -# -def calc_nrmse(orig_array, comp_array): - - orig_size = orig_array.size - sumsqr = np.sum( - np.square(orig_array.astype(np.float64) - comp_array.astype(np.float64)) - ) - rng = np.max(orig_array) - np.min(orig_array) - if abs(rng) < 1e-18: - rmse = 0.0 - else: - rmse = np.sqrt(sumsqr / orig_size) / rng - - return rmse - - -# -# Calculate weighted global mean for one level of CAM output -# works in dp -def area_avg(data_orig, weight, is_SE): - - # TO DO: take into account missing values - if data_orig.dtype == np.float32: - data = data_orig.astype(np.float64) - else: - data = data_orig[:] - - if is_SE == True: - a = np.average(data, weights=weight) - else: # FV - # weights are for lat - a_lat = np.average(data, axis=0, weights=weight) - a = np.average(a_lat) - return a - - -# -# Calculate weighted global mean for one level of OCN output -# -def pop_area_avg(data_orig, weight): - - # Take into account missing values - # weights are for lat - if data_orig.dtype == np.float32: - data = data_orig.astype(np.float64) - else: - data = data_orig[:] - - a = np.ma.average(data, weights=weight) - return a - - -# def get_lev(file_dim_dict,lev_name): -# return len(file_dim_dict[lev_name]) - -# -# Get dimension 'lev' or 'z_t' -# -def get_nlev(o_files, popens): - - first_file = nc.Dataset(o_files[0], "r") - input_dims = 
first_file.dimensions - - if not popens: - nlev = len(input_dims["lev"]) - else: - nlev = len(input_dims["z_t"]) - - first_file.close() - - return nlev - - -# -# Calculate area_wgt when processes cam se/cam fv/pop files -# -def get_area_wgt(o_files, is_SE, nlev, popens): - - z_wgt = {} - first_file = nc.Dataset(o_files[0], "r") - input_dims = first_file.dimensions - - if is_SE == True: - ncol = len(input_dims["ncol"]) - output3d = np.zeros((nlev, ncol), dtype=np.float64) - output2d = np.zeros(ncol, dtype=np.float64) - area_wgt = np.zeros(ncol, dtype=np.float64) - area = first_file.variables["area"] - area_wgt[:] = area[:] - total = np.sum(area_wgt) - area_wgt[:] /= total - else: - if not popens: - nlon = len(input_dims["lon"]) - nlat = len(input_dims["lat"]) - gw = first_file.variables["gw"] - else: - if "nlon" in input_dims: - nlon = len(input_dims["nlon"]) - nlat = len(input_dims["nlat"]) - elif "lon" in input_dims: - nlon = len(input_dims["lon"]) - nlat = len(input_dims["lat"]) - gw = first_file.variables["TAREA"] - z_wgt = first_file.variables["dz"] - output3d = np.zeros((nlev, nlat, nlon), dtype=np.float64) - output2d = np.zeros((nlat, nlon), dtype=np.float64) - area_wgt = np.zeros( - nlat, dtype=np.float64 - ) # note gauss weights are length nlat - area_wgt[:] = gw[:] - - first_file.close() - - return output3d, output2d, area_wgt, z_wgt - - -# -# compute area_wgts, and then loop through all files to call calc_global_means_for_onefile -# o_files are not open for CAM -# 12/19 - summary file will now be double precision -def generate_global_mean_for_summary( - o_files, var_name3d, var_name2d, is_SE, pepsi_gm, opts_dict -): - - tslice = opts_dict["tslice"] - popens = opts_dict["popens"] - - n3d = len(var_name3d) - n2d = len(var_name2d) - tot = n3d + n2d - - # gm3d = np.zeros((n3d,len(o_files)), dtype=np.float32) - # gm2d = np.zeros((n2d,len(o_files)), dtype=np.float32) - gm3d = np.zeros((n3d, len(o_files)), dtype=np.float64) - gm2d = np.zeros((n2d, len(o_files)), dtype=np.float64) - - nlev = get_nlev(o_files, popens) - - output3d, output2d, area_wgt, z_wgt = get_area_wgt(o_files, is_SE, nlev, popens) - - # loop through the input file list to calculate global means - # var_name3d=[] - for fcount, in_file in enumerate(o_files): - - fname = nc.Dataset(in_file, "r") - - if pepsi_gm: - # Generate global mean for pepsi challenge data timeseries daily files, they all are 2d variables - var_name2d = [] - for k, v in fname.variables.items(): - if v.typecode() == "f": - var_name2d.append(k) - fout = open(k + "_33.txt", "w") - if k == "time": - ntslice = v[:] - for i in np.nditer(ntslice): - temp1, temp2 = calc_global_mean_for_onefile( - fname, - area_wgt, - var_name3d, - var_name2d, - output3d, - output2d, - int(i), - is_SE, - nlev, - opts_dict, - ) - fout.write(str(temp2[0]) + "\n") - elif popens: - gm3d[:, fcount], gm2d[:, fcount] = calc_global_mean_for_onefile_pop( - fname, - area_wgt, - z_wgt, - var_name3d, - var_name2d, - output3d, - output2d, - tslice, - is_SE, - nlev, - opts_dict, - ) - - else: - gm3d[:, fcount], gm2d[:, fcount] = calc_global_mean_for_onefile( - fname, - area_wgt, - var_name3d, - var_name2d, - output3d, - output2d, - tslice, - is_SE, - nlev, - opts_dict, - ) - - fname.close() - - var_list = [] - # some valid CAM vars are all small entries(e.g. 
DTWR_H2O2 and DTWR_H2O4), so we no longer excluse them via var_list - - return gm3d, gm2d, var_list - - -# -# Calculate global means for one OCN input file -# (fname is open) NOT USED ANY LONGER -def calc_global_mean_for_onefile_pop( - fname, - area_wgt, - z_wgt, - var_name3d, - var_name2d, - output3d, - output2d, - tslice, - is_SE, - nlev, - opts_dict, -): - - nan_flag = False - - n3d = len(var_name3d) - n2d = len(var_name2d) - - # gm3d = np.zeros((n3d),dtype=np.float32) - # gm2d = np.zeros((n2d),dtype=np.float32) - gm3d = np.zeros((n3d), dtype=np.float64) - gm2d = np.zeros((n2d), dtype=np.float64) - - # calculate global mean for each 3D variable - for count, vname in enumerate(var_name3d): - gm_lev = np.zeros(nlev, dtype=np.float64) - data = fname.variables[vname] - if np.any(np.isnan(data)): - print("ERROR: ", vname, " data contains NaNs - please check input.") - nan_flag = True - output3d[:, :, :] = data[tslice, :, :, :] - dbl_output3d = output3d.astype(dtype=np.float64) - for k in range(nlev): - moutput3d = np.ma.masked_values(dbl_output3d[k, :, :], data._FillValue) - gm_lev[k] = pop_area_avg(moutput3d, area_wgt) - # note: averaging over levels - in future, consider pressure-weighted (?) - gm3d[count] = np.average(gm_lev, weights=z_wgt) - - # calculate global mean for each 2D variable - for count, vname in enumerate(var_name2d): - data = fname.variables[vname] - if np.any(np.isnan(data)): - print("ERROR: ", vname, " data contains NaNs - please check input.") - nan_flag = True - output2d[:, :] = data[tslice, :, :] - dbl_output2d = output2d.astype(dtype=np.float64) - moutput2d = np.ma.masked_values(dbl_output2d[:, :], data._FillValue) - gm2d_mean = pop_area_avg(moutput2d, area_wgt) - gm2d[count] = gm2d_mean - - if nan_flag: - print("ERROR: Nans in input data => EXITING....") - sys.exit() - - return gm3d, gm2d - - -# -# Calculate global means for one CAM input file -# fname is open -def calc_global_mean_for_onefile( - fname, - area_wgt, - var_name3d, - var_name2d, - output3d, - output2d, - tslice, - is_SE, - nlev, - opts_dict, -): - - nan_flag = False - - if "cumul" in opts_dict: - cumul = opts_dict["cumul"] - else: - cumul = False - n3d = len(var_name3d) - n2d = len(var_name2d) - - # gm3d = np.zeros((n3d),dtype=np.float32) - # gm2d = np.zeros((n2d),dtype=np.float32) - gm3d = np.zeros((n3d), dtype=np.float64) - gm2d = np.zeros((n2d), dtype=np.float64) - - # calculate global mean for each 3D variable (note: area_avg casts into dp before computation) - for count, vname in enumerate(var_name3d): - - if isinstance(vname, str) == True: - vname_d = vname - else: - vname_d = vname.decode("utf-8") - - if vname_d not in fname.variables: - print( - "WARNING 1: the test file does not have the variable ", - vname_d, - " that is in the ensemble summary file ...", - ) - continue - data = fname.variables[vname_d] - if not data[tslice].size: - print("ERROR: ", vname_d, " data is empty => EXITING....") - sys.exit(2) - if np.any(np.isnan(data)): - print( - "ERROR: ", - vname_d, - " data contains NaNs - please check input => EXITING", - ) - nan_flag = True - continue - if is_SE == True: - if not cumul: - temp = data[tslice].shape[0] - gm_lev = np.zeros(temp, dtype=np.float64) - for k in range(temp): - gm_lev[k] = area_avg(data[tslice, k, :], area_wgt, is_SE) - else: - gm_lev = np.zeros(nlev, dtype=np.float64) - for k in range(nlev): - gm_lev[k] = area_avg(output3d[k, :], area_wgt, is_SE) - else: - if not cumul: - temp = data[tslice].shape[0] - gm_lev = np.zeros(temp, dtype=np.float64) - for k in 
range(temp): - gm_lev[k] = area_avg(data[tslice, k, :, :], area_wgt, is_SE) - else: - gm_lev = np.zeros(nlev) - for k in range(nlev): - gm_lev[k] = area_avg(output3d[k, :, :], area_wgt, is_SE) - # note: averaging over levels could be pressure-weighted (?) - gm3d[count] = np.mean(gm_lev) - - # calculate global mean for each 2D variable - for count, vname in enumerate(var_name2d): - - if isinstance(vname, str) == True: - vname_d = vname - else: - vname_d = vname.decode("utf-8") - - if vname_d not in fname.variables: - print( - "WARNING 2: the test file does not have the variable ", - vname_d, - " that is in the ensemble summary file", - ) - continue - data = fname.variables[vname_d] - if np.any(np.isnan(data)): - print( - "ERROR: ", - vname_d, - " data contains NaNs - please check input => EXITING....", - ) - nan_flag = True - continue - if is_SE == True: - if not cumul: - output2d[:] = data[tslice, :] - gm2d_mean = area_avg(output2d[:], area_wgt, is_SE) - else: - if not cumul: - output2d[:, :] = data[tslice, :, :] - gm2d_mean = area_avg(output2d[:, :], area_wgt, is_SE) - gm2d[count] = gm2d_mean - - if nan_flag: - print("ERROR: Nans in input data => EXITING....") - sys.exit() - - return gm3d, gm2d - - -# -# Read variable values from ensemble summary file -# -def read_ensemble_summary(ens_file): - if os.path.isfile(ens_file): - fens = nc.Dataset(ens_file, "r") - else: - print("ERROR: file ens summary: ", ens_file, " not found => EXITING....") - sys.exit(2) - - is_SE = False - dims = fens.dimensions - if "ncol" in dims: - is_SE = True - - esize = len(dims["ens_size"]) - str_size = len(dims["str_size"]) - - ens_avg = {} - ens_stddev = {} - ens_var_name = [] - ens_rmsz = {} - ens_gm = {} - std_gm = {} - - # Retrieve the variable list from ensemble file - for k, v in fens.variables.items(): - if k == "vars": - for i in v[0 : len(v)]: - l = 0 - for j in i: - if j: - l = l + 1 - ens_var_name.append(i[0:l].tostring().strip()) - elif k == "var3d": - num_var3d = len(v) - elif k == "var2d": - num_var2d = len(v) - - for k, v in fens.variables.items(): - # Retrieve the ens_avg3d or ens_avg2d array - if k == "ens_avg3d" or k == "ens_avg2d": - if k == "ens_avg2d": - m = num_var3d - else: - m = 0 - if v: - for i in v[0 : len(v)]: - temp_name = ens_var_name[m] - ens_avg[temp_name] = i - m = m + 1 - - # Retrieve the ens_stddev3d or ens_stddev2d array - elif k == "ens_stddev3d" or k == "ens_stddev2d": - if k == "ens_stddev2d": - m = num_var3d - else: - m = 0 - if v: - for i in v[0 : len(v)]: - temp_name = ens_var_name[m] - ens_stddev[temp_name] = i - m = m + 1 - # Retrieve the RMSZ score array - elif k == "RMSZ": - m = 0 - for i in v[0 : len(v)]: - temp_name = ens_var_name[m] - ens_rmsz[temp_name] = i - m = m + 1 - elif k == "global_mean": - m = 0 - for i in v[0 : len(v)]: - temp_name = ens_var_name[m] - ens_gm[temp_name] = i - m = m + 1 - elif k == "standardized_gm": - m = 0 - for i in v[0 : len(v)]: - temp_name = ens_var_name[m] - std_gm[temp_name] = i - m = m + 1 - # also get as array (not just dictionary) - std_gm_array = np.zeros((num_var3d + num_var2d, esize), dtype=np.float64) - std_gm_array[:] = v[:, :] - elif k == "mu_gm": - mu_gm = np.zeros((num_var3d + num_var2d), dtype=np.float64) - mu_gm[:] = v[:] - elif k == "sigma_gm": - sigma_gm = np.zeros((num_var3d + num_var2d), dtype=np.float64) - sigma_gm[:] = v[:] - elif k == "loadings_gm": - loadings_gm = np.zeros( - (num_var3d + num_var2d, num_var3d + num_var2d), dtype=np.float64 - ) - loadings_gm[:, :] = v[:, :] - elif k == "sigma_scores_gm": - 
sigma_scores_gm = np.zeros((num_var3d + num_var2d), dtype=np.float64) - sigma_scores_gm[:] = v[:] - - fens.close() - - return ( - ens_var_name, - ens_avg, - ens_stddev, - ens_rmsz, - ens_gm, - num_var3d, - mu_gm, - sigma_gm, - loadings_gm, - sigma_scores_gm, - is_SE, - std_gm, - std_gm_array, - str_size, - ) - - -# -# Get the ncol and nlev value from cam run file -# (frun is not open) -def get_ncol_nlev(frun): - - o_frun = nc.Dataset(frun, "r") - input_dims = o_frun.dimensions - ncol = -1 - nlev = -1 - ilev = -1 - nlat = -1 - nlon = -1 - icol = -1 - ilat = -1 - ilon = -1 - for k, v in input_dims.items(): - if k == "lev": - nlev = len(v) - if k == "ncol": - ncol = len(v) - if (k == "lat") or (k == "nlat"): - nlat = len(v) - if (k == "lon") or (k == "nlon"): - nlon = len(v) - - if ncol == -1: - one_spatial_dim = False - else: - one_spatial_dim = True - - if one_spatial_dim: - npts3d = float(nlev * ncol) - npts2d = float(ncol) - else: - npts3d = float(nlev * nlat * nlon) - npts2d = float(nlat * nlon) - - o_frun.close() - - return npts3d, npts2d, one_spatial_dim - - -# -# Calculate max norm ensemble value for each variable base on all ensemble run files -# the inputdir should only have all ensemble run files -# -def calculate_maxnormens(opts_dict, var_list): - ifiles = [] - Maxnormens = {} - threshold = 1e-12 - # input file directory - inputdir = opts_dict["indir"] - - # the timeslice that we want to process - tstart = opts_dict["tslice"] - - # open all files - for frun_file in os.listdir(inputdir): - if os.path.isfile(inputdir + frun_file): - ifiles.append(nc.Dataset(inputdir + frun_file, "r")) - else: - print( - "ERROR: Could not locate file= " - + inputdir - + frun_file - + " => EXITING...." - ) - sys.exit() - comparision = {} - # loop through each variable - for k in var_list: - output = [] - # read all data of variable k from all files - for f in ifiles: - v = f.variables - output.append(v[k][tstart]) - max_val = 0 - # open an output file - outmaxnormens = k + "_ens_maxnorm.txt" - fout = open(outmaxnormens, "w") - Maxnormens[k] = [] - - # calculate E(i=0:n)(maxnormens[i][x])=max(comparision[i]-E(x=0:n)(output[x])) - for n in range(len(ifiles)): - Maxnormens[k].append(0) - comparision[k] = ifiles[n].variables[k][tstart] - for m in range(len(ifiles)): - max_val = np.max(np.abs(comparision[k] - output[m])) - if Maxnormens[k][n] < max_val: - Maxnormens[k][n] = max_val - range_max = np.max((comparision[k])) - range_min = np.min((comparision[k])) - if range_max - range_min < threshold: - Maxnormens[k][n] = 0.0 - else: - Maxnormens[k][n] = Maxnormens[k][n] / (range_max - range_min) - fout.write(str(Maxnormens[k][n]) + "\n") - strtmp = ( - k - + " : " - + "ensmax min max" - + " : " - + "{0:9.2e}".format(min(Maxnormens[k])) - + " " - + "{0:9.2e}".format(max(Maxnormens[k])) - ) - print(strtmp) - fout.close() - - -# -# Parse options from command line or from config file -# -def getopt_parseconfig(opts, optkeys, caller, opts_dict): - # integer - integer = "-[0-9]+" - int_p = re.compile(integer) - # scientific notation - flt = "-*[0-9]+\.[0-9]+" - flt_p = re.compile(flt) - - for opt, arg in opts: - if opt == "-h" and caller == "CECT": - CECT_usage() - sys.exit() - elif opt == "-h" and caller == "ES": - EnsSum_usage() - sys.exit() - elif opt == "-h" and caller == "ESP": - EnsSumPop_usage() - sys.exit() - elif opt == "-f": - opts_dict["orig"] = arg - elif opt == "-m": - opts_dict["reqmeth"] = arg - # parse config file - elif opt in ("--config"): - configfile = arg - config = configparser.ConfigParser() 
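            # (A hypothetical example of the INI file passed via --config; the
            # section names follow the handling just below, where values in
            # bool_arg/metrics, int_arg, and float_arg are coerced with
            # getboolean/getint/getfloat and everything else stays a string:
            #   [int_arg]
            #   nbin = 40
            #   [float_arg]
            #   pop_tol = 3.0
            #   [bool_arg]
            #   verbose = True
            # )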
- config.read(configfile) - for sec in config.sections(): - for name, value in config.items(sec): - if sec == "bool_arg" or sec == "metrics": - opts_dict[name] = config.getboolean(sec, name) - elif sec == "int_arg": - opts_dict[name] = config.getint(sec, name) - elif sec == "float_arg": - opts_dict[name] = config.getfloat(sec, name) - else: - opts_dict[name] = value - - # parse command line options which might replace the settings in the config file - else: - for k in optkeys: - if k.find("=") != -1: - keyword = k[0 : k.find("=")] - if opt == "--" + keyword: - if arg.isdigit(): - opts_dict[keyword] = int(arg) - else: - if flt_p.match(arg): - opts_dict[keyword] = float(arg) - elif int_p.match(arg): - opts_dict[keyword] = int(arg) - else: - opts_dict[keyword] = arg - else: - if opt == "--" + k: - opts_dict[k] = True - return opts_dict - - -# -# Figure out the scores of the 3 new runs, standardized global means, then multiple by the loadings_gm -# -def standardized( - gm, mu_gm, sigma_gm, loadings_gm, all_var_names, opts_dict, ens_avg, me -): - nvar = gm.shape[0] - nfile = gm.shape[1] - sum_std_mean = np.zeros((nvar,), dtype=np.float64) - standardized_mean = np.zeros(gm.shape, dtype=np.float64) - for var in range(nvar): - for file in range(nfile): - standardized_mean[var, file] = ( - gm[var, file].astype(np.float64) - mu_gm[var].astype(np.float64) - ) / sigma_gm[var].astype(np.float64) - sum_std_mean[var] = sum_std_mean[var] + np.abs(standardized_mean[var, file]) - new_scores = np.dot(loadings_gm.T.astype(np.float64), standardized_mean) - - var_list = [] - sorted_sum_std_mean = np.argsort(sum_std_mean)[::-1] - if opts_dict["printStdMean"]: - if me.get_rank() == 0: - print(" ") - print( - "************************************************************************" - ) - print(" Sum of standardized mean of all variables in decreasing order") - print( - "************************************************************************" - ) - for var in range(nvar): - var_list.append(all_var_names[sorted_sum_std_mean[var]]) - vname = all_var_names[sorted_sum_std_mean[var]] - if me.get_rank() == 0: - - if isinstance(vname, str) == True: - vname_d = vname - else: - vname_d = vname.decode("utf-8") - - print( - "{:>15}".format(vname_d), - "{0:9.2e}".format(sum_std_mean[sorted_sum_std_mean[var]]), - ) - print(" ") - return new_scores, var_list, standardized_mean - - -# -# Insert rmsz scores, global mean of new run to the dictionary results -# -def addresults(results, key, value, var, thefile): - if var in results: - temp = results[var] - if key in temp: - temp2 = temp[key] - if thefile in temp2: - temp3 = results[var][key][thefile] - else: - temp3 = {} - else: - temp[key] = {} - temp2 = {} - temp3 = {} - temp3 = value - temp2[thefile] = temp3 - temp[key] = temp2 - results[var] = temp - else: - results[var] = {} - results[var][key] = {} - results[var][key][thefile] = value - - return results - - -# -# Print out rmsz score failure, global mean failure summary -# -def printsummary(results, key, name, namerange, thefilecount, variables, label): - thefile = "f" + str(thefilecount) - for k, v in results.items(): - if "status" in v: - temp0 = v["status"] - if key in temp0: - if thefile in temp0[key]: - temp = temp0[key][thefile] - if temp < 1: - print(" ") - print( - k - + " (" - + "{0:9.2e}".format(v[name][thefile]) - + " outside of [" - + "{0:9.2e}".format(variables[k][namerange][0]) - + " " - + "{0:9.2e}".format(variables[k][namerange][1]) - + "])" - ) - - -# -# Insert the range of rmsz score and global mean of the 
ensemble summary file to the dictionary variables
-#
-def addvariables(variables, var, vrange, thearray):
-    if var in variables:
-        variables[var][vrange] = (np.min(thearray), np.max(thearray))
-    else:
-        variables[var] = {}
-        variables[var][vrange] = (np.min(thearray), np.max(thearray))
-
-    return variables
-
-
-#
-# Evaluate whether the new run rmsz score/global mean is in the range of the rmsz scores/global means of the ensemble summary
-#
-def evaluatestatus(name, rangename, variables, key, results, thefile):
-    totalcount = 0
-    for k, v in results.items():
-        if name in v and rangename in variables[k]:
-            temp0 = results[k]
-            xrange = variables[k][rangename]
-            if v[name][thefile] > xrange[1] or v[name][thefile] < xrange[0]:
-                val = 0
-            else:
-                val = 1
-            if "status" in temp0:
-                temp = temp0["status"]
-                if key in temp:
-                    temp2 = temp[key]
-                else:
-                    temp[key] = temp2 = {}
-
-                if val == 0:
-                    totalcount = totalcount + 1
-                temp2[thefile] = val
-                temp[key] = temp2
-                results[k]["status"] = temp
-            else:
-                temp0["status"] = {}
-                temp0["status"][key] = {}
-                temp0["status"][key][thefile] = val
-                if val == 0:
-                    totalcount = totalcount + 1
-
-    return totalcount
-
-
-#
-# Evaluate if the new run PCA scores pass or fail by comparing with the PCA scores of the ensemble summary
-# ifiles are open
-def comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me):
-
-    comp_array = np.zeros(new_scores.shape, dtype=np.int32)
-    sum = np.zeros(new_scores.shape[0], dtype=np.int32)
-    eachruncount = np.zeros(new_scores.shape[1], dtype=np.int32)
-    totalcount = 0
-    sum_index = []
-    if me.get_rank() == 0:
-        print("*********************************************** ")
-        print("PCA Test Results")
-        print("*********************************************** ")
-
-    # Test to check if new_scores is out of the range of sigMul*sigma_scores_gm
-    for i in range(opts_dict["nPC"]):
-        for j in range(new_scores.shape[1]):
-            if abs(new_scores[i][j]) > opts_dict["sigMul"] * (sigma_scores_gm[i]):
-                comp_array[i][j] = 1
-                eachruncount[j] = eachruncount[j] + 1
-                # Only check the first nPC number of scores, and sum comp_array together
-                sum[i] = sum[i] + comp_array[i][j]
-
-    if len(ifiles) >= opts_dict["minRunFail"]:
-        num_run_less = False
-    else:
-        num_run_less = True
-    # Check to see if sum is larger than min_run_fail, and if so, save the index of the sum
-    for i in range(opts_dict["nPC"]):
-        if sum[i] >= opts_dict["minRunFail"]:
-            totalcount = totalcount + 1
-            sum_index.append(i + 1)
-
-    # false_positive=check_falsepositive(opts_dict,sum_index)
-
-    # If the length of sum_index is larger than min_PC_fail, the three runs failed.
-    # This doesn't apply for UF-ECT.
-    if opts_dict["numRunFile"] > opts_dict["eet"]:
-        if len(sum_index) >= opts_dict["minPCFail"]:
-            decision = "FAILED"
-        else:
-            decision = "PASSED"
-        if (num_run_less == False) and (me.get_rank() == 0):
-            print(" ")
-            print(
-                "Summary: "
-                + str(totalcount)
-                + " PC scores failed at least "
-                + str(opts_dict["minRunFail"])
-                + " runs: ",
-                sum_index,
-            )
-            print(" ")
-            print("These runs " + decision + " according to our testing criterion.")
-        elif me.get_rank() == 0:
-            print(" ")
-            print(
-                "The number of run files is less than minRunFail (=2), so we cannot determine an overall pass or fail."
- ) - print(" ") - - # Record the histogram of comp_array which value is one by the PCA scores - for i in range(opts_dict["nPC"]): - index_list = [] - for j in range(comp_array.shape[1]): - if comp_array[i][j] == 1: - index_list.append(j + 1) - if len(index_list) > 0 and me.get_rank() == 0: - print( - "PC " + str(i + 1) + ": failed " + str(len(index_list)) + " runs ", - index_list, - ) - if me.get_rank() == 0: - print(" ") - - # Record the index of comp_array which value is one - run_index = [] - - if opts_dict["eet"] >= opts_dict["numRunFile"]: - eet = exhaustive_test() - faildict = {} - - for j in range(comp_array.shape[1]): - index_list = [] - for i in range(opts_dict["nPC"]): - if comp_array[i][j] == 1: - index_list.append(i + 1) - if me.get_rank() == 0: - print( - "Run " - + str(j + 1) - + ": " - + str(eachruncount[j]) - + " PC scores failed ", - index_list, - ) - run_index.append((j + 1)) - faildict[str(j + 1)] = set(index_list) - - passes, failures = eet.test_combinations( - faildict, - runsPerTest=opts_dict["numRunFile"], - nRunFails=opts_dict["minRunFail"], - ) - if me.get_rank() == 0: - print(" ") - print( - "%d tests failed out of %d possible tests." - % (failures, passes + failures) - ) - print( - "This represents a failure percent of %.2f." - % (100.0 * failures / float(failures + passes)) - ) - print(" ") - if float(failures) > 0.1 * float(passes + failures): - decision = "FAILED" - else: - decision = "PASSED" - - else: - for j in range(comp_array.shape[1]): - index_list = [] - for i in range(opts_dict["nPC"]): - if comp_array[i][j] == 1: - index_list.append(i + 1) - if me.get_rank() == 0: - print( - "Run " - + str(j + 1) - + ": " - + str(eachruncount[j]) - + " PC scores failed ", - index_list, - ) - run_index.append((j + 1)) - - return run_index, decision - - -# -# Command options for pyCECT.py -# -def CECT_usage(): - print("\n Compare test runs to an ensemble summary file. \n") - print(" ----------------------------") - print(" Args for pyCECT :") - print(" ----------------------------") - print(" pyCECT.py") - print(" -h : prints out this usage message") - print(" --verbose : prints out in verbose mode (off by default)") - print( - " --sumfile : the ensemble summary file (generated by pyEnsSum.py)" - ) - print( - " --indir : directory containing the input run files (at least 3 files)" - ) - print( - " --tslice : which time slice to use from input run files (default = 1)" - ) - print(" ----------------------------") - print(" Args for CAM-CECT and UF-CAM-ECT:") - print(" ----------------------------") - print( - " --nPC : number of principal components (PCs) to check (default = 50, but can't be greater than the number of variables)" - ) - print( - ' --sigMul : number of standard deviations away from the mean defining the "acceptance region" (default = 2)' - ) - print( - " --minPCFail : minimum number of PCs that must fail the specified number of runs for a FAILURE (default = 3)" - ) - print( - " --minRunFail : minimum number of runs that PCs must fail for a FAILURE (default = 2)" - ) - print( - " --numRunFile : total number of runs to include in test (default = 3)" - ) - print( - " --printVars : print out variables that fall outsie of the global mean ensemble distribution (off by default)" - ) - print( - " --printStdMean : print out sum of standardized mean of all variables in decreasing order. 
If test returns a FAIL, "
-    )
-    print(
-        " then output associated box plots (off by default) - requires Python seaborn package"
-    )
-    print(
-        " --saveResults : save a netcdf file with scores and std global means from the test runs (savefile.nc). "
-    )
-    print(
-        " --eet : enable Ensemble Exhaustive Test (EET) to compute failure percent of runs (greater than or equal to numRunFile)"
-    )
-    print(" ----------------------------")
-    print(" Args for POP-CECT :")
-    print(" ----------------------------")
-    print(
-        " --popens : indicate POP-ECT (required!) (tslice will be set to 0)"
-    )
-    print(
-        " --jsonfile : list the json file that specifies variables to test (required!), e.g. pop_ensemble.json"
-    )
-    print(
-        " --pop_tol : set pop zscore tolerance (default is 3.0 - recommended)"
-    )
-    print(" --pop_threshold : set pop threshold (default is 0.9)")
-    print(
-        " --input_globs : set the search pattern (wildcard) for the file(s) to compare from "
-    )
-    print(
-        " the input directory (indir), such as core48.pop.h.0003-12 or core48.pop.h.0003 (more info in README)"
-    )
-
-
-# print 'Version 3.0.8'
-
-#
-# Command options for pyEnsSum.py
-#
-def EnsSum_usage():
-    print("\n Creates the summary file for an ensemble of CAM data. \n")
-    print(" ------------------------")
-    print(" Args for pyEnsSum : ")
-    print(" ------------------------")
-    print(" pyEnsSum.py")
-    print(" -h : prints out this usage message")
-    print(" --verbose : prints out in verbose mode (off by default)")
-    print(
-        " --sumfile : the output summary data file (default = ens.summary.nc)"
-    )
-    print(
-        " --indir : directory containing all of the ensemble runs (default = ./)"
-    )
-    print(" --esize : Number of ensemble members (default = 350)")
-    print(" --tag : Tag name used in metadata (default = cesm2_0)")
-    print(" --compset : Compset used in metadata (default = F2000climo)")
-    print(" --res : Resolution used in metadata (default = f19_f19)")
-    print(
-        " --mach : Machine name used in the metadata (default = cheyenne)"
-    )
-    print(" --tslice : the index into the time dimension (default = 1)")
-    print(
-        " --jsonfile : Json file providing the list of variables that will "
-    )
-    print(
-        " be excluded or included (default = exclude_empty.json)"
-    )
-    print(
-        " --mpi_disable : Disable mpi mode to run in serial (off by default)"
-    )
-    # print ' --cumul : '
-    print(
-        " --fIndex : Use this to start at a specified ensemble member number instead of 000 (so "
-    )
-    print(
-        " ensemble members with smaller numbers are excluded from the summary file) "
-    )
-    print(" ")
-
-
-# print 'Version 3.0.7'
-
-
-#
-# Command options for pyEnsSumPop.py
-#
-def EnsSumPop_usage():
-    print("\n Creates the summary file for an ensemble of POP data. \n")
-    print(" ------------------------")
-    print(" Args for pyEnsSumPop : ")
-    print(" ------------------------")
-    print(" pyEnsSumPop.py")
-    print(" -h : prints out this usage message")
-    print(" --verbose : prints out in verbose mode (off by default)")
-    print(
-        " --sumfile : the output summary data file (default = pop.ens.summary.nc)"
-    )
-    print(
-        " --indir : directory containing all of the ensemble runs (default = ./)"
-    )
-    # print ' --npert : Number of ensemble members (default = 40)'
-    print(" --esize : Number of ensemble members (default = 40)")
-    print(" (Note: backwards compatible with --npert)")
-    print(" --tag : Tag name used in metadata (default = cesm2_0_0)")
-    print(" --compset : Compset used in metadata (default = G)")
-    print(" --res : Resolution (used in metadata) (default = T62_g17)")
-    print(
-        " --mach : Machine name used in the metadata (default = cheyenne)"
-    )
-    print(
-        " --tslice : the time slice of the variable that we will use (default = 0)"
-    )
-    print(" --nyear : Number of years (default = 1)")
-    print(" --nmonth : Number of months (default = 12)")
-    print(
-        " --jsonfile : Json file providing the list of variables that will be"
-    )
-    print(
-        " included (RECOMMENDED: default = pop_ensemble.json)"
-    )
-    print(
-        " --mpi_disable : Disable mpi mode to run in serial (off by default)"
-    )
-    print(" ")
-
-
-#
-# Randomly pick three files from a larger set of input files
-#
-def Random_pickup(ifiles, opts_dict):
-    if opts_dict["numRunFile"] > opts_dict["eet"]:
-        nFiles = opts_dict["numRunFile"]
-    else:
-        nFiles = opts_dict["eet"]
-    if len(ifiles) > nFiles:
-        random_index = random.sample(list(range(0, len(ifiles))), nFiles)
-    else:
-        random_index = list(range(len(ifiles)))
-    new_ifiles = []
-    print("Randomly picked input files:")
-    for i in random_index:
-        new_ifiles.append(ifiles[i])
-        print(ifiles[i])
-
-    return new_ifiles
-
-
-#
-# Randomly pick opts_dict['npick'] files from a larger set of OCN files
-#
-def Random_pickup_pop(indir, opts_dict, npick):
-    random_year_range = opts_dict["nyear"]
-    random_month_range = opts_dict["nmonth"]
-    random_case_range = opts_dict["esize"]
-
-    pyear = 1
-    pmonth = 12
-
-    pcase = random.sample(list(range(0, random_case_range)), npick)
-
-    new_ifiles_temp = []
-    not_pick_files = []
-    for i in pcase:
-        wildname = (
-            "*"
-            + str(i).zfill(4)
-            + "*"
-            + str(pyear).zfill(4)
-            + "-"
-            + str(pmonth).zfill(2)
-            + "*"
-        )
-        print(wildname)
-        for filename in os.listdir(opts_dict["indir"]):
-            if fnmatch.fnmatch(filename, wildname):
-                new_ifiles_temp.append(filename)
-    for filename in os.listdir(opts_dict["indir"]):
-        if filename not in new_ifiles_temp:
-            not_pick_files.append(filename)
-    with open(
-        opts_dict["jsondir"]
-        + "random_testcase."
-        + str(npick)
-        + "."
- + str(opts_dict["seq"]) - + ".json", - "wb", - ) as fout: - json.dump( - {"not_pick_files": not_pick_files}, - fout, - sort_keys=True, - indent=4, - ensure_ascii=True, - ) - print(sorted(new_ifiles_temp)) - print(sorted(not_pick_files)) - return sorted(new_ifiles_temp) - - -# -# Check the false positive rate -# (needs updating: this is only for ensemble 151) -def check_falsepositive(opts_dict, sum_index): - - fp = np.zeros((opts_dict["nPC"],), dtype=np.float32) - fp[0] = 0.30305 - fp[1] = 0.05069 - fp[2] = 0.005745 - fp[3] = 0.000435 - fp[4] = 5.0e-05 - nPC = 50 - sigMul = 2 - minPCFail = 3 - minRunFail = 2 - numRunFile = 3 - - if opts_dict["numRunFile"] > opts_dict["eet"]: - nFiles = opts_dict["numRunFile"] - else: - nFiles = opts_dict["eet"] - - if ( - (nPC == opts_dict["nPC"]) - and (sigMul == opts_dict["sigMul"]) - and (minPCFail == opts_dict["minPCFail"]) - and (minRunFail == opts_dict["minRunFail"]) - and (numRunFile == nFiles) - ): - false_positive = fp[len(sum_index) - 1] - else: - false_positive = 1.0 - - return false_positive - - -# -# Get the shape of all variable list in tuple for all processor -# -def get_shape(shape_tuple, shape1, rank): - lst = list(shape_tuple) - lst[0] = shape1 - shape_tuple = tuple(lst) - return shape_tuple - - -# -# Get the mpi partition list for each processor -# -def get_stride_list(len_of_list, me): - slice_index = [] - for i in range(me.get_size()): - index_arr = np.arange(len_of_list) - slice_index.append(index_arr[i :: me.get_size()]) - return slice_index - - -# -# Gather arrays from each processor by the file_list to the master processor and make it an array -# -def gather_npArray_pop(npArray, me, array_shape): - - the_array = np.zeros(array_shape, dtype=np.float32) - - if me.get_rank() == 0: - j = me.get_rank() - if len(array_shape) == 1: - the_array[j] = npArray[0] - elif len(array_shape) == 2: - the_array[j, :] = npArray[:] - elif len(array_shape) == 3: - the_array[j, :, :] = npArray[:, :] - elif len(array_shape) == 4: - the_array[j, :, :, :] = npArray[:, :, :] - elif len(array_shape) == 5: - the_array[j, :, :, :, :] = npArray[:, :, :, :] - for i in range(1, me.get_size()): - if me.get_rank() == 0: - rank, npArray = me.collect() - if len(array_shape) == 1: - the_array[rank] = npArray[0] - elif len(array_shape) == 2: - the_array[rank, :] = npArray[:] - elif len(array_shape) == 3: - the_array[rank, :, :] = npArray[:, :] - elif len(array_shape) == 4: - the_array[rank, :, :, :] = npArray[:, :, :] - elif len(array_shape) == 5: - the_array[rank, :, :, :, :] = npArray[:, :, :, :] - if me.get_rank() != 0: - message = {"from_rank": me.get_rank(), "shape": npArray.shape} - me.collect(npArray) - me.sync() - return the_array - - -# -# Use input files from opts_dict['input_globs'] to get timeslices for pop ensemble -# -def get_files_from_glob(opts_dict): - in_files = [] - wildname = "*" + str(opts_dict["input_globs"]) + "*" - if os.path.exists(opts_dict["indir"]): - full_glob_str = os.path.join(opts_dict["indir"], wildname) - glob_files = glob.glob(full_glob_str) - in_files.extend(glob_files) - in_files.sort() - else: - print("ERROR: Input directory does not exist => EXITING....") - sys.exit() - n_timeslice = [] - for fname in in_files: - istr = fname.find(".nc") - temp = ( - (int(fname[istr - 7 : istr - 3]) - 1) * 12 + int(fname[istr - 2 : istr]) - 1 - ) - n_timeslice.append(temp) - return n_timeslice, in_files - - -# -# POP-ECT Compare the testcase with the ensemble summary file -# ifiles are not open -def pop_compare_raw_score(opts_dict, ifiles, 
timeslice, Var3d, Var2d):
-    rmask_var = "REGION_MASK"
-    if not opts_dict["test_failure"]:
-        nbin = opts_dict["nbin"]
-    else:
-        nbin = 1
-    Zscore = np.zeros((len(Var3d) + len(Var2d), len(ifiles), (nbin)), dtype=np.float32)
-    Zscore_tran = np.zeros(
-        (len(ifiles), len(Var3d) + len(Var2d), (nbin)), dtype=np.float32
-    )
-    failure_count = np.zeros((len(ifiles)), dtype=np.int_)
-    sum_file = nc.Dataset(opts_dict["sumfile"], "r")
-    for k, v in sum_file.variables.items():
-        if k == "ens_stddev2d":
-            ens_stddev2d = v
-        elif k == "ens_avg2d":
-            ens_avg2d = v
-        elif k == "ens_stddev3d":
-            ens_stddev3d = v
-        elif k == "ens_avg3d":
-            ens_avg3d = v
-        elif k == "time":
-            ens_time = v
-
-    # check time slice 0 for zeros, which indicates an incomplete summary file
-    sum_problem = False
-    all_zeros = not np.any(ens_stddev2d[0, :, :])
-    if all_zeros:
-        print("ERROR: ens_stddev2d field in summary file was not computed.")
-        sum_problem = True
-
-    all_zeros = not np.any(ens_avg2d[0, :, :])
-    if all_zeros:
-        print("ERROR: ens_avg2d field in summary file was not computed.")
-        sum_problem = True
-
-    all_zeros = not np.any(ens_stddev3d[0, :, :, :])
-    if all_zeros:
-        print("ERROR: ens_stddev3d field in summary file was not computed.")
-        sum_problem = True
-
-    all_zeros = not np.any(ens_avg3d[0, :, :, :])
-    if all_zeros:
-        print("ERROR: ens_avg3d field in summary file was not computed.")
-        sum_problem = True
-
-    if sum_problem:
-        print("=> EXITING....")
-        sys.exit()
-
-    npts3d = 0
-    npts2d = 0
-    is_SE = False
-    ens_timeslice = len(ens_time)
-
-    # Get the exact month from the file names
-    n_timeslice = []
-    in_file_names = []
-    if not opts_dict["mpi_enable"]:
-        n_timeslice, in_file_names = get_files_from_glob(opts_dict)
-        # print in_file_names
-        temp_list = []
-        for i in n_timeslice:
-            temp_list.append(i + 1)
-        print("STATUS: Checkpoint month(s) = ", temp_list)
-
-    # Compare an individual file with the ensemble summary file to get its zscore
-    for fcount, fid in enumerate(ifiles):
-        print(" ")
-        # If not in mpi_enable mode, the timeslice is determined by the month of the input files
-        if not opts_dict["mpi_enable"]:
-            timeslice = n_timeslice[fcount]
-
-        o_fid = nc.Dataset(fid, "r")
-        otimeSeries = o_fid.variables
-        rmask = otimeSeries[rmask_var]
-
-        print(
-            "**********"
-            + "Run "
-            + str(fcount + 1)
-            + " (file="
-            + in_file_names[fcount]
-            + "):"
-        )
-
-        if timeslice >= ens_timeslice:
-            print(
-                "WARNING: The summary file contains only ",
-                ens_timeslice,
-                " timeslices.
Skipping this run evaluation...", - ) - continue - for vcount, var_name in enumerate(Var3d): - orig = otimeSeries[var_name][0] - FillValue = otimeSeries[var_name]._FillValue - Zscore[vcount, fcount, :], has_zscore = calculate_raw_score( - var_name, - orig, - npts3d, - npts2d, - ens_avg3d[timeslice][vcount], - ens_stddev3d[timeslice][vcount], - is_SE, - opts_dict, - FillValue, - 0, - rmask, - ) - if opts_dict["test_failure"]: - temp = Zscore[vcount, fcount, 0] - print( - " " - + "{:>10}".format(var_name) - + ": " - + "{:.2%}".format(temp) - ) - if Zscore[vcount, fcount, :] < opts_dict["pop_threshold"]: - failure_count[fcount] = failure_count[fcount] + 1 - - for vcount, var_name in enumerate(Var2d): - orig = otimeSeries[var_name][0] - FillValue = otimeSeries[var_name]._FillValue - # print var_name,timeslice - Zscore[vcount + len(Var3d), fcount, :], has_zscore = calculate_raw_score( - var_name, - orig, - npts3d, - npts2d, - ens_avg2d[timeslice][vcount], - ens_stddev2d[timeslice][vcount], - is_SE, - opts_dict, - FillValue, - 0, - rmask, - ) - if opts_dict["test_failure"]: - temp = Zscore[vcount + len(Var3d), fcount, 0] - print( - " " - + "{:>10}".format(var_name) - + ": " - + "{:.2%}".format(temp) - ) - if Zscore[vcount + len(Var3d), fcount, :] < opts_dict["pop_threshold"]: - failure_count[fcount] = failure_count[fcount] + 1 - - if failure_count[fcount] > 0: - print( - "**********" - + str(failure_count[fcount]) - + " of " - + str(len(Var3d) + len(Var2d)) - + " variables failed, resulting in an overall FAIL" - + "**********" - ) - else: - print( - "**********" - + str(failure_count[fcount]) - + " of " - + str(len(Var3d) + len(Var2d)) - + " variables failed, resulting in an overall PASS" - + "**********" - ) - - o_fid.close() - - sum_file.close() - if has_zscore: - return Zscore, n_timeslice - else: - Zscore = 0 - return Zscore, n_timeslice - - -# Get the deficit row number of the standardized global mean matrix -# (AB: no longer used...) 
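#
# The sketch below is an editorial illustration of the idea shared by
# get_failure_index and get_dependent_vars_index: find the linearly
# dependent rows of the nvars x nens standardized global-mean matrix.
# A rank-revealing (column-pivoted) QR on the transpose does this in one
# factorization: the first `rank` pivots index the independent variables,
# and the remaining pivots index the dependent ones. The names used here
# (dependent_rows_sketch, demo) are invented for the illustration and are
# not part of the original pyEnsLib API.
#
import numpy as np
import scipy.linalg as sla


def dependent_rows_sketch(a_mat):
    # rows of a_mat = variables, columns = ensemble members
    rank = np.linalg.matrix_rank(a_mat)
    # pivoted QR on the transpose orders the columns (= original rows)
    # from most to least numerically independent
    _q, _r, piv = sla.qr(a_mat.transpose(), pivoting=True)
    return piv[rank:]


# Example: one row is the sum of the other two, so exactly one of the
# three rows is reported as redundant.
demo = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0], [1.0, 1.0, 3.0]])
print(dependent_rows_sketch(demo))  # one index, e.g. [1]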
-def get_failure_index(the_array): - mat_rows = the_array.shape[0] - mat_cols = the_array.shape[1] - - mat_rank = np.linalg.matrix_rank(the_array) - deficit = mat_rows - mat_rank - deficit_row = [] - x = 0 - while deficit > 0: - for i in range(mat_rows): - temp_mat = np.delete(the_array, i, axis=0) - new_rank = np.linalg.matrix_rank(temp_mat) - if new_rank == mat_rank: - # print "removing row ", i - if len(deficit_row) != 0: - # print "deficit_row=",deficit_row - x = i - for num, j in enumerate(deficit_row): - if j - num <= i: - # print "j=",j,"i=",i - x = x + 1 - deficit_row.append(x) - else: - deficit_row.append(i) - - the_array = temp_mat - mat_rows = the_array.shape[0] - mat_rank = new_rank - deficit = mat_rows - mat_rank - break - return deficit_row - - -# -# Alternative method to get the linearly dependent rows (using QR for faster perf) -# -def get_dependent_vars_index(a_mat, orig_rank): - - # initialize - dv_index = [] - - # the_array is nvars x nens - nvars = a_mat.shape[0] - - if orig_rank < nvars: - # transpose so vars are the columns - t_mat = a_mat.transpose() - # now do a rank-revealing qr (pivots for stability) - q_mat, r_mat, piv = sla.qr(t_mat, pivoting=True) - # rank = num of nonzero diag of r - r_mat_d = np.fabs(r_mat.diagonal()) - # print r_mat_d - # AB: 4/1/19: instead of an arbitrary tolerance here, we'll just remove vars according to the - # tolerance from the rank calculation already done - rank_est = orig_rank - ind_vars_index = piv[0:rank_est] - dv_index = piv[rank_est:] - - return dv_index - - -def chunk(it, size): - it = iter(it) - return iter(lambda: tuple(islice(it, size)), ()) diff --git a/tools/statistical_ensemble_test/pyCECT/pyEnsSum.py b/tools/statistical_ensemble_test/pyCECT/pyEnsSum.py deleted file mode 100644 index 935e96fde08..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/pyEnsSum.py +++ /dev/null @@ -1,617 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import configparser -import sys, getopt, os -import numpy as np -import netCDF4 as nc -import time -import re -from asaptools.partition import EqualStride, Duplicate, EqualLength -import asaptools.simplecomm as simplecomm -import pyEnsLib - -# This routine creates a summary file from an ensemble of CAM -# output files - - -def main(argv): - - # Get command line stuff and store in a dictionary - s = "tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable" - optkeys = s.split() - try: - opts, args = getopt.getopt(argv, "h", optkeys) - except getopt.GetoptError: - pyEnsLib.EnsSum_usage() - sys.exit(2) - - # Put command line options in a dictionary - also set defaults - opts_dict = {} - - # Defaults - opts_dict["tag"] = "cesm2_0" - opts_dict["compset"] = "F2000climo" - opts_dict["mach"] = "cheyenne" - opts_dict["esize"] = 350 - opts_dict["tslice"] = 1 - opts_dict["res"] = "f19_f19" - opts_dict["sumfile"] = "ens.summary.nc" - opts_dict["indir"] = "./" - opts_dict["sumfiledir"] = "./" - opts_dict["jsonfile"] = "exclude_empty.json" - opts_dict["verbose"] = False - opts_dict["mpi_enable"] = True - opts_dict["mpi_disable"] = False - opts_dict["maxnorm"] = False - opts_dict["gmonly"] = True - opts_dict["popens"] = False - opts_dict["cumul"] = False - opts_dict["regx"] = "test" - opts_dict["startMon"] = 1 - opts_dict["endMon"] = 1 - opts_dict["fIndex"] = 151 - - # This creates the dictionary of input arguments - opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 
"ES", opts_dict) - - verbose = opts_dict["verbose"] - - st = opts_dict["esize"] - esize = int(st) - - if opts_dict["popens"] == True: - print( - "ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens) => EXITING...." - ) - sys.exit() - - if not ( - opts_dict["tag"] - and opts_dict["compset"] - and opts_dict["mach"] - or opts_dict["res"] - ): - print( - "ERROR: Please specify --tag, --compset, --mach and --res options => EXITING...." - ) - sys.exit() - - if opts_dict["mpi_disable"] == True: - opts_dict["mpi_enable"] = False - - # Now find file names in indir - input_dir = opts_dict["indir"] - # The var list that will be excluded - ex_varlist = [] - inc_varlist = [] - - # Create a mpi simplecomm object - if opts_dict["mpi_enable"]: - me = simplecomm.create_comm() - else: - me = simplecomm.create_comm(not opts_dict["mpi_enable"]) - - if me.get_rank() == 0: - print("STATUS: Running pyEnsSum.py") - - if me.get_rank() == 0 and (verbose == True): - print(opts_dict) - print("STATUS: Ensemble size for summary = ", esize) - - exclude = False - if me.get_rank() == 0: - if opts_dict["jsonfile"]: - inc_varlist = [] - # Read in the excluded or included var list - ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ES") - if exclude == False: - inc_varlist = ex_varlist - ex_varlist = [] - - # Broadcast the excluded var list to each processor - if opts_dict["mpi_enable"]: - exclude = me.partition(exclude, func=Duplicate(), involved=True) - if exclude: - ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True) - else: - inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True) - - in_files = [] - if os.path.exists(input_dir): - # Get the list of files - in_files_temp = os.listdir(input_dir) - in_files = sorted(in_files_temp) - - # Make sure we have enough - num_files = len(in_files) - if me.get_rank() == 0 and (verbose == True): - print("VERBOSE: Number of files in input directory = ", num_files) - if num_files < esize: - if me.get_rank() == 0 and (verbose == True): - print( - "VERBOSE: Number of files in input directory (", - num_files, - ") is less than specified ensemble size of ", - esize, - ) - sys.exit(2) - if num_files > esize: - if me.get_rank() == 0 and (verbose == True): - print( - "VERBOSE: Note that the number of files in ", - input_dir, - "is greater than specified ensemble size of ", - esize, - "\nwill just use the first ", - esize, - "files", - ) - else: - if me.get_rank() == 0: - print("ERROR: Input directory: ", input_dir, " not found") - sys.exit(2) - - if opts_dict["cumul"]: - if opts_dict["regx"]: - in_files_list = get_cumul_filelist( - opts_dict, opts_dict["indir"], opts_dict["regx"] - ) - in_files = me.partition(in_files_list, func=EqualLength(), involved=True) - if me.get_rank() == 0 and (verbose == True): - print("VERBOSE: in_files = ", in_files) - - # Check full file names in input directory (don't open yet) - full_in_files = [] - if me.get_rank() == 0 and opts_dict["verbose"]: - print("VERBOSE: Input files are: ") - - for onefile in in_files[0:esize]: - fname = input_dir + "/" + onefile - if me.get_rank() == 0 and opts_dict["verbose"]: - print(fname) - if os.path.isfile(fname): - full_in_files.append(fname) - else: - if me.get_rank() == 0: - print("ERROR: Could not locate file ", fname, " => EXITING....") - sys.exit() - - # open just the first file - first_file = nc.Dataset(full_in_files[0], "r") - - # Store dimensions of the input fields - if me.get_rank() == 0 and (verbose == True): - print("VERBOSE: Getting spatial 
dimensions") - nlev = -1 - nilev = -1 - ncol = -1 - nlat = -1 - nlon = -1 - lonkey = "" - latkey = "" - # Look at first file and get dims - input_dims = first_file.dimensions - ndims = len(input_dims) - - for key in input_dims: - if key == "lev": - nlev = len(input_dims["lev"]) - elif key == "ilev": - nilev = len(input_dims["ilev"]) - elif key == "ncol": - ncol = len(input_dims["ncol"]) - elif (key == "nlon") or (key == "lon"): - nlon = len(input_dims[key]) - lonkey = key - elif (key == "nlat") or (key == "lat"): - nlat = len(input_dims[key]) - latkey = key - - if nlev == -1: - if me.get_rank() == 0: - print("ERROR: could not locate a valid dimension (lev) => EXITING....") - sys.exit() - - if (ncol == -1) and ((nlat == -1) or (nlon == -1)): - if me.get_rank() == 0: - print("ERROR: Need either lat/lon or ncol => EXITING....") - sys.exit() - - # Check if this is SE or FV data - if ncol != -1: - is_SE = True - else: - is_SE = False - - # output dimensions - if me.get_rank() == 0 and (verbose == True): - print("lev = ", nlev) - if is_SE == True: - print("ncol = ", ncol) - else: - print("nlat = ", nlat) - print("nlon = ", nlon) - - # Get 2d vars, 3d vars and all vars (For now include all variables) - vars_dict_all = first_file.variables - - # Remove the excluded variables (specified in json file) from variable dictionary - if exclude: - vars_dict = vars_dict_all - for i in ex_varlist: - if i in vars_dict: - del vars_dict[i] - # Given an included var list, remove all the variables that are not on the list - else: - vars_dict = vars_dict_all.copy() - for k, v in vars_dict_all.items(): - if (k not in inc_varlist) and (vars_dict_all[k].typecode() == "f"): - del vars_dict[k] - - num_vars = len(vars_dict) - - str_size = 0 - d2_var_names = [] - d3_var_names = [] - num_2d = 0 - num_3d = 0 - - # Which are 2d, which are 3d and max str_size - for k, v in vars_dict.items(): - var = k - vd = v.dimensions # all the variable's dimensions (names) - vr = len(v.dimensions) # num dimension - vs = v.shape # dim values - is_2d = False - is_3d = False - if is_SE == True: # (time, lev, ncol) or (time, ncol) - if (vr == 2) and (vs[1] == ncol): - is_2d = True - num_2d += 1 - elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev): - is_3d = True - num_3d += 1 - else: # (time, lev, nlon, nlon) or (time, nlat, nlon) - if (vr == 3) and (vs[1] == nlat and vs[2] == nlon): - is_2d = True - num_2d += 1 - elif (vr == 4) and ( - vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1] == nilev) - ): - is_3d = True - num_3d += 1 - - if is_3d == True: - str_size = max(str_size, len(k)) - d3_var_names.append(k) - elif is_2d == True: - str_size = max(str_size, len(k)) - d2_var_names.append(k) - - if me.get_rank() == 0 and (verbose == True): - print("VERBOSE: Number of variables found: ", num_3d + num_2d) - print( - "VERBOSE: 3D variables: " + str(num_3d) + ", 2D variables: " + str(num_2d) - ) - - # Now sort these and combine (this sorts caps first, then lower case - - # which is what we want) - d2_var_names.sort() - d3_var_names.sort() - - if esize < num_2d + num_3d: - if me.get_rank() == 0: - print( - "************************************************************************************************************************************" - ) - print( - " ERROR: the total number of 3D and 2D variables " - + str(num_2d + num_3d) - + " is larger than the number of ensemble files " - + str(esize) - ) - print( - " Cannot generate ensemble summary file, please remove more variables from your included variable list," - ) - print( - " or add 
more variables in your excluded variable list => EXITING...." - ) - print( - "************************************************************************************************************************************" - ) - sys.exit() - # All vars is 3d vars first (sorted), the 2d vars - all_var_names = list(d3_var_names) - all_var_names += d2_var_names - n_all_var_names = len(all_var_names) - - # Rank 0 - Create new summary ensemble file - this_sumfile = opts_dict["sumfile"] - - # check if directory is valid - sum_dir = os.path.dirname(this_sumfile) - if len(sum_dir) == 0: - sum_dir = "." - if os.path.exists(sum_dir) == False: - if me.get_rank() == 0: - print("ERROR: Summary file directory: ", sum_dir, " not found") - sys.exit(2) - - if me.get_rank() == 0: - - if verbose == True: - print("VERBOSE: Creating ", this_sumfile, " ...") - - if os.path.exists(this_sumfile): - os.unlink(this_sumfile) - nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC") - - # Set dimensions - if verbose == True: - print("VERBOSE: Setting dimensions .....") - if is_SE == True: - nc_sumfile.createDimension("ncol", ncol) - else: - nc_sumfile.createDimension("nlat", nlat) - nc_sumfile.createDimension("nlon", nlon) - - nc_sumfile.createDimension("nlev", nlev) - nc_sumfile.createDimension("ens_size", esize) - nc_sumfile.createDimension("nvars", num_3d + num_2d) - nc_sumfile.createDimension("nvars3d", num_3d) - nc_sumfile.createDimension("nvars2d", num_2d) - nc_sumfile.createDimension("str_size", str_size) - - # Set global attributes - now = time.strftime("%c") - if verbose == True: - print("VERBOSE: Setting global attributes .....") - nc_sumfile.creation_date = now - nc_sumfile.title = "CAM verification ensemble summary file" - nc_sumfile.tag = opts_dict["tag"] - nc_sumfile.compset = opts_dict["compset"] - nc_sumfile.resolution = opts_dict["res"] - nc_sumfile.machine = opts_dict["mach"] - - # Create variables - if verbose == True: - print("VERBOSE: Creating variables .....") - v_lev = nc_sumfile.createVariable("lev", "f8", ("nlev",)) - v_vars = nc_sumfile.createVariable("vars", "S1", ("nvars", "str_size")) - v_var3d = nc_sumfile.createVariable("var3d", "S1", ("nvars3d", "str_size")) - v_var2d = nc_sumfile.createVariable("var2d", "S1", ("nvars2d", "str_size")) - - v_gm = nc_sumfile.createVariable("global_mean", "f8", ("nvars", "ens_size")) - v_standardized_gm = nc_sumfile.createVariable( - "standardized_gm", "f8", ("nvars", "ens_size") - ) - v_loadings_gm = nc_sumfile.createVariable( - "loadings_gm", "f8", ("nvars", "nvars") - ) - v_mu_gm = nc_sumfile.createVariable("mu_gm", "f8", ("nvars",)) - v_sigma_gm = nc_sumfile.createVariable("sigma_gm", "f8", ("nvars",)) - v_sigma_scores_gm = nc_sumfile.createVariable( - "sigma_scores_gm", "f8", ("nvars",) - ) - - # Assign vars, var3d and var2d - if verbose == True: - print("VERBOSE: Assigning vars, var3d, and var2d .....") - - eq_all_var_names = [] - eq_d3_var_names = [] - eq_d2_var_names = [] - - l_eq = len(all_var_names) - for i in range(l_eq): - tt = list(all_var_names[i]) - l_tt = len(tt) - if l_tt < str_size: - extra = list(" ") * (str_size - l_tt) - tt.extend(extra) - eq_all_var_names.append(tt) - - l_eq = len(d3_var_names) - for i in range(l_eq): - tt = list(d3_var_names[i]) - l_tt = len(tt) - if l_tt < str_size: - extra = list(" ") * (str_size - l_tt) - tt.extend(extra) - eq_d3_var_names.append(tt) - - l_eq = len(d2_var_names) - for i in range(l_eq): - tt = list(d2_var_names[i]) - l_tt = len(tt) - if l_tt < str_size: - extra = list(" ") * (str_size - l_tt) - 
tt.extend(extra) - eq_d2_var_names.append(tt) - - v_vars[:] = eq_all_var_names[:] - v_var3d[:] = eq_d3_var_names[:] - v_var2d[:] = eq_d2_var_names[:] - - # Time-invarient metadata - if verbose == True: - print("VERBOSE: Assigning time invariant metadata .....") - # lev_data = np.zeros(num_lev,dtype=np.float64) - lev_data = first_file.variables["lev"] - v_lev[:] = lev_data[:] - # end of rank=0 work - - # All: - tslice = opts_dict["tslice"] - if not opts_dict["cumul"]: - # Partition the var list - var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True) - var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True) - else: - var3_list_loc = d3_var_names - var2_list_loc = d2_var_names - - # close first_file - first_file.close() - - # Calculate global means # - if me.get_rank() == 0 and (verbose == True): - print("VERBOSE: Calculating global means .....") - if not opts_dict["cumul"]: - gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary( - full_in_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict - ) - if me.get_rank() == 0 and (verbose == True): - print("VERBOSE: Finished calculating global means .....") - - # gather to rank = 0 - if opts_dict["mpi_enable"]: - - if not opts_dict["cumul"]: - # Gather the 3d variable results from all processors to the master processor - slice_index = get_stride_list(len(d3_var_names), me) - - # Gather global means 3d results - gm3d = gather_npArray( - gm3d, me, slice_index, (len(d3_var_names), len(full_in_files)) - ) - - # Gather 2d variable results from all processors to the master processor - slice_index = get_stride_list(len(d2_var_names), me) - - # Gather global means 2d results - gm2d = gather_npArray( - gm2d, me, slice_index, (len(d2_var_names), len(full_in_files)) - ) - - # gather variables ro exclude (in pre_pca) - var_list = gather_list(var_list, me) - - else: - gmall = np.concatenate((temp1, temp2), axis=0) - gmall = pyEnsLib.gather_npArray_pop( - gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)) - ) - - # rank =0 : complete calculations for summary file - if me.get_rank() == 0: - if not opts_dict["cumul"]: - gmall = np.concatenate((gm3d, gm2d), axis=0) - else: - gmall_temp = np.transpose(gmall[:, :]) - gmall = gmall_temp - - # PCA prep and calculation - ( - mu_gm, - sigma_gm, - standardized_global_mean, - loadings_gm, - scores_gm, - b_exit, - ) = pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me) - - # if PCA calc encounters an error, then remove the summary file and exit - if b_exit: - nc_sumfile.close() - os.unlink(this_sumfile) - print("STATUS: Summary could not be created.") - sys.exit(2) - - v_gm[:, :] = gmall[:, :] - v_standardized_gm[:, :] = standardized_global_mean[:, :] - v_mu_gm[:] = mu_gm[:] - v_sigma_gm[:] = sigma_gm[:] - v_loadings_gm[:, :] = loadings_gm[:, :] - v_sigma_scores_gm[:] = scores_gm[:] - - print("STATUS: Summary file is complete.") - - nc_sumfile.close() - - -def get_cumul_filelist(opts_dict, indir, regx): - if not opts_dict["indir"]: - print("input dir is not specified") - sys.exit(2) - # regx='(pgi(.)*-(01|02))' - regx_list = ["mon", "gnu", "pgi"] - all_files = [] - for prefix in regx_list: - for i in range( - opts_dict["fIndex"], opts_dict["fIndex"] + opts_dict["esize"] / 3 - ): - for j in range(opts_dict["startMon"], opts_dict["endMon"] + 1): - mon_str = str(j).zfill(2) - regx = "(^" + prefix + "(.)*" + str(i) + "(.)*-(" + mon_str + "))" - # print 'regx=',regx - res = [f for f in os.listdir(indir) if re.search(regx, f)] - in_files = 
sorted(res) - all_files.extend(in_files) - # print "all_files=",all_files - # in_files=res - return all_files - - -# -# Get the shape of all variable list in tuple for all processor -# -def get_shape(shape_tuple, shape1, rank): - lst = list(shape_tuple) - lst[0] = shape1 - shape_tuple = tuple(lst) - return shape_tuple - - -# -# Get the mpi partition list for each processor -# -def get_stride_list(len_of_list, me): - slice_index = [] - for i in range(me.get_size()): - index_arr = np.arange(len_of_list) - slice_index.append(index_arr[i :: me.get_size()]) - return slice_index - - -def gather_list(var_list, me): - whole_list = [] - if me.get_rank() == 0: - whole_list.extend(var_list) - for i in range(1, me.get_size()): - if me.get_rank() == 0: - rank_id, var_list = me.collect() - whole_list.extend(var_list) - if me.get_rank() != 0: - me.collect(var_list) - me.sync() - return whole_list - - -# -# Gather arrays from each processor by the var_list to the master processor and make it an array -# -def gather_npArray(npArray, me, slice_index, array_shape): - # the_array=np.zeros(array_shape,dtype=np.float32) - the_array = np.zeros(array_shape, dtype=np.float64) - if me.get_rank() == 0: - k = 0 - for j in slice_index[me.get_rank()]: - the_array[j, :] = npArray[k, :] - k = k + 1 - for i in range(1, me.get_size()): - if me.get_rank() == 0: - rank, npArray = me.collect() - k = 0 - for j in slice_index[rank]: - the_array[j, :] = npArray[k, :] - k = k + 1 - if me.get_rank() != 0: - message = {"from_rank": me.get_rank(), "shape": npArray.shape} - me.collect(npArray) - me.sync() - return the_array - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/tools/statistical_ensemble_test/pyCECT/pyEnsSumPop.py b/tools/statistical_ensemble_test/pyCECT/pyEnsSumPop.py deleted file mode 100644 index 483c9e4476d..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/pyEnsSumPop.py +++ /dev/null @@ -1,371 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import configparser -import sys, getopt, os -import numpy as np -import netCDF4 as nc -import time -import re -from asaptools.partition import EqualStride, Duplicate -import asaptools.simplecomm as simplecomm -import pyEnsLib - - -def main(argv): - - # Get command line stuff and store in a dictionary - s = "nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable mpi_disable nrand= rand seq= jsondir= esize=" - optkeys = s.split() - try: - opts, args = getopt.getopt(argv, "h", optkeys) - except getopt.GetoptError: - pyEnsLib.EnsSumPop_usage() - sys.exit(2) - - # Put command line options in a dictionary - also set defaults - opts_dict = {} - - # Defaults - opts_dict["tag"] = "cesm2_1_0" - opts_dict["compset"] = "G" - opts_dict["mach"] = "cheyenne" - opts_dict["tslice"] = 0 - opts_dict["nyear"] = 1 - opts_dict["nmonth"] = 12 - opts_dict["esize"] = 40 - opts_dict["npert"] = 0 # for backwards compatible - opts_dict["nbin"] = 40 - opts_dict["minrange"] = 0.0 - opts_dict["maxrange"] = 4.0 - opts_dict["res"] = "T62_g17" - opts_dict["sumfile"] = "pop.ens.summary.nc" - opts_dict["indir"] = "./" - opts_dict["jsonfile"] = "pop_ensemble.json" - opts_dict["verbose"] = True - opts_dict["mpi_enable"] = True - opts_dict["mpi_disable"] = False - # opts_dict['zscoreonly'] = True - opts_dict["popens"] = True - opts_dict["nrand"] = 40 - opts_dict["rand"] = False - opts_dict["seq"] = 0 - opts_dict["jsondir"] = "./" - - # This creates the dictionary of input arguments - # print "before parseconfig" - opts_dict = 
pyEnsLib.getopt_parseconfig(opts, optkeys, "ESP", opts_dict) - - verbose = opts_dict["verbose"] - nbin = opts_dict["nbin"] - - if opts_dict["mpi_disable"]: - opts_dict["mpi_enable"] = False - - # still have npert for backwards compatibility - check if it was set - # and override esize - if opts_dict["npert"] > 0: - user_size = opts_dict["npert"] - print( - "WARNING: User specified value for --npert will override --esize. Please consider using --esize instead of --npert in the future." - ) - opts_dict["esize"] = user_size - - # Now find file names in indir - input_dir = opts_dict["indir"] - - # Create a mpi simplecomm object - if opts_dict["mpi_enable"]: - me = simplecomm.create_comm() - else: - me = simplecomm.create_comm(False) - - if opts_dict["jsonfile"]: - # Read in the included var list - Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ESP") - str_size = 0 - for str in Var3d: - if str_size < len(str): - str_size = len(str) - for str in Var2d: - if str_size < len(str): - str_size = len(str) - - if me.get_rank() == 0: - print("STATUS: Running pyEnsSumPop!") - - if verbose: - print("VERBOSE: opts_dict = ") - print(opts_dict) - - in_files = [] - if os.path.exists(input_dir): - # Pick up the 'nrand' random number of input files to generate summary files - if opts_dict["rand"]: - in_files = pyEnsLib.Random_pickup_pop( - input_dir, opts_dict, opts_dict["nrand"] - ) - else: - # Get the list of files - in_files_temp = os.listdir(input_dir) - in_files = sorted(in_files_temp) - num_files = len(in_files) - - else: - if me.get_rank() == 0: - print("ERROR: Input directory: ", input_dir, " not found => EXITING....") - sys.exit(2) - - # make sure we have enough files - files_needed = opts_dict["nmonth"] * opts_dict["esize"] * opts_dict["nyear"] - if num_files < files_needed: - if me.get_rank() == 0: - print( - "ERROR: Input directory does not contain enough files (must be esize*nyear*nmonth = ", - files_needed, - " ) and it has only ", - num_files, - " files).", - ) - sys.exit(2) - - # Partition the input file list (ideally we have one processor per month) - in_file_list = me.partition(in_files, func=EqualStride(), involved=True) - - # Check the files in the input directory - full_in_files = [] - if me.get_rank() == 0 and opts_dict["verbose"]: - print("VERBOSE: Input files are:") - - for onefile in in_file_list: - fname = input_dir + "/" + onefile - if opts_dict["verbose"]: - print("my_rank = ", me.get_rank(), " ", fname) - if os.path.isfile(fname): - full_in_files.append(fname) - else: - print("ERROR: Could not locate file: " + fname + " => EXITING....") - sys.exit() - - # open just the first file (all procs) - first_file = nc.Dataset(full_in_files[0], "r") - - # Store dimensions of the input fields - if (verbose == True) and me.get_rank() == 0: - print("VERBOSE: Getting spatial dimensions") - nlev = -1 - nlat = -1 - nlon = -1 - - # Look at first file and get dims - input_dims = first_file.dimensions - ndims = len(input_dims) - - # Make sure all files have the same dimensions - if (verbose == True) and me.get_rank() == 0: - print("VERBOSE: Checking dimensions ...") - for key in input_dims: - if key == "z_t": - nlev = len(input_dims["z_t"]) - elif key == "nlon": - nlon = len(input_dims["nlon"]) - elif key == "nlat": - nlat = len(input_dims["nlat"]) - - # Rank 0: prepare new summary ensemble file - this_sumfile = opts_dict["sumfile"] - if me.get_rank() == 0: - if os.path.exists(this_sumfile): - os.unlink(this_sumfile) - - if verbose: - print("VERBOSE: Creating ", this_sumfile, " ...") - 
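# --- Editorial sketch (not part of the original script): the rank-0 block
# below follows the standard netCDF4-python creation pattern -- open a
# Dataset in write mode, declare dimensions, set global attributes, then
# define variables over those dimensions and fill them. A minimal
# self-contained version of that pattern follows; the file name, dimension
# sizes, and variable names are invented for illustration only.

def _netcdf_skeleton_sketch(path="example_summary.nc"):
    # local imports keep the sketch self-contained
    import numpy as np
    import netCDF4 as nc

    ds = nc.Dataset(path, "w", format="NETCDF4_CLASSIC")
    ds.createDimension("nlat", 384)   # fixed-size dimension
    ds.createDimension("time", None)  # unlimited (record) dimension
    ds.title = "toy summary file"     # global attribute
    v = ds.createVariable("ens_avg", "f4", ("time", "nlat"))
    v[0, :] = np.zeros(384, dtype=np.float32)  # write one record
    ds.close()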
- nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC") - - # Set dimensions - if verbose: - print("VERBOSE: Setting dimensions .....") - nc_sumfile.createDimension("nlat", nlat) - nc_sumfile.createDimension("nlon", nlon) - nc_sumfile.createDimension("nlev", nlev) - nc_sumfile.createDimension("time", None) - nc_sumfile.createDimension("ens_size", opts_dict["esize"]) - nc_sumfile.createDimension("nbin", opts_dict["nbin"]) - nc_sumfile.createDimension("nvars", len(Var3d) + len(Var2d)) - nc_sumfile.createDimension("nvars3d", len(Var3d)) - nc_sumfile.createDimension("nvars2d", len(Var2d)) - nc_sumfile.createDimension("str_size", str_size) - - # Set global attributes - now = time.strftime("%c") - if verbose: - print("VERBOSE: Setting global attributes .....") - nc_sumfile.creation_date = now - nc_sumfile.title = "POP verification ensemble summary file" - nc_sumfile.tag = opts_dict["tag"] - nc_sumfile.compset = opts_dict["compset"] - nc_sumfile.resolution = opts_dict["res"] - nc_sumfile.machine = opts_dict["mach"] - - # Create variables - if verbose: - print("VERBOSE: Creating variables .....") - v_lev = nc_sumfile.createVariable("z_t", "f", ("nlev",)) - v_vars = nc_sumfile.createVariable("vars", "S1", ("nvars", "str_size")) - v_var3d = nc_sumfile.createVariable("var3d", "S1", ("nvars3d", "str_size")) - v_var2d = nc_sumfile.createVariable("var2d", "S1", ("nvars2d", "str_size")) - v_time = nc_sumfile.createVariable("time", "d", ("time",)) - v_ens_avg3d = nc_sumfile.createVariable( - "ens_avg3d", "f", ("time", "nvars3d", "nlev", "nlat", "nlon") - ) - v_ens_stddev3d = nc_sumfile.createVariable( - "ens_stddev3d", "f", ("time", "nvars3d", "nlev", "nlat", "nlon") - ) - v_ens_avg2d = nc_sumfile.createVariable( - "ens_avg2d", "f", ("time", "nvars2d", "nlat", "nlon") - ) - v_ens_stddev2d = nc_sumfile.createVariable( - "ens_stddev2d", "f", ("time", "nvars2d", "nlat", "nlon") - ) - v_RMSZ = nc_sumfile.createVariable( - "RMSZ", "f", ("time", "nvars", "ens_size", "nbin") - ) - - # Assign vars, var3d and var2d - if verbose: - print("VERBOSE: Assigning vars, var3d, and var2d .....") - - eq_all_var_names = [] - eq_d3_var_names = [] - eq_d2_var_names = [] - all_var_names = list(Var3d) - all_var_names += Var2d - l_eq = len(all_var_names) - for i in range(l_eq): - tt = list(all_var_names[i]) - l_tt = len(tt) - if l_tt < str_size: - extra = list(" ") * (str_size - l_tt) - tt.extend(extra) - eq_all_var_names.append(tt) - - l_eq = len(Var3d) - for i in range(l_eq): - tt = list(Var3d[i]) - l_tt = len(tt) - if l_tt < str_size: - extra = list(" ") * (str_size - l_tt) - tt.extend(extra) - eq_d3_var_names.append(tt) - - l_eq = len(Var2d) - for i in range(l_eq): - tt = list(Var2d[i]) - l_tt = len(tt) - if l_tt < str_size: - extra = list(" ") * (str_size - l_tt) - tt.extend(extra) - eq_d2_var_names.append(tt) - - v_vars[:] = eq_all_var_names[:] - v_var3d[:] = eq_d3_var_names[:] - v_var2d[:] = eq_d2_var_names[:] - - # Time-invarient metadata - if verbose: - print("VERBOSE: Assigning time invariant metadata .....") - vars_dict = first_file.variables - lev_data = vars_dict["z_t"] - v_lev[:] = lev_data[:] - - # end of rank 0 - - # All: - # Time-varient metadata - if verbose: - if me.get_rank() == 0: - print("VERBOSE: Assigning time variant metadata .....") - vars_dict = first_file.variables - time_value = vars_dict["time"] - time_array = np.array([time_value]) - time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(),)) - if me.get_rank() == 0: - v_time[:] = time_array[:] - - # Assign zero values 
to first time slice of RMSZ and avg and stddev for 2d & 3d - # in case of a calculation problem before finishing - e_size = opts_dict["esize"] - b_size = opts_dict["nbin"] - z_ens_avg3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32) - z_ens_stddev3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32) - z_ens_avg2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32) - z_ens_stddev2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32) - z_RMSZ = np.zeros(((len(Var3d) + len(Var2d)), e_size, b_size), dtype=np.float32) - - # rank 0 (put zero values in summary file) - if me.get_rank() == 0: - v_RMSZ[0, :, :, :] = z_RMSZ[:, :, :] - v_ens_avg3d[0, :, :, :, :] = z_ens_avg3d[:, :, :, :] - v_ens_stddev3d[0, :, :, :, :] = z_ens_stddev3d[:, :, :, :] - v_ens_avg2d[0, :, :, :] = z_ens_avg2d[:, :, :] - v_ens_stddev2d[0, :, :, :] = z_ens_stddev2d[:, :, :] - - # close file[0] - first_file.close() - - # Calculate RMSZ scores - if verbose == True and me.get_rank() == 0: - print("VERBOSE: Calculating RMSZ scores .....") - - ( - zscore3d, - zscore2d, - ens_avg3d, - ens_stddev3d, - ens_avg2d, - ens_stddev2d, - ) = pyEnsLib.calc_rmsz(full_in_files, Var3d, Var2d, opts_dict) - - if verbose == True and me.get_rank() == 0: - print("VERBOSE: Finished with RMSZ scores .....") - - # Collect from all processors - if opts_dict["mpi_enable"]: - # Gather the 3d variable results from all processors to the master processor - - zmall = np.concatenate((zscore3d, zscore2d), axis=0) - zmall = pyEnsLib.gather_npArray_pop( - zmall, - me, - (me.get_size(), len(Var3d) + len(Var2d), len(full_in_files), nbin), - ) - - ens_avg3d = pyEnsLib.gather_npArray_pop( - ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon) - ) - ens_avg2d = pyEnsLib.gather_npArray_pop( - ens_avg2d, me, (me.get_size(), len(Var2d), (nlat), nlon) - ) - ens_stddev3d = pyEnsLib.gather_npArray_pop( - ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon) - ) - ens_stddev2d = pyEnsLib.gather_npArray_pop( - ens_stddev2d, me, (me.get_size(), len(Var2d), (nlat), nlon) - ) - - # Assign to summary file: - if me.get_rank() == 0: - - v_RMSZ[:, :, :, :] = zmall[:, :, :, :] - v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :] - v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :] - v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :] - v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :] - - print("STATUS: PyEnsSumPop has completed.") - - nc_sumfile.close() - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/tools/statistical_ensemble_test/pyCECT/pyPlots.py b/tools/statistical_ensemble_test/pyCECT/pyPlots.py deleted file mode 100644 index bd0a4e8d86d..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/pyPlots.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python - -import xarray as xr -import matplotlib.pyplot as plt -import numpy as np -import seaborn as sns - -# change to input argument later -filename = "savefile.nc" -test = "hr_mpeO3_9ts" -ptest = " (" + test + ")" - -ds = xr.open_dataset(filename) - -test_size = ds.dims["test_size"] -ens_size = ds.dims["ens_size"] -nvars = ds.dims["nvars"] - -# get var list and names -vars = ds["vars"].values - -# get test scores and means -t_scores = ds["scores"].values -t_std_gm = ds["std_gm"].values - -# get ens scores distribution and ens means -ens_score_dist = ds["ens_sigma_scores"].values -ens_std_gm = ds["ens_std_gm"].values - -all_outside99 = [] -two_outside99 = [] -one_outside99 = [] -all_oneside_IQR = [] - -# go through each variables -for i, thisvar 
in enumerate(vars): - # print i - # print thisvar - # ensemble distribution information - p995 = np.percentile(ens_std_gm[i, :], 99.5) - p75 = np.percentile(ens_std_gm[i, :], 75) - p25 = np.percentile(ens_std_gm[i, :], 25) - p05 = np.percentile(ens_std_gm[i, :], 0.5) - # print p995 - - isout_995 = 0 - isout_75 = 0 - isout_25 = 0 - # go through the test cases - - # outside of 995 or all on one side? - for j in range(test_size): - # print j - thisval = t_std_gm[i, j] - if thisval > p995 or thisval < p05: - isout_995 = isout_995 + 1 - if thisval > p75: - isout_75 = isout_75 + 1 - if thisval < p25: - isout_25 = isout_25 + 1 - - if isout_995 == 1: - one_outside99.append(i) - elif isout_995 == 2: - two_outside99.append(i) - elif isout_995 == 3: - all_outside99.append(i) - - if isout_75 == 3 or isout_25 == 3: - all_oneside_IQR.append(i) - -num_one99 = len(one_outside99) -num_two99 = len(two_outside99) -num_all99 = len(all_outside99) -num_oneside = len(all_oneside_IQR) - -c = set(one_outside99) | set(two_outside99) | set(all_outside99) | set(all_oneside_IQR) -uni = len(c) - -print("total variables = ", nvars) -print("one test outside 99th percentile = ", num_one99) -print("two tests outside 99th percentile = ", num_two99) -print("three (all) tests outside 99th percentile = ", num_all99) -print("all tests on one side of IQR = ", num_oneside) -print("unique number of variables that fall into the above categories = ", uni) - -# now make plots -ens_list_array = [] -test_points = [] -flierprops = dict(marker="x", markerfacecolor="gray", markersize=1) -# all outside -if num_all99 > 0: - sf_name = "all_out99_" + test + ".png" - for i in all_outside99: - ens_list_array.append(ens_std_gm[i, :]) - test_points.append(t_std_gm[i, :]) - labels = vars[all_outside99] - f = plt.figure() - sns.boxplot(data=ens_list_array, flierprops=flierprops, whis=[0.5, 99.5]) - # sns.boxplot(data=ens_list_array, fliersize= 2.0) - sns.stripplot(data=test_points, jitter=True, color="r", size=3, marker="D") - plt.title("Variables with all (three) tests outside the 99th percentile" + ptest) - plt.ylabel("standardized global means") - plt.xticks(range(num_all99), labels, fontsize=8, rotation="vertical") - plt.subplots_adjust(bottom=0.2) - plt.savefig(sf_name, bbox_inches="tight") - f.clear() - plt.close(f) - -# two outside -if num_two99 > 0: - sf_name = "two_out99_" + test + ".png" - ens_list_array = [] - test_points = [] - for i in two_outside99: - ens_list_array.append(ens_std_gm[i, :]) - test_points.append(t_std_gm[i, :]) - labels = vars[two_outside99] - f = plt.figure() - sns.boxplot(data=ens_list_array, flierprops=flierprops, whis=[0.5, 99.5]) - # sns.boxplot(data=ens_list_array, fliersize= 2.0) - sns.stripplot(data=test_points, jitter=True, color="r", size=3, marker="D") - plt.title("Variables with two tests outside the 99th percentile" + ptest) - plt.ylabel("standardized global means") - plt.xticks(range(num_two99), labels, fontsize=8, rotation="vertical") - plt.subplots_adjust(bottom=0.2) - plt.savefig(sf_name, bbox_inches="tight") - f.clear() - plt.close(f) - -# one outside -if num_one99 > 0: - sf_name = "one_out99_" + test + ".png" - ens_list_array = [] - test_points = [] - for i in one_outside99: - ens_list_array.append(ens_std_gm[i, :]) - test_points.append(t_std_gm[i, :]) - labels = vars[one_outside99] - f = plt.figure() - sns.boxplot(data=ens_list_array, flierprops=flierprops, whis=[0.5, 99.5]) - # sns.boxplot(data=ens_list_array, fliersize= 2.0) - sns.stripplot(data=test_points, jitter=True, color="r", size=3, 
marker="D") - plt.title("Variables with one test outside the 99th percentile" + ptest) - plt.ylabel("standardized global means") - plt.xticks(range(num_one99), labels, fontsize=8, rotation="vertical") - plt.subplots_adjust(bottom=0.2) - plt.savefig(sf_name, bbox_inches="tight") - f.clear() - plt.close(f) - -# oneside -if num_oneside > 0: - sf_name = "oneside_IQR_" + test + ".png" - ens_list_array = [] - test_points = [] - for i in all_oneside_IQR: - ens_list_array.append(ens_std_gm[i, :]) - test_points.append(t_std_gm[i, :]) - labels = vars[all_oneside_IQR] - f = plt.figure() - sns.boxplot(data=ens_list_array, flierprops=flierprops, whis=[0.5, 99.5]) - # sns.boxplot(data=ens_list_array, fliersize= 2.0) - sns.stripplot(data=test_points, jitter=True, color="r", size=3, marker="D") - plt.title("Variables with all tests on one side of the IQR" + ptest) - plt.ylabel("standardized global means") - plt.xticks(range(num_oneside), labels, fontsize=8, rotation="vertical") - plt.subplots_adjust(bottom=0.2) - plt.savefig(sf_name, bbox_inches="tight") - f.clear() - plt.close(f) diff --git a/tools/statistical_ensemble_test/pyCECT/readthedocs.yml b/tools/statistical_ensemble_test/pyCECT/readthedocs.yml deleted file mode 100644 index 2f5d6d1c2fd..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/readthedocs.yml +++ /dev/null @@ -1,8 +0,0 @@ -python: - version: 3 - system_packages: true - install: - - requirements: docs/requirements.txt - - sphinx: - configuration: docs/conf.py diff --git a/tools/statistical_ensemble_test/pyCECT/test_pop_CECT.sh b/tools/statistical_ensemble_test/pyCECT/test_pop_CECT.sh deleted file mode 100644 index 8e2c3e55c80..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/test_pop_CECT.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/tcsh - -#PBS -A NIOW0001 -#PBS -N pop-CECT -#PBS -q regular -#PBS -l select=1:ncpus=1:mpiprocs=1 -#PBS -l walltime=0:15:00 -#PBS -j oe -#PBS -M abaker@ucar.edu - -setenv TMPDIR /glade/scratch/$USER/temp -mkdir -p $TMPDIR - - -python pyCECT.py --popens --sumfile /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/summary_files/pop.cesm2.0.b10.nc --indir /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/pop_test_files/C96 --jsonfile pop_ensemble.json --input_glob C96.pop.000.pop.h.0001-12 - -python pyCECT.py --popens --sumfile /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/summary_files/pop.cesm2.0.b10.nc --indir /glade/p/cisl/asap//pycect_sample_data/pop_c2.0.b10/pop_test_files/lw-lim --jsonfile pop_ensemble.json --input_glob lw-lim.pop.000.pop.h.0001-12 diff --git a/tools/statistical_ensemble_test/pyCECT/test_pyEnsSum.sh b/tools/statistical_ensemble_test/pyCECT/test_pyEnsSum.sh deleted file mode 100644 index d2cbfa66c6b..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/test_pyEnsSum.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/tcsh -#PBS -A NIOW0001 -#PBS -N ensSum -#PBS -q regular -#PBS -l select=8:ncpus=9:mpiprocs=9 -#PBS -l walltime=0:20:00 -#PBS -j oe -#PBS -M abaker@ucar.edu - -setenv TMPDIR /glade/scratch/$USER/temp -mkdir -p $TMPDIR - -mpiexec_mpt python pyEnsSum.py --esize 350 --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files --sumfile uf.ens.c1.2.2.1_fc5.ne30.nc --tslice 1 --tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --mach cheyenne --verbose --jsonfile excluded_varlist.json diff --git a/tools/statistical_ensemble_test/pyCECT/test_pyEnsSumPop.sh b/tools/statistical_ensemble_test/pyCECT/test_pyEnsSumPop.sh deleted file mode 100644 index 89d3f620152..00000000000 --- 
a/tools/statistical_ensemble_test/pyCECT/test_pyEnsSumPop.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/tcsh -#PBS -A NIOW0001 -#PBS -N ensSumPop -#PBS -q regular -#PBS -l select=4:ncpus=3:mpiprocs=3 -#PBS -l walltime=0:20:00 -#PBS -j oe -#PBS -M abaker@ucar.edu - - -mpiexec_mpt python pyEnsSumPop.py --verbose --indir /glade/p/cisl/asap/pycect_sample_data/pop_c2.0.b10/pop_ens_files --sumfile pop.cesm2.0.b10.nc --tslice 0 --nyear 1 --nmonth 12 --esize 40 --jsonfile pop_ensemble.json --mach cheyenne --compset G --tag cesm2_0_beta10 --res T62_g17 diff --git a/tools/statistical_ensemble_test/pyCECT/test_uf_cam_ect.sh b/tools/statistical_ensemble_test/pyCECT/test_uf_cam_ect.sh deleted file mode 100644 index 938f662f082..00000000000 --- a/tools/statistical_ensemble_test/pyCECT/test_uf_cam_ect.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/tcsh -#PBS -A NIOW0001 -#PBS -N UF-CAM-ECT -#PBS -q regular -#PBS -l select=1:ncpus=1:mpiprocs=1 -#PBS -l walltime=0:15:00 -#PBS -j oe -#PBS -M abaker@ucar.edu - -setenv TMPDIR /glade/scratch/$USER/temp -mkdir -p $TMPDIR - -python pyCECT.py --sumfile /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/summary_files/uf.ens.c1.2.2.1_fc5.ne30.nc --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_test_files --tslice 1 --saveResults diff --git a/tools/statistical_ensemble_test/single_run.py b/tools/statistical_ensemble_test/single_run.py deleted file mode 100644 index fe182fb5b86..00000000000 --- a/tools/statistical_ensemble_test/single_run.py +++ /dev/null @@ -1,434 +0,0 @@ -#! /usr/bin/env python -from __future__ import print_function -import sys, getopt, os - -# -# Command options -# -def disp_usage(callType): - if callType == "ensemble.py": - print( - "\nSets up multiple CESM cases for either an ensemble of runs or a small (CAM-ECT = 3, POP-ECT = 1)" - ) - print("test set (default). Then use pyCECT utilities to create an ensemble") - print( - "summary file or to evaluate the small test set of runs against the ensemble." - ) - print(" ") - print("----------------------------") - print("ensemble.py :") - else: - print("\nSets up a single CESM case. ") - print(" ") - print("----------------------------") - print("single_run.py :") - print("----------------------------") - print(" ") - print("Required flags:") - if callType == "single_run.py": - print( - " --case Case name passed on to create_newcase (incl. full path)" - ) - else: - print( - ' --case Case name passed on to create_newcase (incl.
full path AND must end in ".000")' - ) - print(" --mach Machine name passed on to create_newcase") - print(" ") - print('Optional flags (+ all "--" options to create_newcase): ') - print(" --project Project number to charge in job scripts") - print( - " --ect Specify whether ensemble is for CAM-ECT or POP-ECT (default = cam)" - ) - if callType == "single_run.py": - print(" --pertlim Run (CAM or POP) with specified non-zero pertlim") - print( - " --walltime Amount of walltime requested (default = 4:30 (CAM-ECT) 2:00 (POP-ECT), or 0:10 with --uf enabled)" - ) - print(" --compiler Compiler to use (default = same as machine default) ") - print( - " --compset Compset to use (default = F2000climo (CAM-ECT) or G (POP-ECT))" - ) - print( - " --res Resolution to run (default = f19_f19 (CAM-ECT) or T62_g17 (POP-ECT))" - ) - print( - " --uf Enable nine time step runs (ultra-fast mode for CAM-ECT) - otherwise the default is 12-month runs" - ) - if callType == "ensemble.py": - print( - " --nb Disables auto building the root case of the ensemble" - ) - print( - " --ns Disables auto submitting any members of the ensemble" - ) - print( - " --ensemble Build the ensemble (instead of building case(s) with random pertlim values for verification)," - ) - print( - " and specify the number of ensemble members to generate (e.g.: 151 for CAM-ECT annual averages " - ) - print( - " or 350 for ultra-fast CAM-ECT mode or 40 for POP-ECT)" - ) - else: - print(" --nb Disables building (and submitting) the single case") - print(" --ns Disables submitting the single case") - print(" --help, -h Prints out this usage message") - - -######## -def process_args_dict(caller, caller_argv): - - # Pull in and analyze the command line arguments - s = "case= mach= project= compiler= compset= res= uf nb ns ensemble= verbose silent test multi-driver pecount= nist= mpilib= pesfile= gridfile= srcroot= output-root= script-root= queue= user-mods-dir= input-dir= pertlim= walltime= h ect=" - - optkeys = s.split() - - try: - opts, args = getopt.getopt(caller_argv, "hf:", optkeys) - - except getopt.GetoptError: - print("\nERROR: unrecognized command line argument") - disp_usage(caller) - sys.exit(2) - - # check for help - for opt, arg in opts: - if opt == "-h": - disp_usage(caller) - sys.exit() - - # opts_dict and defaults - opts_dict = {} - opts_dict["walltime"] = "00:00" - opts_dict["pertlim"] = "0" - opts_dict["nb"] = False - opts_dict["ns"] = False - opts_dict["uf"] = False - opts_dict["ensemble"] = 0 - opts_dict["ect"] = "cam" - # for create newcase - opts_dict["verbose"] = False - opts_dict["silent"] = False - opts_dict["test"] = False - opts_dict["multi-driver"] = False - opts_dict["case"] = "NONE" - opts_dict["mach"] = "NONE" - opts_dict["compset"] = "NONE" - opts_dict["res"] = "NONE" - - s_case_flags = "" - - for opt, arg in opts: - if opt == "--case": - opts_dict["case"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--mach": - opts_dict["mach"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--project": - opts_dict["project"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--compset": - opts_dict["compset"] = arg - # required - add to flags later - elif opt == "--res": - opts_dict["res"] = arg - # required - add to flags later - elif opt == "--ect": - opts_dict["ect"] = arg - elif opt == "--ensemble": - opts_dict["ensemble"] = int(arg) - elif opt == "--compiler": - opts_dict["compiler"] = arg - s_case_flags += " "
+ opt + " " + arg - elif opt == "--pertlim": - if caller == "ensemble.py": - print("WARNING: pertlim ignored for ensemble.py.") - opts_dict["pertlim"] = "0" - else: - opts_dict["pertlim"] = arg - elif opt == "--project": - opts_dict["project"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--uf": - opts_dict["uf"] = True - elif opt == "--nb": - opts_dict["nb"] = True - elif opt == "--ns": - opts_dict["ns"] = True - elif opt == "--verbose": - opts_dict["verbose"] = True - s_case_flags += " " + opt - elif opt == "--silent": - opts_dict["silent"] = True - s_case_flags += " " + opt - elif opt == "--test": - opts_dict["test"] = True - s_case_flags += " " + opt - elif opt == "--multi-driver": - opts_dict["multi-driver"] = True - s_case_flags += " " + opt - elif opt == "--nist": - opts_dict["nist"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--pecount": - opts_dict["pecount"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--mpilib": - opts_dict["mpilib"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--pesfile": - opts_dict["pesfile"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--srcroot": - opts_dict["srcroot"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--output-root": - opts_dict["output-root"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--script-root": - opts_dict["script-root"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--queue": - opts_dict["queue"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--input-dir": - opts_dict["input-dir"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--user-modes-dir": - opts_dict["user-modes-dir"] = arg - s_case_flags += " " + opt + " " + arg - elif opt == "--walltime": - opts_dict["walltime"] = arg - # add below - - # check required things: case, machine - if opts_dict["mach"] == "NONE": - print("Error: Must specify machine (--mach)") - sys.exit() - if opts_dict["case"] == "NONE": - print("Error: Must specify case (--case)") - sys.exit() - else: - case = opts_dict["case"] - if caller == "ensemble.py": - if case[-4:] != ".000": - print( - 'Error: when using ensemble.py, the case name (--case) must end in ".000".' 
- ) - sys.exit() - case_dir = os.path.dirname(case) - if not os.path.isdir(case_dir): - print("Error: Need a valid full path with the case name (--case).") - sys.exit() - # defaults for compset and resolution - if opts_dict["ect"] == "pop": - if opts_dict["compset"] == "NONE": - opts_dict["compset"] = "G" - if opts_dict["res"] == "NONE": - opts_dict["res"] = "T62_g17" - else: # cam - if opts_dict["compset"] == "NONE": - opts_dict["compset"] = "F2000climo" - if opts_dict["res"] == "NONE": - opts_dict["res"] = "f19_f19" - - if opts_dict["walltime"] == "00:00": - if opts_dict["uf"]: - opts_dict["walltime"] = "00:10" - else: - if opts_dict["ect"] == "pop": - opts_dict["walltime"] = "02:00" - else: - opts_dict["walltime"] = "04:30" - s_case_flags += " --walltime " + opts_dict["walltime"] - - return opts_dict, s_case_flags - - -####### - - -def single_case(opts_dict, case_flags, stat_dir): - - # scripts dir - os.chdir(stat_dir) - os.chdir("../../scripts") - - ##res and compset are required for create_newcase - case_flags += ( - " --compset " - + opts_dict["compset"] - + " --res " - + opts_dict["res"] - + " --run-unsupported" - ) - - # create newcase - print("STATUS: create_newcase flags = " + case_flags) - command = "./create_newcase " + case_flags - ret = os.system(command) - - if ret != 0: - print("ERROR: create_newcase returned a non-zero exit code.") - sys.exit() - - # modify namelist settings - this_case = opts_dict["case"] - print("STATUS: case = " + this_case) - os.chdir(this_case) - - command = "chmod u+w *" - ret = os.system(command) - - command = "cp env_run.xml env_run.xml.orig" - ret = os.system(command) - - print("STATUS: Adjusting env_run.xml....") - command = "./xmlchange --file env_run.xml --id BFBFLAG --val TRUE" - ret = os.system(command) - command = "./xmlchange --file env_run.xml --id DOUT_S --val FALSE" - ret = os.system(command) - command = "./xmlchange --file env_run.xml --id REST_OPTION --val never" - ret = os.system(command) - - # time steps - if opts_dict["ect"] == "pop": - command = "./xmlchange --file env_run.xml --id STOP_OPTION --val nyears" - ret = os.system(command) - command = "./xmlchange --file env_run.xml --id STOP_N --val 1" - ret = os.system(command) - else: - if opts_dict["uf"]: - command = "./xmlchange --file env_run.xml --id STOP_OPTION --val nsteps" - ret = os.system(command) - command = "./xmlchange --file env_run.xml --id STOP_N --val 9" - ret = os.system(command) - else: - command = "./xmlchange --file env_run.xml --id STOP_OPTION --val nmonths" - ret = os.system(command) - command = "./xmlchange --file env_run.xml --id STOP_N --val 12" - ret = os.system(command) - - print("STATUS: running setup for single case...") - command = "./case.setup" - ret = os.system(command) - - print("STATUS: Adjusting user_nl_* files....") - - # POP-ECT - if opts_dict["ect"] == "pop": - if os.path.isfile("user_nl_pop"): - with open("user_nl_pop", "a") as f: - if opts_dict["pertlim"] != "0": - text = "\ninit_ts_perturb = {}".format(opts_dict["pertlim"]) - f.write(text) - else: - print("Warning: no user_nl_pop found") - else: - # CAM-ECT - # cam - if os.path.isfile("user_nl_cam"): - if opts_dict["uf"]: - text1 = "\navgflag_pertape = 'I'" - text2 = "\nnhtfrq = 9" - else: - text1 = "\navgflag_pertape = 'A'" - text2 = "\nnhtfrq = -8760" - - text3 = "\ninithist = 'NONE'" - - with open("user_nl_cam", "a") as f: - f.write(text1) - f.write(text2) - f.write(text3) - if opts_dict["pertlim"] != "0": - text = "\npertlim = "
+ opts_dict["pertlim"] - f.write(text) - else: - print("Warning: no user_nl_cam found") - - # clm - if os.path.isfile("user_nl_clm") == True: - if opts_dict["uf"] == True: - text1 = "\nhist_avgflag_pertape = 'I'" - text2 = "\nhist_nhtfrq = 9" - else: - text1 = "\nhist_avgflag_pertape = 'A'" - text2 = "\nhist_nhtfrq = -8760" - - with open("user_nl_clm", "a") as f: - f.write(text1) - f.write(text2) - - # disable ice output - if os.path.isfile("user_nl_cice") == True: - text = "\nhistfreq = 'x','x','x','x','x'" - with open("user_nl_cice", "a") as f: - f.write(text) - - # pop - if os.path.isfile("user_nl_pop") == True: - text = ["'\nn_tavg_streams = 1"] - text.append("\nldiag_bsf = .false.") - text.append("\nldiag_global_tracer_budgets = .false.") - text.append("\nldiag_velocity = .false.") - text.append("\ndiag_gm_bolus = .false.") - text.append("\nltavg_nino_diags_requested = .false.") - text.append("\nmoc_requested = .false.") - text.append("\nn_heat_trans_requested = .false.") - text.append("\nn_salt_trans_requested = .false.") - test.append("\ntavg_freq_opt = 'once', 'never', 'never'") - text.append("\ntavg_file_freq_opt = 'once', 'never', 'never'") - text.append("\ndiag_cfl_freq_opt = 'never'") - text.append("\ndiag_global_freq_opt = 'never'") - text.append("\ndiag_transp_freq_opt = 'never'") - - with open("user_nl_pop", "a") as f: - for i in range(len(text)): - f.write(text[i]) - - # preview namelists - print("STATUS: Updating namelists....") - command = "./preview_namelists" - ret = os.system(command) - - # Build executable - nb = opts_dict["nb"] - ns = opts_dict["ns"] - print("STATUS: no-build = " + str(nb)) - print("STATUS: no-submit = " + str(ns)) - if nb == False: - print("STATUS: building case ...") - command = "./case.build" - ret = os.system(command) - if ret != 0: - print("Error building...") - sys.exit() - if ns == False: - command = "./case.submit" - ret = os.system(command) - - -######## -def main(argv): - - caller = "single_run.py" - - # directory with single_run.py and ensemble.py - stat_dir = os.path.dirname(os.path.realpath(__file__)) - print("STATUS: stat_dir = " + stat_dir) - - opts_dict, case_flags = process_args_dict(caller, argv) - - single_case(opts_dict, case_flags, stat_dir) - - print("STATUS: completed single run setup.") - - -######## -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/tools/statistical_ensemble_test/user_nl_cam_LENS b/tools/statistical_ensemble_test/user_nl_cam_LENS deleted file mode 100644 index 6a7d8d05ef5..00000000000 --- a/tools/statistical_ensemble_test/user_nl_cam_LENS +++ /dev/null @@ -1,24 +0,0 @@ -! Users should add all user specific namelist changes below in the form of -! namelist_var = new_namelist_value -! These are specific changes to CAM for the LENS experiment - cldfrc_rhminl = 0.8925D0 - empty_htapes=.true. 
- fincl1='ABSORB:A','ANRAIN:A','ANSNOW:A','AODDUST1:A','AODDUST2:A','AODDUST3:A', - 'AODVIS:A','AQRAIN:A','AQSNOW:A','AREI:A','AREL:A','AWNC:A','AWNI:A','CDNUMC:A', - 'CLDHGH:A','CLDICE:A','CLDLIQ:A','CLDLOW:A','CLDMED:A','CLDTOT:A','CLOUD:A', - 'DCQ:A','DTCOND:A','DTV:A','FICE:A','FLDS:A','FLNS:A','FLNSC:A','FLNT:A', - 'FLNTC:A','FLUT:A','FLUTC:A','FREQI:A','FREQL:A','FREQR:A','FREQS:A','FSDS:A', - 'FSDSC:A','FSNS:A','FSNSC:A','FSNT:A','FSNTC:A','FSNTOA:A','FSNTOAC:A', - 'ICEFRAC:A','ICIMR:A','ICWMR:A','IWC:A','LANDFRAC:A','LHFLX:A','LWCF:A', - 'NUMICE:A','NUMLIQ:A','OCNFRAC:A','OMEGA:A','OMEGAT:A','PBLH:A','PRECC:A', - 'PRECL:A','PRECSC:A','PRECSL:A','PS:A','PSL:A','Q:A','QFLX:A','QRL:A','QRS:A', - 'RELHUM:A','SHFLX:A','SNOWHICE:A','SNOWHLND:A','SOLIN:A','SRFRAD:A','SWCF:A', - 'T:A','TAUX:A','TAUY:A','TGCLDIWP:A','TGCLDLWP:A','TMQ:A','TREFHT:A','TS:A', - 'U:A','U10:A','UU:A','V:A','VD01:A','VQ:A','VT:A','VU:A','VV:A','WSUB:A','Z3:A', - 'CCN3:A','UQ:A','WGUSTD:X','WSPDSRFMX:A','TSMX:X','TSMN:M','TREFHTMX:X','TREFHTMN:M', - 'bc_a1_SRF:A','dst_a1_SRF:A','dst_a3_SRF:A','pom_a1_SRF:A','so4_a1_SRF:A', - 'so4_a2_SRF:A','so4_a3_SRF:A','soa_a1_SRF:A','soa_a2_SRF:A','BURDENSO4:A', - 'BURDENBC:A','BURDENPOM:A','BURDENSOA:A','BURDENDUST:A','BURDENSEASALT:A', - 'AODABS:A','EXTINCT:A','PHIS:A','TROP_P:A','TROP_T:A','TOT_CLD_VISTAU:A', - 'ICLDIWP:A','ICLDTWP:A','CO2:A','CO2_LND:A','CO2_OCN:A','SFCO2:A','SFCO2_LND:A', - 'SFCO2_OCN:A','TMCO2:A','TMCO2_LND:A','TMCO2_OCN:A','CO2_FFF:A', 'SFCO2_FFF:A', 'TMCO2_FFF:A'
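For reference, the variable classification that drives the boxplots in the deleted plotting script above reduces to the short sketch below. This is a minimal, self-contained illustration rather than the shipped tool: the array names (ens_std_gm, t_std_gm) follow the script, while the sizes and the synthetic random data are assumptions made here for demonstration only.

import numpy as np

# Illustrative sizes only: 5 variables, 151 ensemble members, 3 test runs.
rng = np.random.default_rng(0)
nvars, ens_size, test_size = 5, 151, 3
ens_std_gm = rng.standard_normal((nvars, ens_size))  # ensemble standardized global means
t_std_gm = rng.standard_normal((nvars, test_size))   # test-run standardized global means

one_outside99, two_outside99, all_outside99, all_oneside_IQR = [], [], [], []
for i in range(nvars):
    # per-variable ensemble distribution bounds
    p995 = np.percentile(ens_std_gm[i, :], 99.5)
    p005 = np.percentile(ens_std_gm[i, :], 0.5)
    p75 = np.percentile(ens_std_gm[i, :], 75)
    p25 = np.percentile(ens_std_gm[i, :], 25)

    vals = t_std_gm[i, :]
    # count test values outside the central 99% of the ensemble
    n_out = int(np.sum((vals > p995) | (vals < p005)))
    if n_out == 1:
        one_outside99.append(i)
    elif n_out == 2:
        two_outside99.append(i)
    elif n_out == test_size:  # the script hard-codes test_size == 3 here
        all_outside99.append(i)
    # all test values above Q3 or all below Q1 of the ensemble
    if np.all(vals > p75) or np.all(vals < p25):
        all_oneside_IQR.append(i)

flagged = set(one_outside99) | set(two_outside99) | set(all_outside99) | set(all_oneside_IQR)
print("unique number of flagged variables =", len(flagged))

Variables landing in any of these buckets are the ones the script then plots as red diamonds against the ensemble boxplots.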