diff --git a/CHANGELOG.md b/CHANGELOG.md
index 159f6b54..a152d333 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Added statement `from dask.array import Array as DaskArray` in `gcpy plot.py`
 - Added SLURM run script `gcpy/benchmark/benchmark_slurm.sh`
 - Added `gcpy/plot/gcpy_plot_style` style sheet for title and label default settings
+- Added `gcpy/gcpy_plot_style` style sheet for title and label default settings
+- Added new cubed-sphere grid inquiry functions to `gcpy/cstools.py`
+- Added functions `get_ilev_coord` and `get_lev_coord` to `gcpy/grid.py`

 ### Changed
 - Simplified the Github issues templates into two options: `new-feature-or-discussion.md` and `question-issue.md`
@@ -62,6 +65,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Now add `if` statements to turn of `Parallel()` commands when `n_jobs==1`.
 - Do not hardwire fontsize in `gcpy/plot.py`; get defaults from `gcpy_plot_style`
 - `gcpy/plot.py` has been split up into smaller modules in the `gcpy/plot` folder
+- Updated and cleaned up code in `gcpy/regrid.py`
+- Example scripts `plot_single_level` and `plot_comparisons` can now accept command-line arguments
+- Example scripts `plot_single_level.py`, `plot_comparisons.py`, `compare_diags.py` now handle GCHP restart files properly

 ### Fixed
 - Generalized test for GCHP or GCClassic restart file in `regrid_restart_file.py`
@@ -69,6 +75,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Routine `create_display_name` now splits on only the first `_` in species & diag names
 - Prevent plot panels from overlapping in six-panel plots
 - Prevent colorbar tick labels from overlapping in dynamic-range ratio plots
+- Updated `seaborn` plot style names to conform to the latest matplotlib
+- Set `lev:positive` and/or `ilev:positive` properly in `regrid_restart_file.py` and `file_regrid.py`
+- Prevent overwriting of `lev` coord in `file_regrid.py` at netCDF write time

 ### Removed
 - Removed `gchp_is_pre_13_1` arguments & code from benchmarking routines
@@ -78,6 +87,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Removed `gcpy_test_dir` option from `examples/diagnostics/compare_diags.*`
 - Removed `docs/environment_files/gchp_regridding.yml` environment file
 - Removed `gcpy/gcpy/benchmark/plot_driver.sh`
+- Made benchmark configuration files consistent

 ## [1.3.3] -- 2023-03-09
 ### Added
@@ -365,16 +375,3 @@ This is the first labeled version of GCPy. The primary functionality of GCPy is
 - Support for plotting benchmark output for both GEOS-Chem Classic (lat/lon data) and GCHP (cubed-sphere data).

 The first official release version of GCPy, v1.0.0, will correspond with the release of GEOS-Chem 13.0.0.
-
-
-## [Unreleased]
-
-### Added
-
-### Changed
-
-### Deprecated
-
-### Fixed
-
-### Removed
diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
index b3612444..257d22a5 100644
--- a/gcpy/benchmark/cloud/template.1hr_benchmark.yml
+++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
@@ -4,15 +4,15 @@
 # customize in the following manner:
 #
 # (1) Edit the path variables so that they point to folders
-#     containing model data.
-# (2) Edit the version strings for each benchmark simulation.
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
 # (3) Edit the switches that turn on/off creating of plots and
-#     tables, as well as other plotting options.
+#     tables as well as other plotting options
 # (4) If necessary, edit labels for the dev and ref versions
 #
 # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared
 # to gcc_dev (not gcc_ref!). This ensures consistency in version names
-# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# when doing GCHP vs GCC diff-of-diffs.
 # =====================================================================
 #
 # Configuration for 1-hour FullChemBenchmark
 #
@@ -76,11 +76,11 @@ data:
     is_pre_14.0: False
     resolution: c24
 #
-# options: Customizes the benchmark plot output.
+# options: Customizes the benchmark plot output
 #
 options:
   #
-  # bmk_type: Specifies the type of benchmark.
+  # bmk_type: Specifies the type of benchmark
   #
   bmk_type: FullChemBenchmark
   #
@@ -103,7 +103,7 @@ options:
     run: False
     dir: GCHP_GCC_diff_of_diffs
   #
-  # outputs: Specifies the plots and tables to generate.
+  # outputs: Specifies the plots and tables to generate
   #
   outputs:
     plot_conc: False
diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
index 792f0d8b..fad5d6d0 100644
--- a/gcpy/benchmark/cloud/template.1mo_benchmark.yml
+++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
@@ -4,18 +4,18 @@
 # customize in the following manner:
 #
 # (1) Edit the path variables so that they point to folders
-#     containing model data.
-# (2) Edit the version strings for each benchmark simulation.
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
 # (3) Edit the switches that turn on/off creating of plots and
-#     tables, as well as other plotting options.
+#     tables as well as other plotting options
 # (4) If necessary, edit labels for the dev and ref versions
 #
 # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared
 # to gcc_dev (not gcc_ref!). This ensures consistency in version names
-# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# when doing GCHP vs GCC diff-of-diffs.
 # =====================================================================
 #
-# Configuration for 1 month FullChemBenchmark
+# Configuration for 1-month FullChemBenchmark
 #
 # paths:
 #   main_dir: High-level directory containing ref & dev rundirs
@@ -76,11 +76,11 @@ data:
     is_pre_14.0: False
     resolution: c24
 #
-# options: Customizes the benchmark plot output.
+# options: Customizes the benchmark plot output
 #
 options:
   #
-  # bmk_type: Specifies the type of benchmark.
+  # bmk_type: Specifies the type of benchmark
   #
   bmk_type: FullChemBenchmark
   #
@@ -103,7 +103,7 @@ options:
     run: False
     dir: GCHP_GCC_diff_of_diffs
   #
-  # outputs: Specifies the plots and tables to generate.
+  # outputs: Specifies the plots and tables to generate
   #
   outputs:
     plot_conc: True
diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml
index 5a122d40..637f9145 100644
--- a/gcpy/benchmark/config/1mo_benchmark.yml
+++ b/gcpy/benchmark/config/1mo_benchmark.yml
@@ -1,21 +1,21 @@
-&---
+---
 # =====================================================================
 # Benchmark configuration file (**EDIT AS NEEDED**)
 # customize in the following manner:
 #
 # (1) Edit the path variables so that they point to folders
-#     containing model data.
-# (2) Edit the version strings for each benchmark simulation.
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
 # (3) Edit the switches that turn on/off creating of plots and
-#     tables, as well as other plotting options.
+#     tables as well as other plotting options
 # (4) If necessary, edit labels for the dev and ref versions
 #
 # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared
 # to gcc_dev (not gcc_ref!). This ensures consistency in version names
-# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# when doing GCHP vs GCC diff-of-diffs.
 # =====================================================================
 #
-# Configuration for 1 month FullChemBenchmark
+# Configuration for 1-month FullChemBenchmark
 #
 # paths:
 #   main_dir: High-level directory containing ref & dev rundirs
@@ -26,9 +26,9 @@
 #   species_database.yml in one of the Dev rundirs.
 #
 paths:
-  main_dir: /path/to/benchmark/main/dir  # EDIT AS NEEDED
-  results_dir: /path/to/BenchmarkResults  # EDIT AS NEEDED
-  weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights
+  main_dir: /path/to/benchmark/main/dir
+  results_dir: /path/to/BenchmarkResults
+  weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights
   spcdb_dir: default
 #
 # data: Contains configurations for ref and dev runs
@@ -47,7 +47,7 @@ data:
       dir: GCC_ref
       outputs_subdir: OutputDir
      restarts_subdir: Restarts
-      bmk_start: "2019-07-01T00:00:00"
+      bmk_start: "2019-07-01T00:00:00"
       bmk_end: "2019-08-01T00:00:00"
     gchp:
       version: GCHP_ref
@@ -64,46 +64,46 @@ data:
       dir: GCC_dev
       outputs_subdir: OutputDir
       restarts_subdir: Restarts
-      bmk_start: "2019-07-01T00:00:00"
+      bmk_start: "2019-07-01T00:00:00"
       bmk_end: "2019-08-01T00:00:00"
     gchp:
       version: GCHP_dev
       dir: GCHP_dev
       outputs_subdir: OutputDir
       restarts_subdir: Restarts
-      bmk_start: "2019-07-01T00:00:00"
+      bmk_start: "2019-07-01T00:00:00"
       bmk_end: "2019-08-01T00:00:00"
     is_pre_14.0: False
     resolution: c24
 #
-# options: Customizes the benchmark plot output.
+# options: Customizes the benchmark plot output
 #
 options:
   #
-  # bmk_type: Specifies the type of benchmark.
+  # bmk_type: Specifies the type of benchmark
   #
   bmk_type: FullChemBenchmark
   #
   # comparisons: Specifies the comparisons to perform.
   #
   comparisons:
-    gcc_vs_gcc:
+    gcc_vs_gcc:
       run: True
       dir: GCC_version_comparison
       tables_subdir: Tables
-    gchp_vs_gcc:
+    gchp_vs_gcc:
       run: True
-      dir: GCHP_GCC_comparison
+      dir: GCHP_GCC_comparison
       tables_subdir: Tables
-    gchp_vs_gchp:
+    gchp_vs_gchp:
       run: True
       dir: GCHP_version_comparison
       tables_subdir: Tables
-    gchp_vs_gcc_diff_of_diffs:
+    gchp_vs_gcc_diff_of_diffs:
       run: True
       dir: GCHP_GCC_diff_of_diffs
   #
-  # outputs: Specifies the plots and tables to generate.
+  # outputs: Specifies the plots and tables to generate
   #
   outputs:
     plot_conc: True
@@ -117,13 +117,14 @@ options:
     OH_metrics: True
     ste_table: True  # GCC only
     summary_table: True
-    plot_options:  # Plot concentrations and emissions by category?
+    plot_options:
       by_spc_cat: True
       by_hco_cat: True
   #
-  # n_cores: Specify the number of cores to use:
-  #  -1: Use all avaiable cores
-  #  -N: Use N cores
+  # n_cores: Specify the number of cores to use.
+  #  -1: Use $OMP_NUM_THREADS cores
+  #  -2: Use $OMP_NUM_THREADS - 1 cores
+  #  -N: Use $OMP_NUM_THREADS - (N-1) cores
   #   1: Disable parallelization (use a single core)
   #
   n_cores: -1
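The n_cores values above follow joblib's n_jobs convention, and the benchmark scripts pass n_cores straight into joblib's Parallel() (see the run_1yr_*_benchmark.py hunks further below, which also add a serial fallback when n_cores is 1). A minimal sketch of the resulting behavior, assuming joblib is installed; make_table and the 12-month range are hypothetical stand-ins for the per-month table functions:

from joblib import Parallel, delayed, effective_n_jobs

def make_table(mon):
    """Hypothetical stand-in for a per-month table function."""
    return mon

n_cores = -1   # value of "n_cores" from the YAML configuration

if n_cores != 1:
    # joblib convention: n_jobs=-1 uses all cores, -2 all but one, etc.
    print(f"Running with {effective_n_jobs(n_cores)} workers")
    results = Parallel(n_jobs=n_cores)(
        delayed(make_table)(mon) for mon in range(12)
    )
else:
    # Serial fallback, mirroring the "n_cores != 1" guards added below
    results = [make_table(mon) for mon in range(12)]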
diff --git a/gcpy/benchmark/config/1yr_ch4_benchmark.yml b/gcpy/benchmark/config/1yr_ch4_benchmark.yml
index 90bc9763..29af5bfa 100644
--- a/gcpy/benchmark/config/1yr_ch4_benchmark.yml
+++ b/gcpy/benchmark/config/1yr_ch4_benchmark.yml
@@ -4,18 +4,18 @@
 # customize in the following manner:
 #
 # (1) Edit the path variables so that they point to folders
-#     containing model data.
-# (2) Edit the version strings for each benchmark simulation.
-# (3) Edit the switches that turn on/off creating of plots and tables,
-#     as well as other plotting options.
-# (4) If necessary, edit labels for the dev and ref versions.
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
+# (3) Edit the switches that turn on/off creating of plots and
+#     tables as well as other plotting options
+# (4) If necessary, edit labels for the dev and ref versions
 #
 # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared
 # to gcc_dev (not gcc_ref!). This ensures consistency in version names
-# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# when doing GCHP vs GCC diff-of-diffs.
 # =====================================================================
 #
-# Configuration for 1yr CH4Benchmark
+# Configuration for 1-year CH4Benchmark
 #
 # paths:
 #   main_dir: High-level directory containing ref & dev rundirs
@@ -56,11 +56,11 @@ data:
       restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
-      is_pre_14.0: False  # for gcpy_test_data, edit if needed
-      resolution: c24  # for gcpy_test_data, edit if needed
+      is_pre_14.0: False
+      resolution: c24
   dev:
     gcc:
-      version: GCC_dev
+      version: GCC_dev
       dir: GCC_dev
       outputs_subdir: OutputDir
       restarts_subdir: Restarts
@@ -73,37 +73,37 @@ data:
       restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
-      is_pre_14.0: False  # for gcpy_test_data, edit if needed
-      resolution: c24  # for gcpy_test_data, edit if needed
+      is_pre_14.0: False
+      resolution: c24
+#
+# options: Customizes the benchmark plot output
 #
-# options: Customize the benchmark plot output.
-#
 options:
   #
-  # bmk_type: Specifies the type of benchmark.
+  # bmk_type: Specifies the type of benchmark
   #
   bmk_type: CH4Benchmark
   #
   # comparisons: Specifies the comparisons to perform.
   #
   comparisons:
-    gcc_vs_gcc:
-      run: True  # True to run this comparison
+    gcc_vs_gcc:
+      run: True
       dir: GCC_version_comparison
       tables_subdir: Tables
-    gchp_vs_gcc:
-      run: False
-      dir: GCHP_GCC_comparison
+    gchp_vs_gcc:
+      run: True
+      dir: GCHP_GCC_comparison
       tables_subdir: Tables
-    gchp_vs_gchp:
-      run: False
+    gchp_vs_gchp:
+      run: True
       dir: GCHP_version_comparison
       tables_subdir: Tables
     gchp_vs_gcc_diff_of_diffs:
       run: False
       dir: GCHP_GCC_diff_of_diffs
   #
-  # outputs: Specifies the plots and tables to generate.
+  # outputs: Specifies the plots and tables to generate
   #
   outputs:
     plot_conc: True
diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml
index 10536861..8f86efae 100644
--- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml
+++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml
@@ -1,4 +1,4 @@
-&---
+---
 # =====================================================================
 # Benchmark configuration file (**EDIT AS NEEDED**)
 # customize in the following manner:
@@ -7,15 +7,15 @@
 #     containing model data
 # (2) Edit the version strings for each benchmark simulation
 # (3) Edit the switches that turn on/off creating of plots and
-#     tables as well as other plotting options.
+#     tables as well as other plotting options
 # (4) If necessary, edit labels for the dev and ref versions
 #
 # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared
 # to gcc_dev (not gcc_ref!). This ensures consistency in version names
-# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# when doing GCHP vs GCC diff-of-diffs.
 # =====================================================================
 #
-# Configuration for 1yr FullChemBenchmark
+# Configuration for 1-year FullChemBenchmark
 #
 # paths:
 #   main_dir: High-level directory containing ref & dev rundirs
@@ -27,8 +27,8 @@
 #   obs_data_dir: Path to observational data (for models vs obs plots)
 #
 paths:
-  main_dir: /path/to/benchmark/main/dir  # EDIT AS NEEDED
-  results_dir: /path/to/BenchmarkResults  # EDIT AS NEEDED
+  main_dir: /path/to/benchmark/main/dir
+  results_dir: /path/to/BenchmarkResults
   weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights
   spcdb_dir: default
   obs_data_dir: /path/to/observational/data
@@ -48,7 +48,7 @@ data:
       version: GCC_ref
       dir: GCC_ref
       outputs_subdir: OutputDir
-      restarts_subdir: restarts
+      restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
     gchp:
@@ -58,14 +58,14 @@ data:
       restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
-      is_pre_14.0: True  # for gcpy_test_data, edit if needed
-      resolution: c48  # for gcpy_test_data, edit if needed
+      is_pre_14.0: False
+      resolution: c24
   dev:
     gcc:
       version: GCC_dev
       dir: GCC_dev
       outputs_subdir: OutputDir
-      restarts_subdir: restarts
+      restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
     gchp:
@@ -75,11 +75,11 @@ data:
       restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
-      is_pre_14.0: False  # for gcpy_test_data, edit if needed
-      resolution: c24  # for gcpy_test_data, edit if needed
+      is_pre_14.0: False
+      resolution: c24
+#
+# options: Customizes the benchmark plot output
 #
-# options: Customize the benchmark plot output.
-#
 options:
   #
   # bmk_type: Specifies the type of benchmark
   #
   bmk_type: FullChemBenchmark
   #
@@ -89,15 +89,15 @@ options:
   # comparisons: Specifies the comparisons to perform.
  #
   comparisons:
-    gcc_vs_gcc:
-      run: True  # True to run this comparison
+    gcc_vs_gcc:
+      run: True
       dir: GCC_version_comparison
       tables_subdir: Tables
-    gchp_vs_gcc:
+    gchp_vs_gcc:
       run: True
-      dir: GCHP_GCC_comparison
+      dir: GCHP_GCC_comparison
       tables_subdir: Tables
-    gchp_vs_gchp:
+    gchp_vs_gchp:
       run: True
       dir: GCHP_version_comparison
       tables_subdir: Tables
@@ -105,7 +105,7 @@ options:
       run: True
       dir: GCHP_GCC_diff_of_diffs
   #
-  # outputs: Types of output to generate (plots/tables)
+  # outputs: Specifies the plots and tables to generate
   #
   outputs:
     plot_conc: True
diff --git a/gcpy/benchmark/config/1yr_tt_benchmark.yml b/gcpy/benchmark/config/1yr_tt_benchmark.yml
index 763f571f..ffc01b1a 100644
--- a/gcpy/benchmark/config/1yr_tt_benchmark.yml
+++ b/gcpy/benchmark/config/1yr_tt_benchmark.yml
@@ -1,21 +1,21 @@
-&---
+---
 # =====================================================================
 # Benchmark configuration file (**EDIT AS NEEDED**)
 # customize in the following manner:
 #
-# (1) Edit the path variables so that they point to folders w/
-#     containing model data.
-# (2) Edit the version strings for each benchmark simulation.
+# (1) Edit the path variables so that they point to folders
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
 # (3) Edit the switches that turn on/off creating of plots and
-#     tables, as well as other plotting options.
-# (4) If necessary, edit labels for the dev and ref versions.
+#     tables as well as other plotting options
+# (4) If necessary, edit labels for the dev and ref versions
 #
 # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared
 # to gcc_dev (not gcc_ref!). This ensures consistency in version names
-# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# when doing GCHP vs GCC diff-of-diffs.
 # =====================================================================
 #
-# Configuration for 1 year TransportTracersBenchmark
+# Configuration for 1-year TransportTracersBenchmark
 #
 # paths:
 #   main_dir: High-level directory containing ref & dev rundirs
@@ -26,8 +26,8 @@
 #   species_database.yml in one of the Dev rundirs.
 #
 paths:
-  main_dir: /path/to/benchmark/main/dir  # EDIT AS NEEDED
-  results_dir: /path/to/BenchmarkResults  # EDIT AS NEEDED
+  main_dir: /path/to/benchmark/main/dir
+  results_dir: /path/to/BenchmarkResults
   weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights
   spcdb_dir: default
 #
@@ -56,11 +56,11 @@ data:
       restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
-      is_pre_14.0: True  # for gcpy_test_data, edit if needed
-      resolution: c48  # for gcpy_test_data, edit if needed
+      is_pre_14.0: False
+      resolution: c24
   dev:
     gcc:
-      version: GCC_dev
+      version: GCC_dev
       dir: GCC_dev
       outputs_subdir: OutputDir
       restarts_subdir: Restarts
@@ -73,14 +73,14 @@ data:
       restarts_subdir: Restarts
       bmk_start: "2019-01-01T00:00:00"
       bmk_end: "2020-01-01T00:00:00"
-      is_pre_14.0: True  # for gcpy_test_data, edit if needed
-      resolution: c48  # for gcpy_test_data, edit if needed
+      is_pre_14.0: False
+      resolution: c24
 #
-# options: Customize the benchmark plot output.
+# options: Customizes the benchmark plot output
 #
 options:
   #
-  # bmk_type: Specifies the type of benchmark.
+  # bmk_type: Specifies the type of benchmark
   #
   bmk_type: TransportTracersBenchmark
   #
@@ -99,9 +99,11 @@ options:
       run: True
       dir: GCHP_version_comparison
       tables_subdir: Tables
-    # GCHP vs GCC diff of diffs not included in 1-yr tt benchmark
+    gchp_vs_gcc_diff_of_diffs:
+      run: False
+      dir: GCHP_GCC_diff_of_diffs
   #
-  # outputs: Specifies the plots and tables to generate.
+  # outputs: Specifies the plots and tables to generate
   #
   outputs:
     plot_conc: True
diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
index 8dfffab0..7321c50a 100644
--- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
@@ -530,7 +530,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev):
             dev_interval=sec_per_month_dev,
             overwrite=True,
             spcdb_dir=spcdb_dir,
-            n_job=config["options"]["n_cores"]
         )

     # ==================================================================
@@ -689,7 +688,7 @@ def gcc_vs_gcc_mass_table(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gcc_vs_gcc_mass_table)(mon)
                 for mon in range(bmk_n_months)
@@ -736,7 +735,7 @@ def gcc_vs_gcc_ops_budg(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gcc_vs_gcc_ops_budg)(mon) \
                 for mon in range(bmk_n_months)
@@ -1250,7 +1249,7 @@ def gchp_vs_gcc_mass_table(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gchp_vs_gcc_mass_table)(mon) \
                 for mon in range(bmk_n_months)
@@ -1306,7 +1305,7 @@ def gchp_vs_gcc_ops_budg(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gchp_vs_gcc_ops_budg)(mon) \
                 for mon in range(bmk_n_months)
@@ -1858,7 +1857,7 @@ def gchp_vs_gchp_mass_table(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gchp_vs_gchp_mass_table)(mon) \
                 for mon in range(bmk_n_months)
@@ -1916,7 +1915,7 @@ def gchp_vs_gchp_ops_budg(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gchp_vs_gchp_ops_budg)(mon) \
                 for mon in range(bmk_n_months)
diff --git a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
index 796c4b41..878dad76 100644
--- a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
@@ -534,7 +534,7 @@ def gcc_vs_gcc_mass_table(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gcc_vs_gcc_mass_table)(mon) \
                 for mon in range(bmk_n_months)
@@ -831,7 +831,7 @@ def gchp_vs_gcc_mass_table(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                delayed(gchp_vs_gcc_mass_table)(mon) \
                 for mon in range(bmk_n_months)
@@ -1143,7 +1143,7 @@ def gchp_vs_gchp_mass_table(mon):

         # Create tables in parallel
         # Turn off parallelization if n_jobs==1
-        if n_jobs != 1:
+        if config["options"]["n_cores"] != 1:
             results = Parallel(n_jobs=config["options"]["n_cores"])(
                 delayed(gchp_vs_gchp_mass_table)(mon) \
                 for mon in range(bmk_n_months)
diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py
index a742f8fd..42d36418 100755
--- a/gcpy/benchmark/run_benchmark.py
+++ b/gcpy/benchmark/run_benchmark.py
@@ -847,7 +847,6 @@ def run_benchmark_default(config):
             overwrite=True,
             devmet=devmet,
             spcdb_dir=spcdb_dir,
-            n_job=config["options"]["n_cores"]
         )

     # ==================================================================
diff --git a/gcpy/cstools.py b/gcpy/cstools.py
index 751908e0..a1d5dcac 100644
--- a/gcpy/cstools.py
+++ b/gcpy/cstools.py
@@ -37,15 +37,10 @@
     import pyproj
     import shapely.ops
     import shapely.geometry
-# HOTFIX: Don't raise an ImportError until we can add pyproj to
-# the AWS cloud container.  This is causing errors that prevent
-# benchmark plotting jobs from finishing. -- Bob Y. (01 Aug 2023)
-#except ImportError as exc:
-#    raise ImportError(
-#        "gcpy.cstools needs packages 'pyproj' and 'shapely'!"
-#    ) from exc
-except:
-    print("pyproj is not available")
+except ImportError as exc:
+    raise ImportError(
+        "gcpy.cstools needs packages 'pyproj' and 'shapely'!"
+    ) from exc

 # Constants
 RAD_TO_DEG = 180.0 / np.pi
@@ -74,8 +69,8 @@ def extract_grid(
     if not is_cubed_sphere(data):
         return None

-    n_cs = data["Xdim"].shape[-1]
-    return gen_grid(n_cs)
+    cs_res = get_cubed_sphere_res(data)
+    return gen_grid(cs_res)


 def read_gridspec(gs_obj):
@@ -111,21 +106,23 @@ def read_gridspec(gs_obj):
         area[i,...] = tile.area[...]

     data = xr.Dataset(
-        data_vars=dict(
-            area=(['nf','Ydim','Xdim'],area),
-            lon=(['nf','Ydim','Xdim'],lon),
-            lat=(['nf','Ydim','Xdim'],lat),
-            lon_b=(['nf','Ydim_b','Xdim_b'],lon_b),
-            lat_b=(['nf','Ydim_b','Xdim_b'],lat_b),
-        ),
-        coords=dict(
-            nf=(['nf'],list(range(6))),
-            Ydim=(['Ydim'],list(range(n_cs))),
-            Xdim=(['Xdim'],list(range(n_cs))),
-            Ydim_b=(['Ydim_b'],list(range(n_cs+1))),
-            Xdim_b=(['Xdim_b'],list(range(n_cs+1))),
-        ),
-        attrs=dict(description=f'c{n_cs:d} grid data'),
+        data_vars={
+            "area": (['nf','Ydim','Xdim'], area),
+            "lon": (['nf','Ydim','Xdim'], lon),
+            "lat": (['nf','Ydim','Xdim'], lat),
+            "lon_b": (['nf','Ydim_b','Xdim_b'], lon_b),
+            "lat_b": (['nf','Ydim_b','Xdim_b'], lat_b),
+        },
+        coords={
+            "nf": (['nf'], list(range(6))),
+            "Ydim": (['Ydim'], list(range(n_cs))),
+            "Xdim": (['Xdim'], list(range(n_cs))),
+            "Ydim_b": (['Ydim_b'], list(range(n_cs+1))),
+            "Xdim_b": (['Xdim_b'], list(range(n_cs+1))),
+        },
+        attrs={
+            "description": f"c{n_cs:d} grid data"
+        },
     )
     return data
@@ -348,7 +345,7 @@ def gen_grid(
         where each value has an extra face dimension of length 6.
     """
     if stretch_factor is not None:
-        cs_temp, ignore = gcpy.make_grid_SG(
+        cs_temp, _ = gcpy.make_grid_SG(
             n_cs,
             stretch_factor,
             target_lon,
@@ -680,7 +677,7 @@ def is_cubed_sphere(
 ):
     """
     Given an xarray Dataset or DataArray object, determines if the
-    data is placed on a cubed-sphere grid
+    data is placed on a cubed-sphere grid.

     Args:
     -----
     data : xarray.DataArray or xarray.Dataset
         The input data
@@ -692,10 +689,123 @@ def is_cubed_sphere(
     Returns:
     --------
     is_gchp : bool
         Returns True if data is placed on a cubed-sphere grid,
         and False otherwise.
+
+    Remarks:
+    --------
+    A cubed-sphere data file has one of the following attributes:
+    (1) A dimension named "nf" (GCHP/GEOS diagnostic files)
+    (2) The lat/lon ratio is exactly 6 (GCHP/GEOS checkpoints)
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if is_cubed_sphere_diag_grid(data):
+        return True
+    if is_cubed_sphere_rst_grid(data):
+        return True
+    return False
+
+
+def is_cubed_sphere_diag_grid(data):
+    """
+    Determines if a cubed-sphere grid has History file dimensions
+    (i.e. a dimension named "nf", aka number of grid faces).
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data.
+
+    Returns:
+    --------
+    True if the grid has History diagnostic dimensions,
+    False otherwise.
+    """
+    if "nf" in data.dims:
+        return True
+    return False
+
+
+def is_cubed_sphere_rst_grid(data):
+    """
+    Determines if a cubed-sphere grid has restart file dimensions
+    (i.e. lat and lon, with lat = lon*6).
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data.
+
+    Returns:
+    --------
+    True if the grid has restart dimensions, False otherwise.
+    """
     gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))

-    if "nf" in data.dims:  # nf = number of cubed-sphere faces
+    # TODO: Rethink this if we ever end up changing the GC-Classic
+    # restart variables to start with SPC, or if we ever rename the
+    # internal state variables in GCHP.  A more robust back-up check
+    # could be to see if all the lats and lons are integer, since
+    # that will be the case with the GCHP restart file format.
+    if "lat" in data.dims:
+        if data.dims["lat"] == data.dims["lon"] * 6:
+            return True
+    if "SPC_" in data.data_vars.keys():
         return True
     return False
+
+
+def get_cubed_sphere_res(data):
+    """
+    Given a Dataset or DataArray object, returns the number of
+    grid cells along one side of the cubed-sphere grid face
+    (e.g. 24 for grid resolution C24, which has 24x24 grid cells
+    per face).
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data.
+
+    Returns:
+    --------
+    cs_res : int
+        The cubed-sphere resolution.  Will return 0 if the data
+        is not placed on a cubed-sphere grid.
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if not is_cubed_sphere(data):
+        return 0
+    if is_cubed_sphere_rst_grid(data):
+        return data.dims["lon"]
+    return data.dims["Xdim"]
+
+
+def is_gchp_lev_positive_down(data):
+    """
+    Determines if GCHP data is arranged vertically from the top of the
+    atmosphere downwards or from the surface upwards, according to:
+
+    (1) Checkpoint files: lev:positive="down"
+    (2) Emissions collection: lev:positive="down"
+    (3) Other collections: lev:positive="up"
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data
+
+    Returns:
+    --------
+    True if the data is arranged from top-of-atm downwards.
+    False if the data is arranged from the surface upwards.
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if is_cubed_sphere_rst_grid(data):
+        return True
+    if is_cubed_sphere_diag_grid(data):
+        emis_vars = [var for var in data.data_vars if var.startswith("Emis")]
+        if len(emis_vars) > 0:
+            return True
     return False
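A quick illustration of the grid-inquiry functions added above (a sketch, assuming pyproj and shapely are installed so that gcpy.cstools imports cleanly; the c24-sized arrays of zeros are arbitrary test data):

import numpy as np
import xarray as xr
from gcpy import cstools

# GCHP-style History (diagnostic) data: 6 faces of a c24 grid
diag = xr.Dataset(
    {"SpeciesConc_O3": (("nf", "Ydim", "Xdim"), np.zeros((6, 24, 24)))}
)
print(cstools.is_cubed_sphere(diag))       # True: has an "nf" dimension
print(cstools.get_cubed_sphere_res(diag))  # 24:   length of "Xdim"

# GCHP-style checkpoint (restart) data: lat is 6x the lon dimension
rst = xr.Dataset(
    {"SPC_O3": (("lat", "lon"), np.zeros((144, 24)))}
)
print(cstools.is_cubed_sphere_rst_grid(rst))   # True: lat == 6 * lon
print(cstools.is_gchp_lev_positive_down(rst))  # True: checkpoint format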
diff --git a/gcpy/examples/diagnostics/compare_diags.py b/gcpy/examples/diagnostics/compare_diags.py
index 245528ae..92906ec1 100755
--- a/gcpy/examples/diagnostics/compare_diags.py
+++ b/gcpy/examples/diagnostics/compare_diags.py
@@ -6,8 +6,6 @@
 configuration are specified in a YAML file whose name is passed
 as an argument.
""" - -# Imports import os import sys import warnings @@ -108,6 +106,10 @@ def read_data(config): msg = "Error reading " + dev_file raise FileNotFoundError(msg) from exc + # Special handling for GCHP restart files + refdata = util.rename_and_flip_gchp_rst_vars(refdata) + devdata = util.rename_and_flip_gchp_rst_vars(devdata) + # Define dictionary for return data = { "ref": refdata, @@ -314,8 +316,7 @@ def compare_data(config, data): # ================================================================== # Print totals for each quantity # ================================================================== - if config["options"]["totals_and_diffs"]["create_table"] or \ - config["options"]["totals_and_diffs"]["print_to_screen"]: + if config["options"]["totals_and_diffs"]["create_table"]: print('... Printing totals and differences') print_totals_and_diffs( config, diff --git a/gcpy/examples/plotting/plot_comparisons.py b/gcpy/examples/plotting/plot_comparisons.py index 103c1be1..3430eb92 100755 --- a/gcpy/examples/plotting/plot_comparisons.py +++ b/gcpy/examples/plotting/plot_comparisons.py @@ -12,49 +12,71 @@ The example data described here is in lat/lon format, but the same code works equally well for cubed-sphere (GCHP) data. """ +import argparse import xarray as xr +from matplotlib import use as mpl_use import matplotlib.pyplot as plt from gcpy.constants import skip_these_vars from gcpy.plot.compare_single_level import compare_single_level from gcpy.plot.compare_zonal_mean import compare_zonal_mean +from gcpy.util import rename_and_flip_gchp_rst_vars +# X11 backend needed for plt.show() +mpl_use("tkagg") -def main(): + +def plot_comparisons( + ref, + dev, + varname, + level +): """ Example function to create six-panel comparison plots. - """ + Args: + ----- + ref (str) : Path to the "Ref" data file. + dev (str) : Path to the "Dev" data file. + varname (str) : Variable to plot + level (int) : Level to plot (for single-level comparisons only). + """ # xarray allows us to read in any NetCDF file, the format of # GEOS-Chem diagnostics, #as an xarray Dataset # # The skip_these_vars list avoids trying to read certain # GCHP variables that cause data read issues. ref_ds = xr.open_dataset( - 'first_run/GEOSChem.Restart.20160801_0000z.nc4', + ref, drop_variables=skip_these_vars ) dev_ds = xr.open_dataset( - 'second_run/GEOSChem.Restart.20160801_0000z.nc4', + dev, drop_variables=skip_these_vars ) + # Special handling is needed for GCHP restart files + ref_ds = rename_and_flip_gchp_rst_vars(ref_ds) + dev_ds = rename_and_flip_gchp_rst_vars(dev_ds) + # ================== # Single level plots # ================== # compare_single_level generates sets of six panel plots for # data at a specified level in your datasets. By default, the - # level at index 0 (likely the surface) is plotted. Here we will - # plot data at ~500 hPa, which is located at index 21 in the - # standard 72-level and 47-level GMAO vertical grids. - ilev=21 - + # level at index 0 (likely the surface) is plotted. + # # You likely want to look at the same variables across both of # your datasets. If a variable is in one dataset but not the other, # the plots will show NaN values for the latter. You can pass # variable names in a list to these comparison plotting functions # (otherwise all variables will plot). - varlist = ['SpeciesRst_O3', 'SpeciesRst_CO2'] + # + # NOTE: For simplicity, we will just restrict the comparisons + # to a single variable. But you can add as many variables as + # you like to varlist. 
+    varlist = [varname]

     # compare_single_level has many arguments which can be optionally
     # specified. The first four arguments are required. They specify
@@ -64,10 +86,10 @@ def main():
     # variables you want to plot.
     compare_single_level(
         ref_ds,
-        'Dataset 1',
+        'Ref version',
         dev_ds,
-        'Dataset 2',
-        ilev=ilev,
+        'Dev version',
+        ilev=level,
         varlist=varlist
     )
     plt.show()
@@ -76,10 +98,10 @@ def main():
     # You can also save out the plots to a PDF.
     compare_single_level(
         ref_ds,
-        'Dataset 1',
+        'Ref version',
         dev_ds,
-        'Dataset 2',
-        ilev=ilev,
+        'Dev version',
+        ilev=level,
         varlist=varlist,
         pdfname='single_level.pdf'
     )
@@ -95,15 +117,64 @@ def main():
     # default every vertical level is plotted)
     compare_zonal_mean(
         ref_ds,
-        'Dataset 1',
+        'Ref version',
         dev_ds,
-        'Dataset 2',
+        'Dev version',
         pres_range=[0, 100],
         varlist=varlist,
         pdfname='zonal_mean.pdf'
     )

-# Only execute when we run as a standalone script
-if __name__ == '__main__':
+def main():
+    """
+    Parses command-line arguments and calls plot_comparisons
+    """
+
+    # Tell the parser which arguments to look for
+    parser = argparse.ArgumentParser(
+        description="Six-panel comparison plotting example program"
+    )
+    parser.add_argument(
+        "-r", "--ref",
+        metavar="REF",
+        type=str,
+        required=True,
+        help="path to NetCDF file for the Ref model"
+    )
+    parser.add_argument(
+        "-d", "--dev",
+        metavar="DEV",
+        type=str,
+        required=True,
+        help="path to NetCDF file for the Dev model"
+    )
+    parser.add_argument(
+        "-v", "--varname",
+        metavar="VAR",
+        type=str,
+        required=True,
+        help="Variable name to plot"
+    )
+    parser.add_argument(
+        "-l", "--level",
+        metavar="LEV",
+        type=int,
+        required=True,
+        help="level to plot (single-level plots only), starting at 0"
+    )
+
+    # Parse command-line arguments
+    args = parser.parse_args()
+
+    # Call the plot_comparisons routine
+    plot_comparisons(
+        args.ref,
+        args.dev,
+        args.varname,
+        args.level
+    )
+
+
+if __name__ == "__main__":
     main()
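With the argparse wrapper above, the example can be driven from the command line or called directly. A sketch of a direct call (the module path and file paths are hypothetical; level 21 is roughly 500 hPa on the standard 72-level GMAO grid, per the comment this change removes):

from gcpy.examples.plotting.plot_comparisons import plot_comparisons

plot_comparisons(
    ref="first_run/GEOSChem.Restart.20160801_0000z.nc4",
    dev="second_run/GEOSChem.Restart.20160801_0000z.nc4",
    varname="SpeciesRst_O3",
    level=21,
)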
diff --git a/gcpy/examples/plotting/plot_single_panel.py b/gcpy/examples/plotting/plot_single_panel.py
index 61d5e441..d9c9ea62 100755
--- a/gcpy/examples/plotting/plot_single_panel.py
+++ b/gcpy/examples/plotting/plot_single_panel.py
@@ -13,19 +13,34 @@
 (including full argument lists), please see the GCPy documentation
 at https://gcpy.readthedocs.io.
 """
+import argparse
 import xarray as xr
+from matplotlib import use as mpl_use
 import matplotlib.pyplot as plt
 from gcpy.plot.single_panel import single_panel
+from gcpy.util import rename_and_flip_gchp_rst_vars

+# X11 backend needed for plt.show()
+mpl_use("tkagg")

-def main():
+
+def plot_single_panel(infile, varname, level):
     """
     Example routine to create single panel plots.
+
+    Args:
+    -----
+    infile  (str) : Name of netCDF file to read.
+    varname (str) : Name of variable to plot
+    level   (int) : Model level for single-panel plots
+                    in Python notation (starting from 0)
     """
-    # xarray allows us to read in any NetCDF file, the format of
-    # GEOS-Chem diagnostics as an xarray Dataset
-    dset = xr.open_dataset('GEOSChem.Restart.20160701_0000z.nc4')
+    # xarray allows us to read in any NetCDF file
+    dset = xr.open_dataset(infile)
+
+    # Special handling if the file is a GCHP restart file
+    dset = rename_and_flip_gchp_rst_vars(dset)

     # You can easily view the variables available for plotting
     # using xarray.  Each of these variables has its own xarray
@@ -35,26 +50,21 @@ def main():
     # Most variables have some sort of prefix; in this example all
     # variables are prefixed with 'SpeciesRst_'.  We'll select the
     # DataArray for ozone.
-    darr = dset.SpeciesRst_O3
+    darr = dset[varname]

     # Printing a DataArray gives a summary of the dimensions and attributes
     # of the data.
     print(darr)

-    # This Restart file has a time dimension of size 1, with 72 vertical levels,
-    #46 latitude indicies, and 72 longitude indices.
-
-
     # ==================
     # Single-level Plots
     # ==================

     # gcpy.single_panel is the core plotting function of GCPy, able to
     # create a one panel zonal mean or single level plot.  Here we will
-    # create a single level plot of ozone at ~500 hPa.  We must manually
-    # index into the level that we want to plot (index 22 in the standard
-    # 72-layer and 47-layer GMAO vertical grids).
-    slice_500 = darr.isel(lev=22)
+    # create a single level plot.  We must manually index into the level
+    # (in Python notation, starting from 0).
+    darr_single_level = darr.isel(lev=level)

     # single_panel has many arguments which can be optionally specified.
     # The only argument you must always pass to a call to single_panel is
@@ -62,14 +72,21 @@ def main():
     # includes a colorbar with units read from the DataArray, an
     # automatic title (the data variable name in the DataArray), and
     # an extent equivalent to the full lat/lon extent of the DataArray
-    single_panel(slice_500)
+    single_panel(
+        darr_single_level,
+        title=f"{varname} at level {level}"
+    )
     plt.show()

-    #You can specify a specific area of the globe you would like plotted
+    # You can specify a specific area of the globe you would like plotted
     # using the 'extent' argument, which uses the format [min_longitude,
     # max_longitude, min_latitude, max_latitude] with bounds
     # [-180, 180, -90, 90]
-    single_panel(slice_500, extent=[50, -90, -10, 60])
+    single_panel(
+        darr_single_level,
+        extent=[50, -90, -10, 60],
+        title=f"{varname} at level {level} over N. Pacific"
+    )
     plt.show()

     # Other commonly used arguments include specifying a title and a
@@ -77,8 +94,8 @@ def main():
     #You can find more colormaps at
     # https://matplotlib.org/tutorials/colors/colormaps.html
     single_panel(
-        slice_500,
-        title='500mb Ozone over the North Pacific',
+        darr_single_level,
+        title=f"{varname} at level {level} over N. Pacific, viridis colormap",
         comap=plt.get_cmap("viridis"),
         log_color_scale=True,
         extent=[80, -90, -10, 60]
@@ -92,22 +109,65 @@ def main():
     # Use the plot_type argument to specify zonal_mean plotting
     single_panel(
         darr,
-        plot_type="zonal_mean"
+        plot_type="zonal_mean",
+        title=f"Zonal mean plot for {varname}, full atmosphere"
     )
     plt.show()

-    #You can specify pressure ranges in hPa for zonal mean plot
+    # You can specify pressure ranges in hPa for zonal mean plot
     # (by default every vertical level is plotted)
     single_panel(
         darr,
         pres_range=[0, 100],
         log_yaxis=True,
-        log_color_scale=True
+        log_color_scale=True,
+        plot_type="zonal_mean",
+        title=f"Zonal mean plot for {varname}, stratosphere-only"
     )
     plt.show()


+def main():
+    """
+    Parses command-line arguments and calls plot_single_panel.
+    """
+
+    # Tell the parser which arguments to look for
+    parser = argparse.ArgumentParser(
+        description="Single-panel plotting example program"
+    )
+    parser.add_argument(
+        "-i", "--infile",
+        metavar="INF",
+        type=str,
+        required=True,
+        help="input NetCDF file"
+    )
+    parser.add_argument(
+        "-v", "--varname",
+        metavar="VARNAME",
+        type=str,
+        required=True,
+        help="variable to plot"
+    )
+    parser.add_argument(
+        "-l", "--level",
+        metavar="LEV",
+        type=int,
+        required=True,
+        help="level to plot (single-panel plots only), starting at 0"
+    )
+
+    # Parse command-line arguments
+    args = parser.parse_args()
+
+    # Call the plot_single_panel routine
+    plot_single_panel(
+        args.infile,
+        args.varname,
+        args.level
+    )

-# Only execute when we run as a standalone script
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
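As with plot_comparisons above, the refactored example can also be invoked directly; a sketch (the module path is assumed, and the input file name comes from the line this change removes):

from gcpy.examples.plotting.plot_single_panel import plot_single_panel

plot_single_panel(
    infile="GEOSChem.Restart.20160701_0000z.nc4",
    varname="SpeciesRst_O3",
    level=22,   # ~500 hPa on the 72-level GMAO grid, per the old comment
)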
diff --git a/gcpy/file_regrid.py b/gcpy/file_regrid.py
index 9832b1b9..cba259e3 100644
--- a/gcpy/file_regrid.py
+++ b/gcpy/file_regrid.py
@@ -1,29 +1,21 @@
 """
-GCPy regridding utility: Regrids data between lat/lon, cubed-sphere,
-and cubed-sphere stretched grids.
+Regrids data horizontally between lat/lon and/or cubed-sphere grids
+(including stretched grids).
 """
 import argparse
 import os
 import warnings
 import numpy as np
 import xarray as xr
-try:
-    import xesmf as xe
-    from distutils.version import LooseVersion
-    if LooseVersion(xe.__version__) < LooseVersion("0.2.1"):
-        raise ImportError(
-            "file_regrid.py requires xESMF version 0.2.1 or higher.")
-except ImportError as exc:
-    print('file_regrid.py requires xESMF version 0.2.1 or higher!\n\nSee the installation ' + \
-          'instructions here: https://xesmf.readthedocs.io/en/latest/installation.html\n')
-
-from gcpy.grid import get_input_res, get_vert_grid, get_grid_extents
+from gcpy.grid import get_input_res, get_grid_extents, \
+    get_ilev_coord, get_lev_coord
 from gcpy.regrid import make_regridder_S2S, reformat_dims, \
     make_regridder_L2S, make_regridder_C2L, make_regridder_L2L
 from gcpy.util import verify_variable_type
+from gcpy.cstools import get_cubed_sphere_res, is_gchp_lev_positive_down

 # Ignore any FutureWarnings
-warnings.simplefilter(action='ignore', category=FutureWarning)
+warnings.simplefilter(action="ignore", category=FutureWarning)


 def file_regrid(
@@ -32,74 +24,91 @@ def file_regrid(
     dim_format_in,
     dim_format_out,
     cs_res_out=0,
-    ll_res_out='0x0',
+    ll_res_out="0x0",
     sg_params_in=None,
     sg_params_out=None,
-    vert_params_out=None
+    verbose=False,
+    weightsdir="."
 ):
     """
     Regrids an input file to a new horizontal grid specification
     and saves it as a new file.

     Args:
-    fin: str
-        The input filename
-    fout: str
-        The output filename (file will be overwritten if it already exists)
-    dim_format_in: str
-        Format of the input file's dimensions (choose from: classic,
-        checkpoint, diagnostic), where classic denotes lat/lon and
-        checkpoint / diagnostic are cubed-sphere formats
-    dim_format_out: str
-        Format of the output file's dimensions (choose from: classic,
-        checkpoint, diagnostic), where classic denotes lat/lon
-        and checkpoint / diagnostic are cubed-sphere formats
+    -----
+    fin: str
+        The input filename
+    fout: str
+        The output filename (file will be overwritten if it already exists)
+    dim_format_in: str
+        Format of the input file's dimensions (choose from: classic,
+        checkpoint, diagnostic), where classic denotes lat/lon and
+        checkpoint / diagnostic are cubed-sphere formats
+    dim_format_out: str
+        Format of the output file's dimensions (choose from: classic,
+        checkpoint, diagnostic), where classic denotes lat/lon
+        and checkpoint / diagnostic are cubed-sphere formats

     Keyword Args (optional):
-    cs_res_out: int
-        The cubed-sphere resolution of the output dataset.
-        Not used if dim_format_out is classic
-        Default value: 0
-    ll_res_out: str
-        The lat/lon resolution of the output dataset.
-        Not used if dim_format_out is not classic
-        Default value: '0x0'
-    sg_params_in: list[float, float, float]
-        Input grid stretching parameters
-        [stretch-factor, target longitude, target latitude].
-        Not used if dim_format_in is classic
-        Default value: [1.0, 170.0, -90.0] (No stretching)
-    sg_params_out: list[float, float, float]
-        Output grid stretching parameters
-        [stretch-factor, target longitude, target latitude].
-        Not used if dim_format_out is classic
-        Default value: [1.0, 170.0, -90.0] (No stretching)
-    vert_params_out: list(list, list) of list-like types
-        Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
-        Needed for lat/lon output if not using full 72-level or 47-level grid
-        Default value: [[], []]
-
+    ------------------------
+    cs_res_out: int
+        The cubed-sphere resolution of the output dataset.
+        Not used if dim_format_out is classic
+        Default value: 0
+    ll_res_out: str
+        The lat/lon resolution of the output dataset.
+        Not used if dim_format_out is not classic
+        Default value: "0x0"
+    sg_params_in: list[float, float, float]
+        Input grid stretching parameters
+        [stretch-factor, target longitude, target latitude].
+        Not used if dim_format_in is classic.
+        Default value: [1.0, 170.0, -90.0] (No stretching)
+    sg_params_out: list[float, float, float]
+        Output grid stretching parameters
+        [stretch-factor, target longitude, target latitude].
+        Not used if dim_format_out is classic
+        Default value: [1.0, 170.0, -90.0] (No stretching)
+    verbose : bool
+        Toggles verbose output on (True) or off (False).
+    weightsdir : str
+        Path to the directory containing regridding weights (or
+        where weights will be created).  Default value: "."
     """
     verify_variable_type(fin, str)
     verify_variable_type(fout, str)
     verify_variable_type(dim_format_in, str)
     verify_variable_type(dim_format_out, str)

+    # TODO: Consider renaming checkpoint, classic, diagnostic,
+    # which may be confusing to users.
+
+    # Error check arguments
+    valid_formats = ["checkpoint", "classic", "diagnostic"]
+    if dim_format_in not in valid_formats:
+        msg = f"Argument 'dim_format_in' must be one of: {valid_formats}!"
+        raise ValueError(msg)
+    if dim_format_out not in valid_formats:
+        msg = f"Argument 'dim_format_out' must be one of: {valid_formats}!"
+        raise ValueError(msg)
+
     # Assign default values for optional keywords
     if sg_params_in is None:
         sg_params_in = [1.0, 170.0, -90.0]
     if sg_params_out is None:
         sg_params_out = [1.0, 170.0, -90.0]
-    if vert_params_out is None:
-        vert_params_out = [[], []]

     # Load dataset
-    ds_in = xr.open_dataset(
+    dset = xr.open_dataset(
         fin,
         decode_cf=False,
-        engine='netcdf4'
+        engine="netcdf4"
     ).load()
-    cs_res_in = 0
+    cs_res_in = get_cubed_sphere_res(dset)
+
+    if verbose:
+        print(f"file_regrid.py: cs_res_in: {cs_res_in}")
+        print(f"file_regrid.py: cs_res_out: {cs_res_out}")

     # Make sure all xarray.Dataset global & variable attributes are kept
     with xr.set_options(keep_attrs=True):
@@ -108,68 +117,68 @@ def file_regrid(
         # Regrid data
         # ==============================================================

-        # save type of data for later restoration
-        original_dtype = np.dtype(ds_in[list(ds_in.data_vars)[0]])
-
-        oface_files=[]
-        if cs_res_in == cs_res_out and all(
-                [v1 == v2 for v1, v2 in zip(sg_params_in, sg_params_out)]):
-
-            # ----------------------------------------------------------
-            # Input CS/SG grid == Output CS/SG grid
-            # ----------------------------------------------------------
-            print('Skipping regridding since grid parameters are identical')
-            ds_out = ds_in
+        # Save type of data for later restoration
+        # Avoid using the dtype of GCHP cubed-sphere grid variables
+        dset_tmp = dset
+        dtype_orig = np.dtype(dset[list(dset_tmp.data_vars.keys())[-1]])
+        dset_tmp = xr.Dataset()

-        elif dim_format_in != 'classic' and dim_format_out != 'classic':
+        if dim_format_in != "classic" and dim_format_out != "classic":

             # ----------------------------------------------------------
             # Input grid is CS/SG; Output grid is CS/SG
             # ----------------------------------------------------------
-            ds_out = regrid_cssg_to_cssg(
+            dset = regrid_cssg_to_cssg(
                 fout,
-                ds_in,
+                dset,
                 dim_format_in,
                 sg_params_in,
                 cs_res_out,
                 dim_format_out,
-                sg_params_out
+                sg_params_out,
+                verbose=verbose,
+                weightsdir=weightsdir
             )

-        elif dim_format_in == 'classic' and dim_format_out != 'classic':
+        elif dim_format_in == "classic" and dim_format_out != "classic":

             # ----------------------------------------------------------
             # Input grid is LL; Output grid is CS/SG
             # ----------------------------------------------------------
-            ds_out = regrid_ll_to_cssg(
-                ds_in,
+            dset = regrid_ll_to_cssg(
+                dset,
                 cs_res_out,
                 dim_format_out,
-                sg_params_out
+                sg_params_out,
+                verbose=verbose,
+                weightsdir=weightsdir
             )

-        elif dim_format_in != 'classic' and dim_format_out == 'classic':
+        elif dim_format_in != "classic" and dim_format_out == "classic":

             # ----------------------------------------------------------
             # Input grid is CS/SG; Output grid is LL
             # ----------------------------------------------------------
-            ds_out = regrid_cssg_to_ll(
-                ds_in,
+            dset = regrid_cssg_to_ll(
+                dset,
                 cs_res_in,
                 dim_format_in,
                 sg_params_in,
                 ll_res_out,
-                vert_params_out=vert_params_out
+                verbose=verbose,
+                weightsdir=weightsdir
             )

-        elif dim_format_in == 'classic' and dim_format_out == 'classic':
+        elif dim_format_in == "classic" and dim_format_out == "classic":

             # ----------------------------------------------------------
             # Input grid is LL; Output grid is LL
             # ----------------------------------------------------------
-            ds_out = regrid_ll_to_ll(
-                ds_in,
-                ll_res_out
+            dset = regrid_ll_to_ll(
+                dset,
+                ll_res_out,
+                verbose=verbose,
+                weightsdir=weightsdir
             )

@@ -177,20 +186,34 @@ def file_regrid(
         # ==============================================================
         # Post-regridding operations
         # ==============================================================

         # Correct precision changes (accidental 32-bit to 64-bit)
-        ds_out = ds_out.astype(original_dtype)
+        # NOTE: Add a workaround to prevent the xr.DataArray.astype
+        # function from overwriting the "lev" dimension.
+        dset_tmp = dset.astype(
+            dtype=dtype_orig,
+            casting="same_kind",
+            copy=False
+        )
+        dset = dset_tmp.assign_coords(lev=dset.lev)

         # Write dataset to file
-        ds_out.to_netcdf(
+        dset.to_netcdf(
             fout,
-            format='NETCDF4'
+            mode="w",
+            format="NETCDF4",
+            engine="netcdf4",
+            unlimited_dims=["time"],
         )

+        # Free memory of the temporary dataset
+        dset_tmp = xr.Dataset()
+
         # Print the resulting dataset
-        print(ds_out)
+        if verbose:
+            print(dset)
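A sketch of how the reworked file_regrid() might be invoked, using the arguments documented in its docstring above (the file names are hypothetical):

from gcpy.file_regrid import file_regrid

# Regrid a GCHP checkpoint (restart) file at c24 to a 4x5 lat/lon file
file_regrid(
    fin="GEOSChem.Restart.20190701_0000z.c24.nc4",
    fout="GEOSChem.Restart.20190701_0000z.4x5.nc4",
    dim_format_in="checkpoint",   # cubed-sphere restart format
    dim_format_out="classic",     # lat/lon format
    ll_res_out="4x5",
    verbose=True,
    weightsdir="./weights",
)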


 def prepare_cssg_input_grid(
-        ds_in,
+        dset,
         dim_format_in
 ):
     """
@@ -199,7 +222,7 @@ def prepare_cssg_input_grid(

     Args:
     -----
-    ds_in : xr.Dataset
+    dset : xr.Dataset
         Input grid (cubed-sphere or stretched grid)
     dim_format_in : str
         Either "checkpoint" (for restart files)
@@ -207,42 +230,46 @@ def prepare_cssg_input_grid(

     Returns:
     --------
-    ds_out : xr.Dataset
+    dset : xr.Dataset
         Data with reformatted dimensions and dropped fields
     cs_res_in : int
         Cubed-sphere/stretched grid resolution
     """
+
     # Reformat dimensions to "common dimensions (T, Z, F, Y, X)
-    ds_in = reformat_dims(
-        ds_in,
+    dset = reformat_dims(
+        dset,
         dim_format_in,
         towards_common=True
     )

     # Drop variables that don't look like fields
+    # NOTE: Don't drop "lons" and "lats" if present.
     non_fields = [
-        v for v in ds_in.variables.keys()
-        if len(set(ds_in[v].dims) - {'T', 'Z', 'F', 'Y', 'X'}) > 0
-        or len(ds_in[v].dims) == 0]
-    ds_in = ds_in.drop(non_fields)
+        v for v in dset.variables.keys()
+        if len(set(dset[v].dims) - {"T", "Z", "F", "Y", "X"}) > 0
+        or len(dset[v].dims) == 0]
+    dset_in = dset.drop(non_fields)

     # Transpose to T, Z, F, Y, X
-    ds_in = ds_in.transpose('T', 'Z', 'F', 'Y', 'X')
+    dset = dset_in.transpose("T", "Z", "F", "Y", "X")

-    assert ds_in.dims['X'] == ds_in.dims['Y']
-    cs_res_in = ds_in.dims['X']
+    assert dset.dims["X"] == dset.dims["Y"]
+    cs_res_in = dset.dims["X"]

-    return ds_in, cs_res_in
+    return dset, cs_res_in


 def regrid_cssg_to_cssg(
         fout,
-        ds_in,
+        dset,
         dim_format_in,
         sg_params_in,
         cs_res_out,
         dim_format_out,
         sg_params_out,
+        verbose=False,
+        weightsdir="."
 ):
     """
     Regrids from the cubed-sphere/stretched grid to a different
     cubed-sphere/stretched grid resolution.

@@ -252,7 +279,7 @@ def regrid_cssg_to_cssg(
     Args:
     -----
     fout : str
         File name template
-    ds_in : xarray.Dataset
+    dset : xarray.Dataset
         Data on a cubed-sphere/stretched grid
     dim_format_in, dim_format_out : str
         Input & output grid format ("checkpoint", "diagnostic")
     cs_res_out : int
         Cubed-sphere grid resolution
@@ -262,20 +289,49 @@ def regrid_cssg_to_cssg(
     sg_params_in, sg_params_out : list[float, float, float]
         Input & output grid stretching parameters
         [stretch-factor, target longitude, target latitude].

+    Keyword Args (optional):
+    ------------------------
+    verbose : bool
+        Toggles verbose output on (True) or off (False).
+    weightsdir : str
+        Path to the directory containing regridding weights (or
+        where weights will be created).  Default value: "."
+
     Returns:
     --------
-    ds_out : xarray.Dataset
+    dset : xarray.Dataset
         Data regridded to the output lat-lon grid
     """
+    if verbose:
+        print("file_regrid.py: Regridding from CS/SG to CS/SG")
+
+    # Keep all xarray attributes
     with xr.set_options(keep_attrs=True):

         # Change CS/SG dimensions to universal format
         # and drop non-regriddable variables
-        ds_in, cs_res_in = prepare_cssg_input_grid(
-            ds_in,
+        dset, cs_res_in = prepare_cssg_input_grid(
+            dset,
             dim_format_in
         )

+        # ==============================================================
+        # Only regrid if the cubed-sphere grids are similar
+        # (i.e. same resolution & stretched-grid parameters)
+        # ==============================================================
+        if cs_res_in == cs_res_out and \
+           np.array_equal(sg_params_in, sg_params_out) and \
+           dim_format_in == dim_format_out:
+            print("Skipping regridding since grid parameters are identical")
+
+            # Put regridded dataset back into a familiar format
+            dset = dset.rename({
+                "y": "Y",
+                "x": "X",
+            })
+
+            return dset
+
@@ -285,112 +341,146 @@ def regrid_cssg_to_cssg(
         # Make regridders
         regridders = make_regridder_S2S(
             cs_res_in,
             cs_res_out,
             sf_in=sg_params_in[0],
             tlon_in=sg_params_in[1],
             tlat_in=sg_params_in[2],
             sf_out=sg_params_out[0],
             tlon_out=sg_params_out[1],
-            tlat_out=sg_params_out[2]
+            tlat_out=sg_params_out[2],
+            weightsdir=weightsdir
         )

         # Save temporary output face files to minimize RAM usage
-        oface_files = [os.path.join('.',fout+str(x)) for x in range(6)]
+        oface_files = [os.path.join(".",fout+str(x)) for x in range(6)]

         # For each output face, sum regridded input faces
         for oface in range(6):
             oface_regridded = []
             for (iface, regridder) in regridders[oface].items():
-                ds_iface = ds_in.isel(F=iface)
-                if 'F' in ds_iface.coords:
-                    ds_iface = ds_iface.drop('F')
+                dset_iface = dset.isel(F=iface)
+                if "F" in dset_iface.coords:
+                    dset_iface = dset_iface.drop("F")
                 oface_regridded.append(
                     regridder(
-                        ds_iface,
+                        dset_iface,
                         keep_attrs=True
                     )
                 )
             oface_regridded = xr.concat(
                 oface_regridded,
-                dim='intersecting_ifaces'
+                dim="intersecting_ifaces"
             ).sum(
-                'intersecting_ifaces',
-                keep_attrs=True).expand_dims({'F':[oface]})
+                "intersecting_ifaces",
+                keep_attrs=True).expand_dims({"F":[oface]})
             oface_regridded.to_netcdf(
                 oface_files[oface],
-                format='NETCDF4'
+                format="NETCDF4",
+                engine="netcdf4",
+                mode="w"
             )

         # Combine face files
-        ds_out=xr.open_mfdataset(
+        dset = xr.open_mfdataset(
             oface_files,
-            combine='nested',
-            concat_dim='F',
-            engine='netcdf4'
+            combine="nested",
+            concat_dim="F",
+            engine="netcdf4"
         )

-        # lat, lon are from xESMF which we don't want
-        ds_out = drop_lon_and_lat(ds_out)
+        # Remove any temporary files
+        for oface in oface_files:
+            os.remove(oface)
+
+        # ==============================================================
+        # Reshape the data if necessary
+        # ==============================================================

         # Put regridded dataset back into a familiar format
-        ds_out = ds_out.rename({
-            'y': 'Y',
-            'x': 'X',
+        dset = dset.rename({
+            "y": "Y",
+            "x": "X",
         })

         # Reformat dimensions from "common dimension format"
         # to CS/GG "checkpoint" or "diagnostics" format
-        ds_out = reformat_dims(
-            ds_out,
-            dim_format_out,
+        dset = reformat_dims(
+            dset,
+            format=dim_format_out,
             towards_common=False
         )

-        # Save stretched-grid metadata
-        ds_out = save_cssg_metadata(
-            ds_out,
-            cs_res_out,
-            sg_params_out
+        # Fix names and attributes of coordinate variables depending
+        # on the format of the output grid (checkpoint or diagnostic).
+        dset = adjust_cssg_grid_and_coords(
+            dset,
+            dim_format_in,
+            dim_format_out
         )

-        # Remove any temporary files
-        for oface in oface_files:
-            os.remove(oface)
+        # Flip vertical levels (if necessary) and
+        # set the lev:positive attribute accordingly
+        dset = flip_lev_coord_if_necessary(
+            dset,
+            dim_format_in=dim_format_in,
+            dim_format_out=dim_format_out
+        )
+
+        # Save stretched-grid metadata as global attrs
+        dset = save_cssg_metadata(
+            dset,
+            cs_res_out,
+            dim_format_out,
+            sg_params_out,
+            verbose=verbose
+        )

-    return ds_out
+    return dset


 def regrid_cssg_to_ll(
-        ds_in,
+        dset,
         cs_res_in,
         dim_format_in,
         sg_params_in,
         ll_res_out,
-        vert_params_out=None,
+        verbose=False,
+        weightsdir="."
 ):
     """
     Regrids from the cubed-sphere/stretched grid to the lat-lon grid.

     Args:
     -----
-    ds_in : xarray.Dataset
+    dset : xarray.Dataset
         Data on a cubed-sphere/stretched grid
     cs_res_in : int
         Cubed-sphere grid resolution
+    dim_format_in : str
+        Input grid format ("checkpoint", "diagnostic")
     sg_params_in: list[float, float, float]
         Input grid stretching parameters
         [stretch-factor, target longitude, target latitude].
     ll_res_out : str
         Output grid lat/lon resolution (e.g. "4x5")

+    Keyword Args (optional):
+    ------------------------
+    verbose: bool
+        Toggles verbose printout on (True) or off (False)
+    weightsdir : str
+        Path to the directory containing regridding weights (or
+        where weights will be created).  Default value: "."
+
     Returns:
     --------
-    ds_out : xarray.Dataset
+    dset : xarray.Dataset
         Data regridded to the output lat-lon grid
     """
-    if vert_params_out is None:
-        vert_params_out = [[], []]
+    if verbose:
+        print("file_regrid.py: Regridding from CS/SG to LL")

     with xr.set_options(keep_attrs=True):

         # Change CS/SG dimensions to universal format
         # and drop non-regriddable variables
-        ds_in, cs_res_in = prepare_cssg_input_grid(
-            ds_in,
+        dset, cs_res_in = prepare_cssg_input_grid(
+            dset,
             dim_format_in
         )

@@ -398,46 +488,62 @@ def regrid_cssg_to_ll(
         regridders = make_regridder_C2L(
             cs_res_in,
             ll_res_out,
-            sg_params=sg_params_in
+            sg_params=sg_params_in,
+            weightsdir=weightsdir
         )
-        ds_out = xr.concat(
-            [regridders[face](ds_in.isel(F=face), keep_attrs=True)
+        dset = xr.concat(
+            [regridders[face](dset.isel(F=face), keep_attrs=True)
              for face in range(6)],
-            dim='F'
-        ).sum('F', keep_attrs=True)
+            dim="F"
+        ).sum("F", keep_attrs=True)

         # Update dimensions and attributes on the lat-lon grid
-        ds_out = ds_out.rename({'T': 'time', 'Z': 'lev'})
-        ds_out = drop_and_rename_classic_vars(ds_out, towards_gchp=False)
-        ds_out = ds_out.reindex(lev=ds_out.lev[::-1])
-        _, lev_coords, _ = get_vert_grid(ds_out, vert_params_out)
-        ds_out = ds_out.assign_coords({'lev': lev_coords})
-        ds_out['lat'].attrs = {
-            'long_name': 'Latitude',
-            'units': 'degrees_north',
-            'axis': 'Y'
-        }
-        ds_out['lon'].attrs = {
-            'long_name': 'Longitude',
-            'units': 'degrees_east',
-            'axis': 'X'
-        }
+        dset = dset.rename({
+            "T": "time",
+            "Z": "lev"
+        })
+        dset = drop_and_rename_classic_vars(
+            dset,
+            towards_gchp=False
+        )
+
+        # Flip vertical levels (if necessary) and
+        # set the lev:positive attribute accordingly
+        dset = flip_lev_coord_if_necessary(
+            dset,
+            dim_format_in=dim_format_in,
+            dim_format_out="classic"
+        )
+
+        # Save lat/lon coordinate metadata
+        dset = save_ll_metadata(
+            dset,
+            verbose=verbose
+        )
+
+        # Drop cubed-sphere variables
+        if "lons" in dset.data_vars:
+            dset = dset.drop_vars(["lons"])
+        if "lats" in dset.data_vars:
+            dset = dset.drop_vars(["lats"])

-    return ds_out
+    return dset


 def regrid_ll_to_cssg(
-        ds_in,
+        dset,
         cs_res_out,
         dim_format_out,
         sg_params_out,
+        verbose=False,
+        weightsdir="."
 ):
     """
     Regrids from the lat-lon grid to the cubed-sphere/stretched grid.

     Args:
     -----
-    ds_in : xarray.Dataset
+    dset : xarray.Dataset
         Data on a lat/lon grid
     cs_res_out : int
         Cubed-sphere grid resolution
@@ -448,136 +554,404 @@ def regrid_ll_to_cssg(
         Output grid stretching parameters
         [stretch-factor, target longitude, target latitude].

+    Keyword Args (optional):
+    ------------------------
+    verbose : bool
+        Toggles verbose output on (True) or off (False).
+    weightsdir : str
+        Path to the directory containing regridding weights (or
+        where weights will be created). Default value: "."
+
     Returns:
     --------
-    ds_out : xarray.Dataset
+    dset : xarray.Dataset
         Data regridded to the output cubed-sphere/stretched-grid
     """
+    if verbose:
+        print("file_regrid.py: Regridding from LL to CS/SG")
+
     with xr.set_options(keep_attrs=True):

         # Drop non-regriddable variables when going from ll -> cs
-        ds_in = drop_and_rename_classic_vars(ds_in)
+        dset = drop_and_rename_classic_vars(dset)

         # Input lat/lon grid resolution
-        llres_in = get_input_res(ds_in)[0]
+        llres_in = get_input_res(dset)[0]

         # Regrid data to CS/SG
         regridders = make_regridder_L2S(
             llres_in,
             cs_res_out,
-            sg_params=sg_params_out
+            sg_params=sg_params_out,
+            weightsdir=weightsdir
         )
-        ds_out = xr.concat(
-            [regridders[face](ds_in, keep_attrs=True) for face in range(6)],
-            dim='nf'
+        dset = xr.concat(
+            [regridders[face](dset, keep_attrs=True) for face in range(6)],
+            dim="nf"
         )

-        # Flip vertical levels
-        ds_out = ds_out.reindex(lev=ds_out.lev[::-1])
-
-        # Drop lon & lat, which are from xESMF
-        ds_out = drop_lon_and_lat(ds_out)
+        # Flip vertical levels (if necessary) and set lev:positive
+        dset = flip_lev_coord_if_necessary(
+            dset,
+            dim_format_in="classic",
+            dim_format_out=dim_format_out
+        )

         # Rename dimensions to the "common dimension format"
-        ds_out = ds_out.rename({
-            'time': 'T',
-            'lev': 'Z',
-            'nf': 'F',
-            'y': 'Y',
-            'x': 'X',
+        dset = dset.rename({
+            "time": "T",
+            "lev": "Z",
+            "nf": "F",
+            "y": "Y",
+            "x": "X",
+            "lat": "Y",
+            "lon": "X"
         })

-        # Reformat dimensions from "common dimension format"
-        # to CS/GG "checkpoint" or "diagnostics" format
-        ds_out = reformat_dims(
-            ds_out,
-            dim_format_out,
+        # Reformat dims from "common dimension format" to "diagnostic"
+        # (we will convert to "checkpoint" later)
+        dset = reformat_dims(
+            dset,
+            format="diagnostic",
             towards_common=False
         )

-        # Save stretched-grid metadata
-        ds_out = save_cssg_metadata(
-            ds_out,
+        # Flip vertical levels (if necessary) and
+        # set the lev:positive attribute accordingly
+        dset = flip_lev_coord_if_necessary(
+            dset,
+            dim_format_in="classic",
+            dim_format_out=dim_format_out
+        )
+
+        # Fix names and attributes of coordinate variables depending
+        # on the format of the output grid (checkpoint or diagnostic).
+        # Also convert the "diagnostic" grid to the "checkpoint" grid
+        # if "checkpoint" output is requested.
+        dset = adjust_cssg_grid_and_coords(
+            dset,
+            "diagnostic",
+            dim_format_out
+        )
+
+        # Save stretched-grid metadata as global attrs
+        dset = save_cssg_metadata(
+            dset,
             cs_res_out,
-            sg_params_out
+            dim_format_out,
+            sg_params_out,
+            verbose=verbose
         )

-    return ds_out
+    return dset


 def regrid_ll_to_ll(
-        ds_in,
-        ll_res_out
+        dset,
+        ll_res_out,
+        verbose=False,
+        weightsdir="."
 ):
     """
     Regrids from one lat-lon grid resolution to another.
     Args:
     -----
-    ds_in : xarray.Dataset
+    dset : xarray.Dataset
         Data on a lat/lon grid
     ll_res_out : str
         Output grid lat-lon grid resolution (e.g. "4x5")

+    Keyword Args (optional):
+    ------------------------
+    verbose : bool
+        Toggles verbose output on (True) or off (False).
+    weightsdir : str
+        Path to the directory containing regridding weights (or
+        where weights will be created). Default value: "."
+
     Returns:
     --------
-    ds_out : xarray.Dataset
+    dset : xarray.Dataset
         Data regridded to the output lat-lon grid.
     """
-    # Keep all xarray global & variable attributes
+    if verbose:
+        print("file_regrid.py: Regridding from LL to LL")
+
     with xr.set_options(keep_attrs=True):

         # Get the input & output extents
-        in_extent = get_grid_extents(ds_in)
+        in_extent = get_grid_extents(dset)
         out_extent = in_extent
-        ll_res_in = get_input_res(ds_in)[0]
-        [lat_in, lon_in] = list(map(float, ll_res_in.split('x')))
-        [lat_out, lon_out] = list(map(float, ll_res_out.split('x')))
+        ll_res_in = get_input_res(dset)[0]
+        [lat_in, lon_in] = list(map(float, ll_res_in.split("x")))
+        [lat_out, lon_out] = list(map(float, ll_res_out.split("x")))

         # Return if the output & input grids are the same
         if lat_in == lat_out and lon_in == lon_out:
-            ds_out = ds_in
-            return ds_out
+            print("Skipping regridding since grid parameters are identical")
+            return dset

         # Drop non-regriddable variables
         non_fields = [
-            var for var in ds_in.variables.keys() \
-            if 'lat' not in ds_in[var].dims \
-            and 'lon' not in ds_in[var].dims
+            var for var in dset.variables.keys() \
+            if "lat" not in dset[var].dims \
+            and "lon" not in dset[var].dims
         ]
-        ds_in = ds_in.drop(["lat_bnds", "lon_bnds"])
-        non_fields_ds = ds_in[non_fields]
-        ds_in = ds_in.drop(non_fields)
+        dset = dset.drop(["lat_bnds", "lon_bnds"])
+        non_fields = dset[non_fields]
+        dset = dset.drop(non_fields)

-        # Create the regridder
+        # Create the regridder and regrid the data
         regridder = make_regridder_L2L(
             ll_res_in,
             ll_res_out,
             reuse_weights=True,
             in_extent=in_extent,
-            out_extent=out_extent
+            out_extent=out_extent,
+            weightsdir=weightsdir
+        )
+        dset = regridder(
+            dset,
+            keep_attrs=True
         )
-        ds_out = regridder(ds_in, keep_attrs=True, verbose=False)

         # Add the non-regriddable fields back
-        ds_out = ds_out.merge(non_fields_ds)
+        dset = dset.merge(non_fields)

         # Change order of dimensions
-        ds_out = ds_out.transpose(
-            'time', 'lev', 'ilev', 'lat', 'lon', ...
+        dset = dset.transpose(
+            "time", "lev", "ilev", "lat", "lon", ...
+        )
+
+        # Set the lev:positive attribute accordingly
+        dset = flip_lev_coord_if_necessary(
+            dset,
+            dim_format_in="classic",
+            dim_format_out="classic"
+        )
+
+        # Save lat/lon coordinate metadata
+        dset = save_ll_metadata(
+            dset,
+            verbose=verbose
         )

-    return ds_out
+    return dset


+def flip_lev_coord_if_necessary(
+    dset,
+    dim_format_in,
+    dim_format_out
+):
+    """
+    Flips the "lev" and "ilev" coords of an xarray.Dataset in the
+    vertical depending on the values of dim_format_in and
+    dim_format_out.  Also sets the attributes "lev:positive" and
+    "ilev:positive" accordingly.
+
+    Args:
+    -----
+    dset : xarray.Dataset
+        The input dataset.
+    dim_format_in : str
+        Input grid format ("classic", "checkpoint", "diagnostic").
+    dim_format_out : str
+        Output grid format ("classic", "checkpoint", "diagnostic").
+
+    Returns:
+    --------
+    dset : xarray.Dataset
+        The modified dataset.
+
+    Remarks:
+    --------
+    (1) classic    : lev is in ascending order  (lev:positive="up"  )
+    (2) diagnostic : lev is in ascending order* (lev:positive="up"  )
+    (3) checkpoint : lev is in descending order (lev:positive="down")
+
+    *Except for the Emissions collection, which has lev arranged
+     in descending order.
+
+    TODO: Make this function more robust for all cases, since GCHP
+    diagnostics may or may not have lev:positive="up".
+    """
+    verify_variable_type(dset, xr.Dataset)
+    verify_variable_type(dim_format_in, str)
+    verify_variable_type(dim_format_out, str)
+
+    # ==================================================================
+    # Case 1: checkpoint/diagnostic to classic
+    #         lev, ilev need to be in ascending order
+    # ==================================================================
+    if dim_format_in != "classic" and dim_format_out == "classic":
+
+        # Flip ilev and set to eta values at edges (if necessary)
+        if "ilev" in dset.coords:
+            if is_gchp_lev_positive_down(dset):
+                dset = dset.reindex(ilev=dset.ilev[::-1])
+            if any(var > 1.0 for var in dset.ilev):
+                coord = get_ilev_coord(
+                    n_lev=dset.dims["ilev"],
+                    top_down=False
+                )
+                dset = dset.assign_coords({"ilev": coord})
+            dset.ilev.attrs["positive"] = "up"
+
+        # Flip lev and set to eta values at midpoints (if necessary)
+        if "lev" in dset.coords:
+            if is_gchp_lev_positive_down(dset):
+                dset = dset.reindex(lev=dset.lev[::-1])
+            if any(var > 1.0 for var in dset.lev):
+                coord = get_lev_coord(
+                    n_lev=dset.dims["lev"],
+                    top_down=False
+                )
+                dset = dset.assign_coords({"lev": coord})
+            dset.lev.attrs["positive"] = "up"
+
+        return dset
+
+    # ==================================================================
+    # Case 2: classic/diagnostic to checkpoint
+    #         lev needs to be in descending order
+    #
+    # TODO: Check for Emissions diagnostic (not a common use case)
+    # ==================================================================
+    if dim_format_in != "checkpoint" and dim_format_out == "checkpoint":
+
+        if "lev" in dset.coords:
+            if not is_gchp_lev_positive_down(dset):
+                dset = dset.reindex(lev=dset.lev[::-1])
+            if any(var > 1.0 for var in dset.lev):
+                coord = get_lev_coord(
+                    n_lev=dset.dims["lev"],
+                    top_down=True
+                )
+                dset = dset.assign_coords({"lev": coord})
+            dset.lev.attrs["positive"] = "down"
+
+        return dset
+
+    # ==================================================================
+    # Case 3: classic/diagnostic to classic/diagnostic:
+    #         No flipping, but add lev:positive="up" and ilev:positive="up"
+    #
+    # TODO: Check for Emissions diagnostic (not a common use case)
+    # ==================================================================
+    if dim_format_in in ("classic", "diagnostic") and \
+       dim_format_out in ("classic", "diagnostic"):
+
+        if "ilev" in dset.coords:
+            dset.ilev.attrs["positive"] = "up"
+        if "lev" in dset.coords:
+            dset.lev.attrs["positive"] = "up"
+        return dset
+
+    # ==================================================================
+    # Case 4: checkpoint to checkpoint
+    #         No flipping needed, but add lev:positive="down"
+    # ==================================================================
+    if dim_format_in == "checkpoint" and dim_format_out == "checkpoint":
+
+        if "lev" in dset.coords:
+            dset.lev.attrs["positive"] = "down"
+        return dset
+
+    # ==================================================================
+    # Case 5: checkpoint to diagnostic:
+    #         lev needs to be in ascending order
+    # ==================================================================
+    if dim_format_in == "checkpoint" and dim_format_out == "diagnostic":
+
+        if "lev" in dset.coords:
+            dset = dset.reindex(lev=dset.lev[::-1])
+            if any(var > 1.0 for var in dset.lev):
+                coord = get_lev_coord(
+                    n_lev=dset.dims["lev"],
+                    top_down=False
+                )
+                dset = dset.assign_coords({"lev": coord})
+            dset.lev.attrs["positive"] = "up"
+
+        return dset
+
+    return dset


+def save_ll_metadata(
+    dset,
+    verbose=False,
+):
+    """
+    Updates the lat-lon coordinate metadata in an xarray.Dataset object.
+
+    Args:
+    -----
+    dset : xarray.Dataset
+        The input data (on lat-lon grid).
+
+    Keyword Arguments:
+    ------------------
+    verbose : bool
+        Toggles verbose printout on (True) or off (False)
+
+    Returns:
+    --------
+    dset : xarray.Dataset
+        Original data plus updated coordinate metadata.
+    """
+    with xr.set_options(keep_attrs=True):
+
+        dset.time.attrs = {
+            "axis": "T"
+        }
+
+        dset.lat.attrs = {
+            "long_name": "Latitude",
+            "units": "degrees_north",
+            "axis": "Y"
+        }
+
+        dset.lon.attrs = {
+            "long_name": "Longitude",
+            "units": "degrees_east",
+            "axis": "X"
+        }
+
+        # ilev:positive is set by flip_lev_coord_if_necessary
+        if "ilev" in dset.coords:
+            dset.ilev.attrs["long_name"] = \
+                "hybrid level at interfaces ((A/P0)+B)"
+            dset.ilev.attrs["units"] = "level"
+            dset.ilev.attrs["axis"] = "Z"
+
+        # lev:positive is set by flip_lev_coord_if_necessary
+        if "lev" in dset.coords:
+            dset.lev.attrs["long_name"] = \
+                "hybrid level at midpoints ((A/P0)+B)"
+            dset.lev.attrs["units"] = "level"
+            dset.lev.attrs["axis"] = "Z"
+
+        if verbose:
+            print("file_regrid.py: In routine save_ll_metadata:")
+            print(dset.coords)
+
+    return dset


 def save_cssg_metadata(
         dset,
         cs_res_out,
-        sg_params_out
+        dim_format_out,
+        sg_params_out,
+        verbose=False
 ):
     """
     Saves the stretched-grid metadata to an xarray.Dataset object
-    containing cubed-sphere/stretched grid data
+    containing cubed-sphere/stretched grid data.

     Args:
     -----
@@ -585,20 +959,64 @@ def save_cssg_metadata(
         Data on the stretched grid.
     cs_res_out : int
         Cubed-sphere grid resolution.
+    dim_format_out : str
+        Either "checkpoint" (for restart files) or
+        "diagnostic" (for History diagnostic files).
     sg_params_out: list[float, float, float]
         Output grid stretching parameters
-        [stretch-factor, target longitude, target latitude]
+        [stretch-factor, target longitude, target latitude].
+    verbose : bool
+        Toggles verbose printout on (True) or off (False).

     Returns:
     --------
-    dset_out : xarray.Dataset
+    dset : xarray.Dataset
         The original data, plus stretched grid metadata.
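+
+    Example:
+    --------
+    An illustrative call (with hypothetical argument values) for a
+    stretched C48 grid written out in "checkpoint" format:
+
+    >>> dset = save_cssg_metadata(
+    ...     dset,
+    ...     cs_res_out=48,
+    ...     dim_format_out="checkpoint",
+    ...     sg_params_out=[2.0, 170.0, -90.0],
+    ...     verbose=False
+    ... )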
""" + if verbose: + print("file_regrid.py: Saving CS/SG coordinate metadata") + with xr.set_options(keep_attrs=True): - dset.attrs['stretch_factor'] = sg_params_out[0] - dset.attrs['target_longitude'] = sg_params_out[1] - dset.attrs['target_latitude'] = sg_params_out[2] - dset.attrs['cs_res'] = cs_res_out + + # Stretched-grid global attrs + dset.attrs["stretch_factor"] = sg_params_out[0] + dset.attrs["target_longitude"] = sg_params_out[1] + dset.attrs["target_latitude"] = sg_params_out[2] + dset.attrs["cs_res"] = cs_res_out + + # Special handling for "checkpoint" format + if "checkpoint" in dim_format_out: + if "lon" in dset.dims: + dset.lon.attrs = { + "standard_name": "longitude", + "long_name": "Longitude", + "units": "degrees_east", + "axis": "X" + } + if "lat" in dset.dims: + dset.lat.attrs = { + "standard_name": "latitude", + "long_name": "Latitude", + "units": "degrees_north", + "axis": "Y" + } + + # Special handling for "checkpoint" format + if "diagnostic" in dim_format_out: + if "lons" in dset.dims: + dset.lons.attrs = { + "standard_name": "longitude", + "long_name": "Longitude", + "units": "degrees_east", + "axis": "X" + } + if "lats" in dset.dims: + dset.lats.attrs = { + "standard_name": "latitude", + "long_name": "Latitude", + "units": "degrees_north", + "axis": "Y" + } return dset @@ -609,117 +1027,329 @@ def rename_restart_variables(dset, towards_gchp=True): and GCHP conventions. Args: - dset: xarray.Dataset - The input dataset + ----- + dset : xarray.Dataset + The input dataset. Keyword Args (optional): - towards_gchp: bool - Whether renaming to (True) or from (False) GCHP format - Default value: True + ------------------------ + towards_gchp: bool + Whether renaming to (True) or from (False) GCHP format + Default value: True Returns: - xarray.Dataset - Input dataset with variables renamed + -------- + dset : xarray.Dataset + The modified dataset. """ if towards_gchp: - old_str = 'SpeciesRst' - new_str = 'SPC' + old_str = "SpeciesRst" + new_str = "SPC" else: - old_str = 'SPC' - new_str = 'SpeciesRst' - return dset.rename({name: name.replace(old_str, new_str, 1) - for name in list(dset.data_vars) - if name.startswith(old_str)}) + old_str = "SPC" + new_str = "SpeciesRst" + return dset.rename({ + name: name.replace(old_str, new_str, 1) + for name in list(dset.data_vars) + if name.startswith(old_str) + }) -def drop_lon_and_lat(dset): +def adjust_cssg_grid_and_coords( + dset, + dim_format_in, + dim_format_out, +): """ - Drops lon and lat variables, which are added by xESMF. - These are not needed for GCHP data files. + Adjusts cubed-sphere/stretched-grid coordinate names and attributes. Args: - dset: xarray.Dataset - The input data. + ----- + dset : xarray.Dataset + The input data + dim_format_in, dim_format_out: str + Either "checkpoint" (for checkpoint/restart files) or + "diagnostic" (for History diagnostic files). Returns: - dset: xarray.Dataset - The input data minus "lat" and "lon" variables. - """ - verify_variable_type(dset, xr.Dataset) + -------- + dset : xarray.Dataset + The input data with updated coordinate names & attributes. 
+    Remarks:
+    --------
+    "diagnostic" dimension format: (time, lev, nf, Ydim, Xdim)
+    "checkpoint" dimension format: (time, lev, lat, lon); lat = 6*lon
+    """
+    # Keep all xarray attributes intact
     with xr.set_options(keep_attrs=True):
-        if "lat" in dset.variables.keys():
-            dset = dset.drop("lat")
-        if "lon" in dset.variables.keys():
-            dset = dset.drop("lon")
+
+        # ==============================================================
+        # Rename coordinates returned by the xESMF regridding to
+        # the "lons" and "lats" coordinates as saved out by MAPL.
+        # ==============================================================
+        if "diagnostic" in dim_format_in:
+            if "Xdim" in dset.variables:
+                dset = dset.rename_vars({"Xdim": "lons"})
+            if "Ydim" in dset.variables:
+                dset = dset.rename_vars({"Ydim": "lats"})
+
+        if "checkpoint" in dim_format_in:
+            if "lon" in dset.variables:
+                dset = dset.rename_vars({"lon": "lons"})
+            if "lat" in dset.variables:
+                dset = dset.rename_vars({"lat": "lats"})
+
+        if "lons" in dset.variables:
+            dset.lons.attrs = {
+                "standard_name": "longitude",
+                "long_name": "Longitude",
+                "units": "degrees_east"
+            }
+
+        if "lats" in dset.variables:
+            dset.lats.attrs = {
+                "standard_name": "latitude",
+                "long_name": "Latitude",
+                "units": "degrees_north"
+            }
+
+        # ==================================================================
+        # For "diagnostic" dimension format only
+        # ==================================================================
+        if "diagnostic" in dim_format_out:
+
+            # Add "fake" Xdim and Ydim coordinates as done by MAPL,
+            # which is needed for the GMAO GrADS visualization software.
+            # NOTE: Use .values to convert to numpy.ndarray type in
+            # order to keep xarray from trying to redefine dim "nf".
+            if "lons" in dset.coords and "lats" in dset.coords:
+                dset = dset.assign_coords({
+                    "Xdim": dset.lons.isel(nf=0, Ydim=0).values,
+                    "Ydim": dset.lats.isel(nf=0, Xdim=0).values
+                })
+            elif "lon" in dset.variables and "lat" in dset.variables:
+                dset = dset.assign_coords({
+                    "Xdim": dset.lon.isel(nf=0, Ydim=0).values,
+                    "Ydim": dset.lat.isel(nf=0, Xdim=0).values
+                })
+            dset.Xdim.attrs = {
+                "long_name": "Fake Longitude for GrADS Compatibility",
+                "units": "degrees_east"
+            }
+            dset.Ydim.attrs = {
+                "long_name": "Fake Latitude for GrADS Compatibility",
+                "units": "degrees_north"
+            }
+
+            # Drop variables that may be left over from regridding
+            if "lon" in dset.variables:
+                dset = dset.drop_vars("lon")
+            if "lat" in dset.variables:
+                dset = dset.drop_vars("lat")
+
+        # ==================================================================
+        # For "checkpoint" dimension format only
+        # ==================================================================
+        if "checkpoint" in dim_format_out:
+
+            # Reshape the grid from (time, lev, nf, Xdim, Ydim) dimensions
+            # to (time, lev, lat, lon) dimensions (where lat = 6*lon)
+            # Also drop any unnecessary variables
+            dset = reshape_cssg_diag_to_chkpt(dset)
+            if "lons" in dset.variables:
+                dset = dset.drop_vars("lons")
+            if "lats" in dset.variables:
+                dset = dset.drop_vars("lats")

     return dset


 def drop_and_rename_classic_vars(dset, towards_gchp=True):
     """
-    Renames and drops certain restart variables according to GEOS-Chem Classic
-    and GCHP conventions.
+    Renames and drops certain restart variables according to
+    GEOS-Chem Classic and GCHP conventions.

     Args:
-        ds: xarray.Dataset
-            The input dataset
+    -----
+    dset : xarray.Dataset
+        The input dataset.
     Keyword Args (optional):
-        towards_gchp: bool
-            Whether going to (True) or from (False) GCHP format
-            Default value: True
+    ------------------------
+    towards_gchp: bool
+        Whether going to (True) or from (False) GCHP format.
+        Default value: True

     Returns:
-        xarray.Dataset
-            Input dataset with variables renamed and dropped
+    --------
+    dset : xarray.Dataset
+        The modified dataset.
     """
     with xr.set_options(keep_attrs=True):
         if towards_gchp:
             dset = dset.rename(
-                {name: name.replace('Met_', '', 1).replace('Chem_', '', 1)
+                {name: name.replace("Met_", "", 1).replace("Chem_", "", 1)
                  for name in list(dset.data_vars)
-                 if name.startswith('Met_') or name.startswith('Chem_')})
-            if 'DELPDRY' in list(dset.data_vars):
-                dset = dset.rename({'DELPDRY': 'DELP_DRY'})
+                 if name.startswith("Met_") or name.startswith("Chem_")})
+            if "DELPDRY" in list(dset.data_vars):
+                dset = dset.rename({"DELPDRY": "DELP_DRY"})
             dset = dset.drop_vars(
-                ['P0',
-                 'hyam',
-                 'hybm',
-                 'hyai',
-                 'hybi',
-                 'AREA',
-                 'ilev',
-                 'PS1DRY',
-                 'PS1WET',
-                 'TMPU1',
-                 'SPHU1',
-                 'StatePSC',
-                 'lon_bnds',
-                 'lat_bnds'
+                ["P0",
+                 "hyam",
+                 "hybm",
+                 "hyai",
+                 "hybi",
+                 "AREA",
+                 "ilev",
+                 "PS1DRY",
+                 "PS1WET",
+                 "TMPU1",
+                 "SPHU1",
+                 "StatePSC",
+                 "lon_bnds",
+                 "lat_bnds"
                 ],
-                errors='ignore'
+                errors="ignore"
             )

         else:
             renames = {
-                'DELP_DRY': 'Met_DELPDRY',
-                'BXHEIGHT': 'Met_BXHEIGHT',
-                'TropLev': 'Met_TropLev',
-                'DryDepNitrogen': 'Chem_DryDepNitrogen',
-                'WetDepNitrogen': 'Chem_WetDepNitrogen',
-                'H2O2AfterChem': 'Chem_H2O2AfterChem',
-                'SO2AfterChem': 'Chem_SO2AfterChem',
-                'KPPHvalue': 'Chem_KPPHvalue'
+                "DELP_DRY": "Met_DELPDRY",
+                "BXHEIGHT": "Met_BXHEIGHT",
+                "TropLev": "Met_TropLev",
+                "DryDepNitrogen": "Chem_DryDepNitrogen",
+                "WetDepNitrogen": "Chem_WetDepNitrogen",
+                "H2O2AfterChem": "Chem_H2O2AfterChem",
+                "SO2AfterChem": "Chem_SO2AfterChem",
+                "KPPHvalue": "Chem_KPPHvalue"
             }
             data_vars = list(dset.data_vars)
             new_renames = renames.copy()
             for items in renames.items():
                 if items[0] not in data_vars:
-                    del(new_renames[items[0]])
+                    del new_renames[items[0]]
             dset = dset.rename(new_renames)

-    return rename_restart_variables(dset, towards_gchp=towards_gchp)
+    return rename_restart_variables(
+        dset,
+        towards_gchp=towards_gchp
+    )
+
+
+def order_dims_time_lev_lat_lon(dset):
+    """
+    Transposes dims of an xarray.Dataset to be in
+    (time, lev, lat, lon) order.  This corresponds to
+    Fortran column-major ordering.
+
+    Args:
+    -----
+    dset : xarray.Dataset
+        The input dataset.
+
+    Returns:
+    --------
+    dset : xarray.Dataset
+        The modified dataset.
+    """
+    verify_variable_type(dset, xr.Dataset)
+
+    if "lev" in dset.dims and "time" in dset.dims:
+        dset = dset.transpose("time", "lev", "lat", "lon")
+    elif "lev" in dset.dims:
+        dset = dset.transpose("lev", "lat", "lon")
+    elif "time" in dset.dims:
+        dset = dset.transpose("time", "lat", "lon")
+    else:
+        dset = dset.transpose("lat", "lon")
+
+    return dset
+
+
+def reshape_cssg_diag_to_chkpt(
+    dset,
+    verbose=False
+):
+    """
+    Reshapes a dataset from diagnostic to checkpoint dimension format.
+
+    Args:
+    -----
+    dset : xarray.Dataset
+        Dataset with dimensions (time, lev, nf, Xdim, Ydim).
+
+    Keyword Args (optional):
+    ------------------------
+    verbose : bool
+        Toggles verbose output on (True) or off (False).
+
+    Returns:
+    --------
+    dset : xarray.Dataset
+        Dataset with dimensions (time, lev, lat, lon), where lat = 6*lon.
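+
+    Example:
+    --------
+    Illustrative only: for a hypothetical C24 diagnostic dataset,
+    dimensions (time: 1, lev: 72, nf: 6, Ydim: 24, Xdim: 24) would
+    be reshaped to (time: 1, lev: 72, lat: 144, lon: 24):
+
+    >>> dset = reshape_cssg_diag_to_chkpt(dset, verbose=True)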
+    """
+    verify_variable_type(dset, xr.Dataset)
+
+    if verbose:
+        print("file_regrid.py: reshaping diagnostic to checkpoint")
+
+    # Keep xarray attributes unchanged
+    with xr.set_options(keep_attrs=True):
+
+        # ==============================================================
+        # Get the size of the Xdim/Ydim or lon/lat dims
+        # ==============================================================
+        if "Xdim" in dset.dims and "Ydim" in dset.dims:
+            xdim = dset.dims["Xdim"]
+            ydim = dset.dims["Ydim"]
+        elif "lon" in dset.dims and "lat" in dset.dims:
+            xdim = dset.dims["lon"]
+            ydim = dset.dims["lat"]
+        else:
+            msg = "Dimensions (Xdim, Ydim) or (lon, lat) not found!"
+            raise ValueError(msg)
+
+        # ==============================================================
+        # Create the "lon" coord as a 1-D vector of values
+        # ==============================================================
+        if "Xdim" in dset.dims:
+            dset = dset.rename_dims({"Xdim": "lon"})
+        elif "lon" in dset.coords:
+            dset = dset.drop_vars("lon")
+        dset = dset.assign_coords({
+            "lon": np.linspace(1, xdim, xdim, dtype=np.float64)
+        })
+
+        # ==============================================================
+        # The dset.stack operation combines the nf and Ydim
+        # dimensions into a MultiIndex (i.e. a list of tuples,
+        # where each tuple is (face number, cell number)).
+        # We then have to unpack that into a linear list that
+        # ranges from 1..nf*ydim.
+        # ==============================================================
+        if "nf" in dset.dims and "Ydim" in dset.dims:
+            dset = dset.stack(lat=("nf", "Ydim"))
+            multi_index_list = dset.lat.values
+            lats = np.zeros(6 * ydim)    # 6 cubed-sphere faces
+            for i, tpl in enumerate(multi_index_list):
+                lats[i] = (tpl[1] + (tpl[0] * ydim)) + 1
+            dset = dset.assign_coords({"lat": lats})
+
+        # ==============================================================
+        # Transpose dimensions
+        # ==============================================================
+        dset = order_dims_time_lev_lat_lon(dset)
+
+        # ==============================================================
+        # Drop coordinates not needed in checkpoint format files
+        # ==============================================================
+        if "lons" in dset.variables:
+            dset = dset.drop_vars("lons")
+        if "lats" in dset.variables:
+            dset = dset.drop_vars("lats")
+
+    return dset


 def main():
@@ -755,86 +1385,98 @@ def main():
     --ll_res_out
         Resolution for the output file in 'latxlon` format

-    --vert_params_out
-        Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format
+    --verbose
+        Enables verbose printout (off by default).
+
+    -w --weightsdir
+        Directory where regridding weights are stored (or will be created)
     """
-    # Parse arguments from the command line
+    # Tell parser which arguments to expect
     parser = argparse.ArgumentParser(
-        description='General cubed-sphere to cubed-sphere regridder.'
+        description="General regridder for cubed-sphere/stretched-grid and lat-lon files."
     )
     parser.add_argument(
-        '-i', '--filein',
-        metavar='FIN',
+        "-i", "--filein",
+        metavar="FIN",
         type=str,
         required=True,
-        help='input NetCDF file'
+        help="input NetCDF file"
     )
     parser.add_argument(
-        '-o', '--fileout',
-        metavar='FOUT',
+        "-o", "--fileout",
+        metavar="FOUT",
         type=str,
         required=True,
-        help='name of output file'
+        help="name of output file"
     )
     parser.add_argument(
-        '--sg_params_in',
-        metavar='P',
+        "--sg_params_in",
+        metavar="P",
         type=float,
         nargs=3,
         default=[1.0, 170.0, -90.0],
-        help='input grid stretching parameters (stretch-factor, target longitude, target latitude)'
+        help="input grid stretching parameters (stretch-factor, target longitude, target latitude)"
    )
    parser.add_argument(
-        '--sg_params_out',
-        metavar='P',
+        "--sg_params_out",
+        metavar="P",
        type=float,
        nargs=3,
        default=[1.0, 170.0, -90.0],
-        help='output grid stretching parameters (stretch-factor, target longitude, target latitude)'
+        help="output grid stretching parameters (stretch-factor, target longitude, target latitude)"
    )
    parser.add_argument(
-        '--cs_res_out',
-        metavar='RES',
+        "--cs_res_out",
+        metavar="RES",
        type=int,
        required=False,
-        help='output grid\'s cubed-sphere resolution'
+        help="output grid's cubed-sphere resolution"
    )
    parser.add_argument(
-        '--ll_res_out',
-        metavar='RES',
+        "--ll_res_out",
+        metavar="RES",
        type=str,
        required=False,
-        help='output grid\'s lat/lon resolution in \'latxlon\' format'
+        help="output grid's lat/lon resolution in 'latxlon' format"
    )
    parser.add_argument(
-        '--dim_format_in',
-        metavar='WHICH',
+        "--dim_format_in",
+        metavar="WHICH",
        type=str,
        choices=[
-            'checkpoint',
-            'diagnostic',
-            'classic'],
+            "checkpoint",
+            "diagnostic",
+            "classic"],
        required=True,
-        help='format of the input file\'s dimensions (choose from: checkpoint, diagnostic)'
+        help="format of the input file's dimensions (choose from: checkpoint, diagnostic, classic)"
    )
    parser.add_argument(
-        '--dim_format_out',
-        metavar='WHICH',
+        "--dim_format_out",
+        metavar="WHICH",
        type=str,
        choices=[
-            'checkpoint',
-            'diagnostic',
-            'classic'],
+            "checkpoint",
+            "diagnostic",
+            "classic"],
        required=True,
-        help='format of the output file\'s dimensions (choose from: checkpoint, diagnostic)'
+        help="format of the output file's dimensions (choose from: checkpoint, diagnostic, classic)"
    )
    parser.add_argument(
-        '--vert_params_out',
-        metavar='VERT',
-        type=list,
-        required=False,
-        help='Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format'
+        "--verbose",
+        action="store_true",
+        default=False,
+        help="Toggles verbose output on"
+    )
+    parser.add_argument(
+        "-w", "--weightsdir",
+        metavar="WGT",
+        type=str,
+        default=".",
+        help="Directory where regridding weights are found (or will be created)"
    )

    args = parser.parse_args()
@@ -848,9 +1490,11 @@ def main():
        args.ll_res_out,
        args.sg_params_in,
        args.sg_params_out,
-        args.vert_params_out)
+        args.verbose,
+        args.weightsdir
+    )


 # Only call when run as standalone
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/gcpy/grid.py b/gcpy/grid.py
index c4d8be5e..428ebc5b 100644
--- a/gcpy/grid.py
+++ b/gcpy/grid.py
@@ -1,10 +1,14 @@
+"""
+Module containing variables and functions that define and
+manipulate GEOS-Chem horizontal and vertical grids
+"""
 import xarray as xr
 import numpy as np
 import scipy.sparse
 from itertools import product

-from .util import get_shape_of_data
+from gcpy.util import get_shape_of_data, verify_variable_type
 from .grid_stretching_transforms import scs_transform
-from .constants import R_EARTH_m
+from gcpy.constants import R_EARTH_m


 def get_troposphere_mask(ds):
@@ -165,7 +169,7 @@ def call_make_grid(res, gridtype, in_extent=[-180, 180, -90, 90],

     Returns:
         [grid, grid_list]: list(dict, list(dict))
-            Returns the created grid.
+            Returns the created grid.
             grid_list is a list of grids if gridtype is 'cs', else it is None
     """

@@ -288,6 +292,110 @@ def get_vert_grid(dataset, AP=[], BP=[]):
     return new_grid.p_edge(), new_grid.p_mid(), np.size(AP)


+def get_ilev_coord(
+    n_lev=72,
+    AP_edge=None,
+    BP_edge=None,
+    top_down=False
+):
+    """
+    Returns the eta values (defined as (A/P0) + B) at vertical
+    level edges.  These are used to define the "ilev" netCDF
+    coordinate variable.
+
+    Keyword Args (optional):
+    ------------------------
+    n_lev : int
+        Number of levels in the grid.  Default = 72
+    AP_edge : list-like
+        Hybrid grid parameter A (hPa), with values placed on level
+        edges.  If not specified, values from the _GEOS_72L_AP array
+        in this module will be used.
+    BP_edge : list-like
+        Hybrid grid parameter B (unitless), with values placed on level
+        edges.  If not specified, values from the _GEOS_72L_BP array in
+        this module will be used.
+    top_down : bool
+        Set this to True to arrange the eta coordinate from
+        top-of-atm downward, or to False (default) to arrange
+        it from the surface upward.
+
+    Returns:
+    --------
+    ilev : numpy.ndarray
+        List of eta values at vertical grid edges
+    """
+    if n_lev is None:
+        n_lev = 72
+    if AP_edge is None and n_lev == 72:
+        AP_edge = _GEOS_72L_AP
+    if AP_edge is None and n_lev == 47:
+        AP_edge = _GEOS_47L_AP
+    if BP_edge is None and n_lev == 72:
+        BP_edge = _GEOS_72L_BP
+    if BP_edge is None and n_lev == 47:
+        BP_edge = _GEOS_47L_BP
+
+    # Get eta values at vertical level edges
+    ilev = np.array((AP_edge/1000.0) + BP_edge, dtype=np.float64)
+    if top_down:
+        ilev = ilev[::-1]
+    return ilev
+
+
+def get_lev_coord(
+    n_lev=72,
+    AP_edge=None,
+    BP_edge=None,
+    top_down=False
+):
+    """
+    Returns the eta values (defined as (A/P0) + B) at vertical
+    level midpoints.  These are used to define the "lev"
+    netCDF coordinate variable.
+
+    Keyword Args (optional):
+    ------------------------
+    n_lev : int
+        Number of levels in the grid.  Default = 72
+    AP_edge : list-like
+        Hybrid grid parameter A (hPa), with values placed on level
+        edges.  If not specified, values from the _GEOS_72L_AP array
+        in this module will be used.
+    BP_edge : list-like
+        Hybrid grid parameter B (unitless), with values placed on level
+        edges.  If not specified, values from the _GEOS_72L_BP array in
+        this module will be used.
+    top_down : bool
+        Set this to True to arrange the eta coordinate from
+        top-of-atm downward, or to False (default) to arrange
+        it from the surface upward.
+
+    Returns:
+    --------
+    lev : numpy.ndarray
+        List of eta values at vertical grid midpoints
+    """
+    if n_lev is None:
+        n_lev = 72
+    if AP_edge is None and n_lev == 72:
+        AP_edge = _GEOS_72L_AP
+    if AP_edge is None and n_lev == 47:
+        AP_edge = _GEOS_47L_AP
+    if BP_edge is None and n_lev == 72:
+        BP_edge = _GEOS_72L_BP
+    if BP_edge is None and n_lev == 47:
+        BP_edge = _GEOS_47L_BP
+
+    # Compute AP, BP at midpoints.
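+    # (Each midpoint value is the average of the two adjacent edge
+    # values, e.g. AP_mid[k] = 0.5 * (AP_edge[k] + AP_edge[k+1]).)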
+    # Convert inputs to numpy.ndarray for fast computation
+    AP_edge = np.array(AP_edge)
+    BP_edge = np.array(BP_edge)
+    AP_mid = (AP_edge[0:n_lev:1] + AP_edge[1:n_lev+1:1]) * 0.5
+    BP_mid = (BP_edge[0:n_lev:1] + BP_edge[1:n_lev+1:1]) * 0.5
+    lev = np.array((AP_mid / 1000.0) + BP_mid, dtype=np.float64)
+    if top_down:
+        lev = lev[::-1]
+    return lev
+
+
 def get_pressure_indices(pedge, pres_range):
     """
     Get indices where edge pressure values are within a given pressure range
@@ -1121,7 +1229,7 @@ def _initialize(self):
                     pp[:, i, j] = latlon_to_cartesian(
                         lambda_rad[i, j], theta_rad[i, j])

-        # Map the edges on the sphere back to the cube.
+        # Map the edges on the sphere back to the cube.
         #Note that all intersections are at x = -rsq3
         # print("EDGES")
         for ij in range(1, c + 1):
diff --git a/gcpy/regrid.py b/gcpy/regrid.py
index 09fbe4db..3e9e4fc3 100644
--- a/gcpy/regrid.py
+++ b/gcpy/regrid.py
@@ -1,20 +1,25 @@
-''' Functions for creating xesmf regridder objects '''
-
+"""
+Module containing functions for creating xESMF regridder objects.
+"""
 import os
-import xesmf as xe
-from .grid import make_grid_LL, make_grid_CS, make_grid_SG, get_input_res, call_make_grid, \
-    get_grid_extents, get_vert_grid
+import warnings
 import hashlib
+import xesmf as xe
 import numpy as np
 import xarray as xr
 import pandas as pd
 import scipy.sparse
-import warnings
+from gcpy.grid import make_grid_LL, make_grid_CS, make_grid_SG, \
+    get_input_res, call_make_grid, get_grid_extents, get_vert_grid


 def make_regridder_L2L(
-    llres_in, llres_out, weightsdir='.', reuse_weights=False,
+    llres_in,
+    llres_out,
+    weightsdir='.',
+    reuse_weights=False,
     in_extent=[-180, 180, -90, 90],
-    out_extent=[-180, 180, -90, 90]):
+    out_extent=[-180, 180, -90, 90]
+):
     """
     Create an xESMF regridder between two lat/lon grids

@@ -64,11 +69,11 @@ def make_regridder_L2L(
     weightsfile = os.path.join(
         weightsdir, 'conservative_{}_{}_{}_{}.nc'.format(
             llres_in, llres_out, in_extent_str, out_extent_str))
-
+
     if not os.path.isfile(weightsfile) and reuse_weights:
         #prevent error with more recent versions of xesmf
         reuse_weights=False
-
+
     try:
         regridder = xe.Regridder(
             llgrid_in,
@@ -86,8 +91,13 @@ def make_regridder_L2L(
     return regridder


-def make_regridder_C2L(csres_in, llres_out, weightsdir='.',
-                       reuse_weights=True, sg_params=[1, 170, -90]):
+def make_regridder_C2L(
+    csres_in,
+    llres_out,
+    weightsdir='.',
+    reuse_weights=True,
+    sg_params=[1, 170, -90]
+):
     """
     Create an xESMF regridder from a cubed-sphere to lat/lon grid

@@ -132,7 +142,7 @@ def make_regridder_C2L(csres_in, llres_out, weightsdir='.',
         else:
             weights_fname = f'conservative_sg{sg_hash(csres_in, sf_in, tlat_in, tlon_in)}_ll{llres_out}_F{i}.nc'
         weightsfile = os.path.join(weightsdir, weights_fname)
-
+
         if not os.path.isfile(weightsfile) and reuse_weights:
             #prevent error with more recent versions of xesmf
             reuse_weights=False
@@ -239,8 +249,13 @@ def make_regridder_S2S(
     return regridder_list


-def make_regridder_L2S(llres_in, csres_out, weightsdir='.',
-                       reuse_weights=True, sg_params=[1, 170, -90]):
+def make_regridder_L2S(
+    llres_in,
+    csres_out,
+    weightsdir='.',
+    reuse_weights=True,
+    sg_params=[1, 170, -90]
+):
     """
     Create an xESMF regridder from a lat/lon to a cubed-sphere grid

@@ -309,9 +324,15 @@ def make_regridder_L2S(llres_in, csres_out, weightsdir='.',


 def create_regridders(
-    refds, devds, weightsdir='.', reuse_weights=True, cmpres=None,
-    zm=False, sg_ref_params=[1, 170, -90],
-    sg_dev_params=[1, 170, -90]):
+    refds,
+    devds,
+    weightsdir='.',
+    reuse_weights=True,
+    cmpres=None,
+    zm=False,
+    sg_ref_params=[1, 170, -90],
+    sg_dev_params=[1, 170, -90]
+):
     """
     Internal function used for creating regridders between two datasets.
     Follows decision logic needed for plotting functions.

     Args:
         refds: xarray Dataset
             Input dataset
         devds: xarray Dataset
             Output dataset
         weightsdir: str
             Directory in which to create xESMF regridder NetCDF files
             Default value: '.'
         reuse_weights: bool
-            Set this flag to True to reuse existing xESMF regridder NetCDF files
-            Default value: False
+            Set this flag to True to reuse existing xESMF regridder
+            NetCDF files.  Default value: False
         cmpres: int or str
-            Specific target resolution for comparison grid used in difference and ratio plots
-            Default value: None (will follow logic chain below)
+            Specific target resolution for comparison grid used in
+            difference and ratio plots.  Default value: None (will
+            follow logic chain below)
         zm: bool
-            Set this flag to True if regridders will be used in zonal mean plotting
-            Default value: False
+            Set this flag to True if regridders will be used in zonal mean
+            plotting.  Default value: False
-        sg_ref_params: list[float, float, float] (stretch_factor, target_longitude, target_latitude)
+        sg_ref_params: list[float, float, float]
+            (stretch_factor, target_longitude, target_latitude)
             Ref grid stretched-grid parameters in the format
             [stretch_factor, target_longitude, target_latitude].
             Default value: [1, 170, -90] (no stretching)
-        sg_dev_params: list[float, float, float] (stretch_factor, target_longitude, target_latitude)
+        sg_dev_params: list[float, float, float]
+            (stretch_factor, target_longitude, target_latitude)
             Dev grid stretched-grid parameters in the format
             [stretch_factor, target_longitude, target_latitude].
             Default value: [1, 170, -90] (no stretching)
@@ -359,8 +383,9 @@ def create_regridders(
             Regridder object between refgrid or devgrid and cmpgrid
             (will be None if input grid is not lat/lon)
         refregridder_list, devregridder_list: list[6 xESMF regridders]
-            List of regridder objects for each face between refgrid or devgrid and cmpgrid
-            (will be None if input grid is not cubed-sphere)
+            List of regridder objects for each face between refgrid
+            or devgrid and cmpgrid (will be None if input grid is
+            not cubed-sphere)
     """

     # Take two lat/lon or cubed-sphere xarray datasets and regrid them if
@@ -682,13 +707,18 @@ def regrid_comparison_data(
             new_data = reformat_dims(
                 new_data, format=data_format, towards_common=False)
         return new_data
-    else:
-        return data
+    return data


-def reformat_dims(ds, format, towards_common):
+def reformat_dims(
+    ds,
+    format,
+    towards_common
+):
     """
-    Reformat dimensions of a cubed-sphere / stretched-grid grid between different GCHP formats
+    Reformat dimensions of a cubed-sphere / stretched-grid dataset
+    between different GCHP formats

     Args:
         ds: xarray Dataset
@@ -713,7 +743,7 @@ def unravel_checkpoint_lat(ds_in):
             np.linspace(1, 6, 6),
             np.linspace(1, cs_res, cs_res)
         ])
-        ds_in = ds_in.assign_coords({'lat': mi})
+        ds_in = ds_in.assign_coords({"lat": mi})
         ds_in = ds_in.unstack('lat')
         return ds_in

@@ -724,7 +754,7 @@ def ravel_checkpoint_lat(ds_out):
         cs_res = ds_out['lon'].size
         ds_out = ds_out.stack(lat=['lat_level_0', 'lat_level_1'])
         ds_out = ds_out.assign_coords({
-            'lat': np.linspace(1, 6 * cs_res, 6 * cs_res)
+            'lat': np.linspace(1, 6 * cs_res, 6 * cs_res),
         })
         return ds_out

@@ -749,9 +779,11 @@ def ravel_checkpoint_lat(ds_out):
                 'Ydim': 'Y',
                 'time': 'T',
             },
-            'transpose': ('time', 'lev', 'nf', 'Ydim', 'Xdim')
+            'transpose': ('time', 'lev', 'nf', 'Xdim', 'Ydim')
         }
     }
+
+    # %%%% Renaming toward the common format %%%%
     if towards_common:
         # Unravel dimensions
         for unravel_callback in dim_formats[format].get('unravel', []):
             ds = unravel_callback(ds)

         # Rename dimensions
         ds = ds.rename(dim_formats[format].get('rename', {}))
-
-        return ds
-    else:
-        # Reverse rename
-        ds = ds.rename(
-            {v: k for k, v in dim_formats[format].get('rename', {}).items()})
-
-        # Ravel dimensions
-        for ravel_callback in dim_formats[format].get('ravel', []):
-            ds = ravel_callback(ds)
-
-        # Transpose
-        if len(ds.dims) == 5 or (len(ds.dims) == 4 and 'lev' in list(
-                ds.dims) and 'time' in list(ds.dims)):
-            # full dim dataset
-            ds = ds.transpose(*dim_formats[format].get('transpose', []))
-        elif len(ds.dims) == 4:
-            # single time
-            ds = ds.transpose(*dim_formats[format].get('transpose', [])[1:])
-        elif len(ds.dims) == 3:
-            # single level / time
-            ds = ds.transpose(*dim_formats[format].get('transpose', [])[2:])

         return ds

+    # %%%% Renaming from the common format %%%%
+    # Reverse rename
+    ds = ds.rename(
+        {v: k for k, v in dim_formats[format].get('rename', {}).items()})
+
+    # Ravel dimensions
+    for ravel_callback in dim_formats[format].get('ravel', []):
+        ds = ravel_callback(ds)
+
+    # Transpose
+    if len(ds.dims) == 5 or (len(ds.dims) == 4 and 'lev' in list(
+            ds.dims) and 'time' in list(ds.dims)):
+        # full dim dataset
+        ds = ds.transpose(*dim_formats[format].get('transpose', []))
+    elif len(ds.dims) == 4:
+        # single time
+        ds = ds.transpose(*dim_formats[format].get('transpose', [])[1:])
+    elif len(ds.dims) == 3:
+        # single level / time
+        ds = ds.transpose(*dim_formats[format].get('transpose', [])[2:])
+    return ds
+
+
 def sg_hash(
         cs_res,
         stretch_factor: float,
@@ -797,13 +830,19 @@ def sg_hash(
             cs_res=cs_res).encode()).hexdigest()[
         :7]

-def regrid_vertical_datasets(ref, dev, target_grid_choice='ref', ref_vert_params=[[],[]],
-                             dev_vert_params=[[],[]], target_vert_params=[[],[]]):
+
+def regrid_vertical_datasets(
+    ref,
+    dev,
+    target_grid_choice='ref',
+    ref_vert_params=[[],[]],
+    dev_vert_params=[[],[]],
+    target_vert_params=[[],[]]
+):
     """
-    Perform complete vertical regridding of GEOS-Chem datasets to
-    the vertical grid of one of the datasets or an entirely different
+    Perform complete vertical regridding of GEOS-Chem datasets to
+    the vertical grid of one of the datasets or an entirely different
     vertical grid.
-
+
     Args:
         ref: xarray.Dataset
             First dataset
         dev: xarray.Dataset
             Second dataset
         target_grid_choice (optional): str
            Will regrid to the chosen grid of 'ref' or 'dev'
            unless target_vert_params is provided
            Default value: 'ref'
        ref_vert_params (optional): list(list, list) of list-like types
-            Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
+            Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
            Needed if ref grid is not 47 or 72 levels
            Default value: [[], []]
        dev_vert_params (optional): list(list, list) of list-like types
-            Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
+            Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
            Needed if dev grid is not 47 or 72 levels
            Default value: [[], []]
        target_vert_params (optional): list(list, list) of list-like types
-            Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
+            Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format.
Will override target_grid_choice as target grid Default value: [[], []] Returns: @@ -835,30 +874,40 @@ def regrid_vertical_datasets(ref, dev, target_grid_choice='ref', ref_vert_params # Get mid-point pressure and edge pressures for this grid ref_pedge, ref_pmid, _ = get_vert_grid(ref, *ref_vert_params) dev_pedge, dev_pmid, _ = get_vert_grid(dev, *dev_vert_params) - + new_ref, new_dev = ref, dev - + if len(ref_pedge) != len(dev_pedge) or target_vert_params != [[],[]]: if target_vert_params != [[],[]]: #use a specific target grid for regridding if passed target_grid = vert_grid(*target_vert_params) - target_pedge, target_pmid = target_grid.p_edge(), target_grid.p_mid() + target_pedge = target_grid.p_edge() + target_pmid = target_grid.p_mid() elif target_grid_choice == 'ref': - target_pedge, target_pmid = ref_pedge, ref_pmid + target_pedge = ref_pedge + target_pmid = ref_pmid else: - target_pedge, target_pmid = dev_pedge, dev_pmid - - def regrid_one_vertical_dataset(ds, ds_pedge, target_pedge, target_pmid): + target_pedge = dev_pedge + target_pmid = dev_pmid + + def regrid_one_vertical_dataset( + ds, + ds_pedge, + target_pedge, + target_pmid + ): new_ds = ds if len(ds_pedge) != len(target_pedge): #regrid all 3D (plus possible time dimension) variables xmat_ds = gen_xmat(ds_pedge, target_pedge) - regrid_variables = [v for v in ds.data_vars if (("lat" in ds[v].dims or "Xdim" in ds[v].dims) - and ("lon" in ds[v].dims or "Ydim" in ds[v].dims) - and ("lev" in ds[v].dims))] + regrid_variables = [v for v in ds.data_vars if ( + ("lat" in ds[v].dims or "Xdim" in ds[v].dims) and \ + ("lon" in ds[v].dims or "Ydim" in ds[v].dims) and \ + ("lev" in ds[v].dims) + )] new_ds = xr.Dataset() #currently drop data vars that have lev but don't also have x and y coordinates - for v in (set(ds.data_vars)-set(regrid_variables)): + for v in (set(ds.data_vars)-set(regrid_variables)): if 'lev' not in ds[v].dims: new_ds[v] = ds[v] new_ds.attrs = ds.attrs @@ -866,15 +915,29 @@ def regrid_one_vertical_dataset(ds, ds_pedge, target_pedge, target_pmid): if "time" in ds[v].dims: new_ds_temp = [] for time in range(len(ds[v].time)): - new_ds_v = regrid_vertical(ds[v].isel(time=time), xmat_ds, target_pmid) + new_ds_v = regrid_vertical( + ds[v].isel(time=time), + xmat_ds, + target_pmid + ) new_ds_temp.append(new_ds_v.expand_dims("time")) new_ds[v] = xr.concat(new_ds_temp, "time") else: new_ds[v] = regrid_vertical(ds[v], xmat, target_pmid) return new_ds - - new_ref = regrid_one_vertical_dataset(ref, ref_pedge, target_pedge, target_pmid) - new_dev = regrid_one_vertical_dataset(dev, dev_pedge, target_pedge, target_pmid) + + new_ref = regrid_one_vertical_dataset( + ref, + ref_pedge, + target_pedge, + target_pmid + ) + new_dev = regrid_one_vertical_dataset( + dev, + dev_pedge, + target_pedge, + target_pmid + ) return new_ref, new_dev @@ -949,7 +1012,7 @@ def regrid_vertical(src_data_3D, xmat_regrid, target_levs=[]): new_coords['lons'] = ( ('lat', 'lon'), src_data_3D.coords['lons'].data) out_data = xr.DataArray(out_data, - dims=tuple([dim for dim in src_data_3D.dims]), + dims=tuple(list(src_data_3D.dims)), coords=new_coords, attrs=src_data_3D.attrs) @@ -997,7 +1060,6 @@ def gen_xmat(p_edge_from, p_edge_to): while p_edge_to[i_to + 1] > p_edge_from[0]: i_to += 1 - frac_to_total = 0.0 i_weight = 0 for i_from in range(first_from, n_from): @@ -1007,7 +1069,6 @@ def gen_xmat(p_edge_from, p_edge_to): # Climb the "to" pressures until you intersect with this box while i_to < n_to and p_base_from <= p_edge_to[i_to + 1]: i_to += 1 - frac_to_total 
= 0.0

         # Now, loop over output layers, as long as there is any overlap,
         # i.e. as long as the base of the
diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py
index aaecec0c..c0c96878 100644
--- a/gcpy/regrid_restart_file.py
+++ b/gcpy/regrid_restart_file.py
@@ -277,12 +277,14 @@ def rename_variables(dataset, to_gchp=True):
     return dataset.rename(rename_dict)


-def reverse_lev(dataset):
+def reverse_lev(dataset, to_gchp):
     """
-    Reverse the level index of the passed dataset.
+    Reverses the level index of the passed dataset and adjusts the
+    "lev:positive" attribute accordingly.

     Args:
         dataset (xarray.Dataset): The dataset to have its level index reversed.
+        to_gchp (bool): True if we are saving out a GCHP restart file.

     Returns:
         xarray.Dataset: The input dataset with a reversed level index.
@@ -290,6 +292,17 @@ def reverse_lev(dataset):
     logging.info("Reversing coordinate 'lev'")
     dataset = dataset.reindex(lev=dataset.lev[::-1])
     dataset = dataset.assign_coords(lev=dataset.lev.values[::-1])
+
+    # GCHP restart files are indexed from top-of-atm downward.
+    # GCClassic restart files are indexed from surface upward.
+    #
+    # TODO: Make this more robust, to prevent a situation where data
+    # that is already top-down gets flipped to surface-up but remains
+    # labeled as top-down.
+    if to_gchp:
+        dataset["lev"].attrs["positive"] = "down"
+    else:
+        dataset["lev"].attrs["positive"] = "up"
+
     return dataset


@@ -503,7 +516,7 @@ def regrid_restart_file(
     if is_conversion:
         to_gchp = output_is_gchp
         dataset = rename_variables(dataset, to_gchp)
-        dataset = reverse_lev(dataset)
+        dataset = reverse_lev(dataset, to_gchp)

     dataset, output_template = drop_variables(dataset, output_template)
     dataset = regrid(dataset, output_template, weights_file=regrid_weights)
@@ -520,6 +533,8 @@ def regrid_restart_file(
             "Error when processing your stretched-grid parameters - are they correct?"
         ) from exception

+    dataset = check_lev_attribute(output_is_gchp)
+
     dataset.to_netcdf("new_restart_file.nc")

     info_message = "Wrote 'new_restart_file.nc' with %d variables"
diff --git a/gcpy/util.py b/gcpy/util.py
index a895c4aa..a8fced06 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -2,7 +2,6 @@
 Internal utilities for helping to manage xarray and numpy objects used
 throughout GCPy
 """
-
 import os
 import warnings
 import shutil
@@ -637,26 +636,24 @@ def add_missing_variables(
     return refdata, devdata


-def reshape_MAPL_CS(
-    darr,
-    multi_index_lat=True
-):
+def reshape_MAPL_CS(darr):
     """
     Reshapes data whose dimensions indicate MAPL v1.0.0+ output
+    (i.e. reshapes from "diagnostic" to "checkpoint" dimension format).

-    Args:
-        darr: xarray DataArray
-            Data array variable
+    Args:
+    -----
+    darr: xarray DataArray
+        The input data array.

-    Keyword Args (Optional):
-        multi_index_lat : bool
-            Determines if the returned "lat" index of the DataArray
-            object will be a MultiIndex (true) or a simple list of
-            latitude values (False).
-            Default value: True
-
     Returns:
-        data: xarray DataArray
-            Data with dimensions renamed and transposed to match old MAPL format
+    --------
+    darr: xarray DataArray
+        The modified data array (w/ dimensions renamed & transposed).
+
+    Remarks:
+    --------
+    Currently only used for GCPy plotting code.
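+
+    Example:
+    --------
+    Illustrative only (assumes a hypothetical GCHP diagnostic
+    variable "SpeciesConc_O3" with (time, lev, nf, Ydim, Xdim) dims):
+
+    >>> darr = reshape_MAPL_CS(dset["SpeciesConc_O3"])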
""" # Suppress annoying future warnings for now warnings.filterwarnings("ignore", category=FutureWarning) @@ -665,21 +662,10 @@ def reshape_MAPL_CS( # (otherwise just fall through and return the original argument as-is) if isinstance(darr, xr.DataArray): with xr.set_options(keep_attrs=True): - if "nf" in darr.dims and "Xdim" in darr.dims and "Ydim" in darr.dims: + if "nf" in darr.dims and \ + "Xdim" in darr.dims and "Ydim" in darr.dims: darr = darr.stack(lat=("nf", "Ydim")) darr = darr.rename({"Xdim": "lon"}) - # NOTE: The darr.stack operation will return the darr.lat - # dimension as a MultiIndex. In other words, each - # element of da.lat is a tuple (face number, latitude - # in degrees). To disable this behavior, set keyword - # argument multi_index_lat=False. This will return - # da.lat as an array of latitude values, which is - # needed for backwards compatibility. - # -- Bob Yantosca (07 Jul 2023) - if not multi_index_lat: - darr = darr.assign_coords( - {"lat": [list(tpl)[1] for tpl in darr.lat.values]} - ) if "lev" in darr.dims and "time" in darr.dims: darr = darr.transpose("time", "lev", "lat", "lon") elif "lev" in darr.dims: