From b599c35696be563437cfa3757bf8bdd831ebab30 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola <sbozzolator@gmail.com>
Date: Fri, 6 Dec 2024 09:22:12 -0800
Subject: [PATCH] Restructure output directory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

And don't use `atmos.p.output_dir`

The output structure looks like this:
```
coupler_output_dir_amip/
├── checkpoints
│       └── checkpoints for the various models
├── output_0000/
│   ├── atmos/
│   │   └── output of the atmos model
│   └── ocean/
│       └── output of the ocean model
├── output_0001/
│   └── ... component model outputs in their folders ...
├── output_0002/
│   └── ... component model outputs in their folders ...
└── output_active -> output_0002/
```
---
 NEWS.md                                       | 29 +++++++++---
 Project.toml                                  |  2 +
 .../components/atmosphere/climaatmos.jl       |  6 +--
 .../ClimaEarth/hierarchy/climate_plots.jl     |  2 +-
 experiments/ClimaEarth/run_amip.jl            | 39 ++++++++--------
 .../ClimaEarth/run_cloudless_aquaplanet.jl    | 17 +++----
 .../ClimaEarth/run_cloudy_aquaplanet.jl       | 17 +++----
 .../ClimaEarth/run_cloudy_slabplanet.jl       | 18 ++++----
 experiments/ClimaEarth/run_dry_held_suarez.jl | 17 +++----
 .../ClimaEarth/run_moist_held_suarez.jl       | 18 ++++----
 src/Utilities.jl                              | 44 ++++++++++++++-----
 11 files changed, 130 insertions(+), 79 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index f22f34dc5..ade665753 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -27,14 +27,33 @@ As a part of the post processing pipeline, bias plots for variables at the
 pressure levels of 850.0, 500.0, 250.0 hPa and bias plots over latitude and
 pressure levels are being created.
 
+### Code cleanup
 
+#### Output path updates - PRs [#1058](https://github.com/CliMA/ClimaCoupler.jl/pull/1058),
+    [#1106](https://github.com/CliMA/ClimaCoupler.jl/pull/1106)
 
-### Code cleanup
-#### Output path update - PR [#1058](https://github.com/CliMA/ClimaCoupler.jl/pull/1058)
 Previously, ClimaEarth simulation outputs were saved in a path
-`experiments/ClimaEarth/output/$mode_name/$job_id/artifacts/`.
-This PR removes `mode_name` has from this pattern, so output will now be in
-`experiments/ClimaEarth/output/$job_id/artifacts/`.
+`experiments/ClimaEarth/output/$mode_name/$job_id/artifacts/`. Now, `ClimaEarth`
+creates output folders with an increment (increasing the counter every time the
+simulation is run). This is in preparation for restarts. The output now looks
+like
+```
+coupler_output_dir_amip/
+├── checkpoints
+│       └── checkpoints for the various models
+├── artifacts
+│       └── plots produced by the postprocessing step
+├── output_0000/
+│   ├── atmos/
+│   │   └── output of the atmos model
+│   └── ocean/
+│       └── output of the ocean model
+├── output_0001/
+│   └── ... component model outputs in their folders ...
+├── output_0002/
+│   └── ... component model outputs in their folders ...
+└── output_active -> output_0002/
+```
 Note that any external scripts that assume an output path will need to be updated.
 
 #### Remove ClimaCoupler.Diagnostics module - PR [#953](https://github.com/CliMA/ClimaCoupler.jl/pull/953)
diff --git a/Project.toml b/Project.toml
index fb77ccfa5..4db0bfd46 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,6 +7,7 @@ version = "0.1.1"
 ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
 ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
 ClimaCoreTempestRemap = "d934ef94-cdd4-4710-83d6-720549644b70"
+ClimaUtilities = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
@@ -20,6 +21,7 @@ Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"
 ClimaComms = "0.5.6, 0.6"
 ClimaCore = "0.14.19"
 ClimaCoreTempestRemap = "0.3"
+ClimaUtilities = "0.1.14"
 Dates = "1"
 JLD2 = "0.4, 0.5"
 Logging = "1"
diff --git a/experiments/ClimaEarth/components/atmosphere/climaatmos.jl b/experiments/ClimaEarth/components/atmosphere/climaatmos.jl
index cad9f7ec4..9966385e0 100644
--- a/experiments/ClimaEarth/components/atmosphere/climaatmos.jl
+++ b/experiments/ClimaEarth/components/atmosphere/climaatmos.jl
@@ -293,7 +293,7 @@ FluxCalculator.get_surface_params(sim::ClimaAtmosSimulation) = CAP.surface_fluxe
 ### ClimaAtmos.jl model-specific functions (not explicitly required by ClimaCoupler.jl)
 ###
 """
-    get_atmos_config_dict(coupler_dict::Dict, job_id::String)
+    get_atmos_config_dict(coupler_dict::Dict, job_id::String, atmos_output_dir)
 
 Returns the specified atmospheric configuration (`atmos_config`) overwitten by arguments
 in the coupler dictionary (`config_dict`).
@@ -313,7 +313,7 @@ The TOML parameter file to use is chosen using the following priority:
 If a coupler TOML file is provided, it is used. Otherwise we use an atmos TOML
 file if it's provided. If neither is provided, we use a default coupler TOML file.
 """
-function get_atmos_config_dict(coupler_dict::Dict, job_id::String)
+function get_atmos_config_dict(coupler_dict::Dict, job_id::String, atmos_output_dir)
     atmos_config_file = coupler_dict["atmos_config_file"]
     atmos_config_repo = coupler_dict["atmos_config_repo"]
     # override default or specified configs with coupler arguments, and set the correct atmos config_file
@@ -357,7 +357,7 @@ function get_atmos_config_dict(coupler_dict::Dict, job_id::String)
     end
 
     # Specify atmos output directory to be inside the coupler output directory
-    atmos_output_dir = joinpath(coupler_dict["coupler_output_dir"], job_id, "clima_atmos")
+    atmos_config["output_dir_style"] = "RemovePreexisting"
     atmos_config["output_dir"] = atmos_output_dir
 
     # Access extra atmosphere diagnostics from coupler so we can rename for atmos code
diff --git a/experiments/ClimaEarth/hierarchy/climate_plots.jl b/experiments/ClimaEarth/hierarchy/climate_plots.jl
index 1bd3298a2..85463b13d 100644
--- a/experiments/ClimaEarth/hierarchy/climate_plots.jl
+++ b/experiments/ClimaEarth/hierarchy/climate_plots.jl
@@ -14,7 +14,7 @@ for job_id in ["dry_held_suarez", "moist_held_suarez"]
         DATA_DIR = "experiments/ClimaEarth/$job_id/$job_id/clima_atmos/output_active/"
     else
         build = ENV["BUILDKITE_BUILD_NUMBER"]
-        DATA_DIR = "/central/scratch/esm/slurm-buildkite/climacoupler-ci/$build/climacoupler-ci/$job_id/$job_id/clima_atmos/output_active/"
+        DATA_DIR = "/central/scratch/esm/slurm-buildkite/climacoupler-ci/$build/climacoupler-ci/$job_id/output_active/clima_atmos/"
     end
 
     reduction = "6h_inst"
diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl
index 1bf77e411..504cf17cf 100644
--- a/experiments/ClimaEarth/run_amip.jl
+++ b/experiments/ClimaEarth/run_amip.jl
@@ -125,10 +125,25 @@ add_extra_diagnostics!(config_dict)
     plot_diagnostics,
 ) = get_coupler_args(config_dict)
 
+#=
+### I/O Directory Setup
+`Utilities.setup_output_dirs` returns `dir_paths.output`, the directory (generated
+under `COUPLER_OUTPUT_DIR`) where the output of the simulation will be saved,
+`dir_paths.artifacts`, the directory where the plots (from postprocessing and the
+conservation checks) will be saved, and `dir_paths.checkpoints`, where restart files are saved.
+=#
+
+COUPLER_OUTPUT_DIR = joinpath(output_dir_root, job_id)
+dir_paths = Utilities.setup_output_dirs(output_dir = COUPLER_OUTPUT_DIR, comms_ctx = comms_ctx)
+@info "Coupler output directory $(dir_paths.output)"
+@info "Coupler artifacts directory $(dir_paths.artifacts)"
+@info "Coupler checkpoint directory $(dir_paths.checkpoints)"
+
 ## get component model dictionaries (if applicable)
 ## Note this step must come after parsing the coupler config dictionary, since
 ##  some parameters are passed from the coupler config to the component model configs
-atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
+atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
+atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
 (; dt_rad, output_default_diagnostics) = get_atmos_args(atmos_config_dict)
 
 ## set unique random seed if desired, otherwise use default
@@ -137,18 +152,6 @@ Random.seed!(random_seed)
 
 tspan = (t_start, t_end)
 
-#=
-### I/O Directory Setup
-`Utilities.setup_output_dirs` returns `dir_paths.output = COUPLER_OUTPUT_DIR`, which is the directory where the output of the simulation will be saved, and `dir_paths.artifacts` is the directory where
-the plots (from postprocessing and the conservation checks) of the simulation will be saved. `dir_paths.regrid` is the directory where the regridding
-temporary files will be saved.
-=#
-
-COUPLER_OUTPUT_DIR = joinpath(output_dir_root, job_id)
-dir_paths = Utilities.setup_output_dirs(output_dir = COUPLER_OUTPUT_DIR, comms_ctx = comms_ctx)
-@info "Coupler output directory $(dir_paths.output)"
-@info "Coupler artifacts directory $(dir_paths.artifacts)"
-
 #=
 ## Data File Paths
 =#
@@ -848,13 +851,12 @@ if ClimaComms.iamroot(comms_ctx)
 
             # define variable names and output directories for each diagnostic
             amip_short_names_atmos = ["ta", "ua", "hus", "clw", "pr", "ts", "toa_fluxes_net"]
-            output_dir_atmos = atmos_sim.integrator.p.output_dir
             amip_short_names_coupler = ["F_turb_energy"]
             output_dir_coupler = dir_paths.output
 
             # Check if all output variables are available in the specified directories
             make_diagnostics_plots(
-                output_dir_atmos,
+                atmos_output_dir,
                 dir_paths.artifacts,
                 short_names = amip_short_names_atmos,
                 output_prefix = "atmos_",
@@ -870,17 +872,16 @@ if ClimaComms.iamroot(comms_ctx)
         # Check this because we only want monthly data for making plots
         if t_end > 84600 * 31 * 3 && output_default_diagnostics
             include("leaderboard/leaderboard.jl")
-            diagnostics_folder_path = atmos_sim.integrator.p.output_dir
             leaderboard_base_path = dir_paths.artifacts
-            compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
-            compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path)
+            compute_leaderboard(leaderboard_base_path, atmos_output_dir)
+            compute_pfull_leaderboard(leaderboard_base_path, atmos_output_dir)
         end
     end
     ## plot extra atmosphere diagnostics if specified
     if plot_diagnostics
         @info "Plotting diagnostics"
         include("user_io/diagnostics_plots.jl")
-        make_diagnostics_plots(atmos_sim.integrator.p.output_dir, dir_paths.artifacts)
+        make_diagnostics_plots(atmos_output_dir, dir_paths.artifacts)
     end
 
     ## plot all model states and coupler fields (useful for debugging)
diff --git a/experiments/ClimaEarth/run_cloudless_aquaplanet.jl b/experiments/ClimaEarth/run_cloudless_aquaplanet.jl
index b70ca5030..7ca6c71e5 100644
--- a/experiments/ClimaEarth/run_cloudless_aquaplanet.jl
+++ b/experiments/ClimaEarth/run_cloudless_aquaplanet.jl
@@ -55,6 +55,12 @@ start_date = "19790301"
 hourly_checkpoint = true
 dt_rad = "6hours"
 
+#=
+### I/O Directory Setup
+=#
+
+dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = ClimaComms.context())
+
 ## namelist
 config_dict = Dict(
     # general
@@ -109,7 +115,9 @@ config_dict = Dict(
 )
 
 ## merge dictionaries of command line arguments, coupler dictionary and component model dictionaries
-atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
+@info(config_dict)
+atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
+atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
 atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 #=
@@ -118,13 +126,6 @@ atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 comms_ctx = Utilities.get_comms_context(Dict("device" => "auto"))
 
-#=
-### I/O Directory Setup
-=#
-
-dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = comms_ctx)
-@info(config_dict)
-
 #=
 ## Component Model Initialization
 =#
diff --git a/experiments/ClimaEarth/run_cloudy_aquaplanet.jl b/experiments/ClimaEarth/run_cloudy_aquaplanet.jl
index f7d59f719..560194850 100644
--- a/experiments/ClimaEarth/run_cloudy_aquaplanet.jl
+++ b/experiments/ClimaEarth/run_cloudy_aquaplanet.jl
@@ -55,6 +55,12 @@ start_date = "19790301"
 hourly_checkpoint = true
 dt_rad = "6hours"
 
+#=
+### I/O Directory Setup
+=#
+
+dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = ClimaComms.context())
+
 ## namelist
 config_dict = Dict(
     # general
@@ -122,7 +128,9 @@ config_dict = Dict(
 )
 
 ## merge dictionaries of command line arguments, coupler dictionary and component model dictionaries
-atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
+@info(config_dict)
+atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
+atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
 atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 ## override default toml parameters
@@ -143,13 +151,6 @@ comms_ctx = Utilities.get_comms_context(Dict("device" => "auto"))
 ClimaComms.init(comms_ctx)
 
 
-#=
-### I/O Directory Setup
-=#
-
-dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = comms_ctx)
-@info(config_dict)
-
 #=
 ## Component Model Initialization
 =#
diff --git a/experiments/ClimaEarth/run_cloudy_slabplanet.jl b/experiments/ClimaEarth/run_cloudy_slabplanet.jl
index 53c1ffdad..898c0a711 100644
--- a/experiments/ClimaEarth/run_cloudy_slabplanet.jl
+++ b/experiments/ClimaEarth/run_cloudy_slabplanet.jl
@@ -60,6 +60,13 @@ start_date = "19790321"
 hourly_checkpoint = true
 dt_rad = "6hours"
 
+#=
+### I/O Directory Setup
+=#
+
+dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = ClimaComms.context())
+
+
 ## namelist
 config_dict = Dict(
     # general
@@ -126,7 +133,9 @@ config_dict = Dict(
 )
 
 ## merge dictionaries of command line arguments, coupler dictionary and component model dictionaries
-atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
+@info(config_dict)
+atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
+atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
 atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 # override default toml parameters
@@ -145,13 +154,6 @@ atmos_config_object.toml_dict["max_area_limiter_scale"]["value"] = 0
 
 comms_ctx = Utilities.get_comms_context(Dict("device" => "auto"))
 
-#=
-### I/O Directory Setup
-=#
-
-dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = comms_ctx)
-@info(config_dict)
-
 #=
 ## Data File Paths
 =#
diff --git a/experiments/ClimaEarth/run_dry_held_suarez.jl b/experiments/ClimaEarth/run_dry_held_suarez.jl
index b76b61548..9e09fc2b2 100644
--- a/experiments/ClimaEarth/run_dry_held_suarez.jl
+++ b/experiments/ClimaEarth/run_dry_held_suarez.jl
@@ -55,6 +55,12 @@ tspan = (Float64(0.0), Float64(Utilities.time_to_seconds(t_end)))
 start_date = "19790301"
 hourly_checkpoint = true
 
+#=
+### I/O Directory Setup
+=#
+
+dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = ClimaComms.context())
+
 ## namelist
 config_dict = Dict(
     # general
@@ -101,7 +107,9 @@ config_dict = Dict(
 )
 
 ## merge dictionaries of command line arguments, coupler dictionary and component model dictionaries
-atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
+@info(config_dict)
+atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
+atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
 atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 #=
@@ -110,13 +118,6 @@ atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 comms_ctx = Utilities.get_comms_context(Dict("device" => "auto"))
 
-#=
-### I/O Directory Setup
-=#
-
-dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = comms_ctx)
-@info(config_dict)
-
 #=
 ## Component Model Initialization
 =#
diff --git a/experiments/ClimaEarth/run_moist_held_suarez.jl b/experiments/ClimaEarth/run_moist_held_suarez.jl
index 7fefba21f..0c89aa356 100644
--- a/experiments/ClimaEarth/run_moist_held_suarez.jl
+++ b/experiments/ClimaEarth/run_moist_held_suarez.jl
@@ -57,6 +57,13 @@ tspan = (Float64(0.0), Float64(Utilities.time_to_seconds(t_end)))
 start_date = "19790301"
 hourly_checkpoint = true
 
+#=
+### I/O Directory Setup
+=#
+
+
+dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = ClimaComms.context())
+
 ## namelist
 config_dict = Dict(
     # general
@@ -109,7 +116,9 @@ config_dict = Dict(
 # TODO: may need to switch to Bulk fluxes
 
 ## merge dictionaries of command line arguments, coupler dictionary and component model dictionaries
-atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
+@info(config_dict)
+atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
+atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
 atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 #=
@@ -118,13 +127,6 @@ atmos_config_object = CA.AtmosConfig(atmos_config_dict)
 
 comms_ctx = Utilities.get_comms_context(Dict("device" => "auto"))
 
-#=
-### I/O Directory Setup
-=#
-
-dir_paths = Utilities.setup_output_dirs(output_dir = coupler_output_dir, comms_ctx = comms_ctx)
-@info(config_dict)
-
 #=
 ## Component Model Initialization
 =#
diff --git a/src/Utilities.jl b/src/Utilities.jl
index 735171f71..afdd74923 100644
--- a/src/Utilities.jl
+++ b/src/Utilities.jl
@@ -9,6 +9,7 @@ module Utilities
 import ClimaComms
 import ClimaCore as CC
 import Logging
+import ClimaUtilities.OutputPathGenerator: generate_output_path
 
 export swap_space!, get_device, get_comms_context, show_memory_usage, setup_output_dirs, time_to_seconds
 
@@ -99,33 +100,52 @@ Create output directories for the experiment. If `comms_ctx` is provided, only t
 By default, the regrid directory is created as a temporary directory inside the output directory,
 and the artifacts directory is created inside the output directory with the name `artifacts/`.
 
+`ClimaUtilities.OutputPathGenerator` is used so that simulations can be re-run and re-started.
+The output path looks like:
+```
+coupler_output_dir_amip/
+├── checkpoints
+│       └── checkpoints for the various models
+├── artifacts
+│       └── plots produced by the postprocessing step
+├── output_0000/
+│   ├── atmos/
+│   │   └── output of the atmos model
+│   └── ocean/
+│       └── output of the ocean model
+├── output_0001/
+│   └── ... component model outputs in their folders ...
+├── output_0002/
+│   └── ... component model outputs in their folders ...
+└── output_active -> output_0002/
+```
+
 # Arguments
 - `output_dir::String`: The directory where the output files will be stored. Default is the current directory.
 - `regrid_dir::String`: The directory where the regridded files will be stored. Default is `output_dir/regrid_tmp/`.
+- `checkpoints_dir::String`: The directory where the checkpoint files will be stored. Default is `output_dir/checkpoints/`.
 - `artifacts_dir::String`: The directory where the artifacts will be stored. Default is `output_dir/artifacts/`.
 - `comms_ctx::Union{Nothing, ClimaComms.AbstractCommsContext}`: The communicator context. If provided, only the root process will create the directories.
 
 # Returns
 - A tuple with the paths to the output, regrid, and artifacts directories.
 """
-function setup_output_dirs(; output_dir = nothing, artifacts_dir = nothing, comms_ctx)
-    if output_dir === nothing
-        output_dir = "."
-    end
-    if artifacts_dir === nothing
-        artifacts_dir = joinpath(output_dir, "artifacts")
-    end
-
-    @info(output_dir)
+function setup_output_dirs(;
+    output_dir = pwd(),
+    artifacts_dir = joinpath(output_dir, "artifacts"),
+    checkpoints_dir = joinpath(output_dir, "checkpoints"),
+    comms_ctx,
+)
+    output_dir = generate_output_path(output_dir, context = comms_ctx)
     regrid_dir = nothing
     if ClimaComms.iamroot(comms_ctx)
-        mkpath(output_dir)
         mkpath(artifacts_dir)
+        mkpath(checkpoints_dir)
         regrid_dir = mktempdir(output_dir, prefix = "regrid_tmp_")
     end
     regrid_dir = ClimaComms.bcast(comms_ctx, regrid_dir)
 
-    return (; output = output_dir, artifacts = artifacts_dir, regrid = regrid_dir)
+    return (; output = output_dir, artifacts = artifacts_dir, regrid = regrid_dir, checkpoints = checkpoints_dir)
 end
 
 """
@@ -151,4 +171,6 @@ function time_to_seconds(s::String)
     end
     error("Uncaught case in computing time from given string.")
 end
+
+
 end # module