Skip to content

Commit

Permalink
Merge pull request #4452 from ESMCI/fix_hist_utils
Browse files Browse the repository at this point in the history
Fixes hist utils

Fixes regex pattern used to match history files. Only considers .nc
files when copying history files for a test's COMPARE_PHASE. Adds
a new optional attribute exclude_testing for comp_archive_spec
which allows a component's history files to be archived but excluded
from the COMPARE_PHASE of tests when active.

Test suite: pytest CIME/tests/test*
Test baseline: n/a
Test namelist changes: n/a
Test status: n/a

Fixes #4438 #4387

User interface changes?: N
Update gh-pages html (Y/N)?: N
  • Loading branch information
jgfouca authored Jul 10, 2023
2 parents bc19d71 + 75c7fbe commit 9cbc1b8
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 20 deletions.
3 changes: 2 additions & 1 deletion CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ def _coupler_log_indicates_run_complete(self):
return allgood == 0

def _component_compare_copy(self, suffix):
comments, num_copied = copy_histfiles(self._case, suffix)
# Only match .nc files
comments, num_copied = copy_histfiles(self._case, suffix, match_suffix="nc")
self._expected_num_cmp = num_copied

append_testlog(comments, self._orig_caseroot)
Expand Down
36 changes: 34 additions & 2 deletions CIME/XML/archive_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,39 @@
"""
from CIME.XML.standard_module_setup import *
from CIME.XML.generic_xml import GenericXML
from CIME.utils import convert_to_type

logger = logging.getLogger(__name__)


class ArchiveBase(GenericXML):
def exclude_testing(self, compname):
    """
    Report whether the component ``compname`` is flagged for exclusion
    from testing.

    Reads the ``exclude_testing`` attribute for the component. When no
    value is found the component is not excluded (returns False);
    otherwise the raw value is passed through
    ``convert_to_type(value, "logical")`` and that result is returned.
    """
    raw_flag = self._get_attribute(compname, "exclude_testing")

    return False if raw_flag is None else convert_to_type(raw_flag, "logical")

def _get_attribute(self, compname, attr_name):
    """
    Fetch a single attribute value for the component ``compname``.

    Returns None when ``get_entry_attributes`` yields nothing for the
    component, or when ``attr_name`` is not among its attributes.
    """
    attributes = self.get_entry_attributes(compname)

    if attributes is not None:
        return attributes.get(attr_name, None)

    return None

def get_entry_attributes(self, compname):
    """
    Return the attributes of the entry found for ``compname``.

    The entry is looked up with ``get_entry``; when that returns None this
    method returns None as well, otherwise it returns ``self.attrib(entry)``.
    """
    node = self.get_entry(compname)

    return None if node is None else self.attrib(node)

def get_entry(self, compname):
"""
Returns an xml node corresponding to compname in comp_archive_spec
Expand Down Expand Up @@ -115,9 +143,13 @@ def get_all_hist_files(self, casename, model, from_dir, suffix="", ref_case=None
for ext in extensions:
if ext.endswith("$") and has_suffix:
ext = ext[:-1]
string = model + r"\d?_?(\d{4})?\." + ext + r".*?"
string = model + r"\d?_?(\d{4})?\." + ext
if has_suffix:
string += r"\." + suffix + "$"
if not suffix in string:
string += r".*\." + suffix + "$"

if not string.endswith("$"):
string += "$"

logger.debug("Regex is {}".format(string))
pfile = re.compile(string)
Expand Down
3 changes: 2 additions & 1 deletion CIME/data/config/xml_schemas/config_archive.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<xs:attribute name="compclass" type="xs:string"/>
<xs:attribute name="version" type="xs:string"/>
<xs:attribute name="disposition" type="xs:string"/>

<xs:attribute name="exclude_testing" type="xs:boolean"/>

<!-- definition of simple elements -->
<xs:element name="rest_file_extension" type="xs:string"/>
Expand Down Expand Up @@ -50,6 +50,7 @@
</xs:sequence>
<xs:attribute ref="compname" use="required"/>
<xs:attribute ref="compclass" use="required"/>
<xs:attribute ref="exclude_testing"/>
</xs:complexType>
</xs:element>

Expand Down
6 changes: 4 additions & 2 deletions CIME/hist_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def _iter_model_file_substrs(case):
yield model


def copy_histfiles(case, suffix):
def copy_histfiles(case, suffix, match_suffix=None):
"""Copy the most recent batch of hist files in a case, adding the given suffix.
This can allow you to temporarily "save" these files so they won't be blown
Expand All @@ -71,9 +71,11 @@ def copy_histfiles(case, suffix):
comments = "Copying hist files to suffix '{}'\n".format(suffix)
num_copied = 0
for model in _iter_model_file_substrs(case):
if case.get_value("TEST") and archive.exclude_testing(model):
continue
comments += " Copying hist files for model '{}'\n".format(model)
test_hists = archive.get_latest_hist_files(
casename, model, rundir, ref_case=ref_case
casename, model, rundir, suffix=match_suffix, ref_case=ref_case
)
num_copied += len(test_hists)
for test_hist in test_hists:
Expand Down
66 changes: 66 additions & 0 deletions CIME/tests/test_unit_hist_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import io
import unittest
from unittest import mock

from CIME.hist_utils import copy_histfiles
from CIME.XML.archive import Archive


class TestHistUtils(unittest.TestCase):
    """Exercise ``copy_histfiles`` against a ``MagicMock`` case object."""

    @mock.patch("CIME.hist_utils.safe_copy")
    def test_copy_histfiles_exclude(self, safe_copy):
        """
        ``exclude_testing`` answers True for the first model queried and
        False for the second; exactly one history file must be counted.
        """
        case = mock.MagicMock()
        archive = case.get_env.return_value

        archive.get_latest_hist_files.side_effect = [
            ["/tmp/testing.cpl.hi.nc"],
            ["/tmp/testing.atm.hi.nc"],
        ]
        archive.exclude_testing.side_effect = [True, False]

        # Values are handed out one per get_value call, in this order.
        case.get_value.side_effect = [
            "/tmp",  # RUNDIR
            None,  # RUN_REFCASE
            "testing",  # CASE
            True,  # TEST
            True,  # TEST
        ]
        case.get_compset_components.return_value = ["atm"]

        listing = ["testing.cpl.hi.nc"]

        with mock.patch("os.listdir", return_value=listing):
            _, num_copied = copy_histfiles(case, "base")

        assert num_copied == 1

    @mock.patch("CIME.hist_utils.safe_copy")
    def test_copy_histfiles(self, safe_copy):
        """
        With no model excluded and no compset components, the single file
        reported by ``get_latest_hist_files`` must be counted.
        """
        case = mock.MagicMock()
        archive = case.get_env.return_value

        archive.get_latest_hist_files.return_value = ["/tmp/testing.cpl.hi.nc"]
        archive.exclude_testing.return_value = False

        # Values are handed out one per get_value call, in this order.
        case.get_value.side_effect = [
            "/tmp",  # RUNDIR
            None,  # RUN_REFCASE
            "testing",  # CASE
            True,  # TEST
        ]
        case.get_compset_components.return_value = []

        listing = ["testing.cpl.hi.nc"]

        with mock.patch("os.listdir", return_value=listing):
            _, num_copied = copy_histfiles(case, "base")

        assert num_copied == 1
135 changes: 121 additions & 14 deletions CIME/tests/test_unit_xml_archive_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,30 @@

from CIME.XML.archive_base import ArchiveBase

TEST_CONFIG_ARCHIVE_XML = """<components version="2.0">
# NOTE: these fixtures are raw strings because <hist_file_extension> holds a
# regular expression containing ``\.``. In a normal string literal ``\.`` is an
# invalid escape sequence (a warning today, slated to become an error); the
# raw form yields exactly the same text without relying on that leniency.

# A single component whose history extension does not name a file suffix.
TEST_CONFIG = r"""<components version="2.0">
<comp_archive_spec compname="eam" compclass="atm">
<hist_file_extension>unique\.name\.unique</hist_file_extension>
</comp_archive_spec>
</components>"""

# Same component, but the history extension itself ends in "nc".
EXACT_TEST_CONFIG = r"""<components version="2.0">
<comp_archive_spec compname="eam" compclass="atm">
<hist_file_extension>unique\.name\.unique.nc</hist_file_extension>
</comp_archive_spec>
</components>"""

# Three components covering the exclude_testing states: attribute absent
# (eam), attribute "True" (cpl) and attribute "False" (mpasso).
EXCLUDE_TEST_CONFIG = r"""<components version="2.0">
<comp_archive_spec compname="eam" compclass="atm">
<hist_file_extension>unique\.name\.unique.nc</hist_file_extension>
</comp_archive_spec>
<comp_archive_spec compname="cpl" compclass="drv" exclude_testing="True">
<hist_file_extension>unique\.name\.unique.nc</hist_file_extension>
</comp_archive_spec>
<comp_archive_spec compname="mpasso" compclass="drv" exclude_testing="False">
<hist_file_extension>unique\.name\.unique.nc</hist_file_extension>
</comp_archive_spec>
</components>"""


class TestXMLArchiveBase(unittest.TestCase):
@contextmanager
Expand All @@ -26,52 +44,141 @@ def _setup_environment(self, test_files):

yield temp_dir

def test_get_all_hist_files(self):
def test_exclude_testing(self):
    """
    Check ``exclude_testing`` for every attribute state present in
    EXCLUDE_TEST_CONFIG, plus a component that is not configured at all.
    """
    archiver = ArchiveBase()
    archiver.read_fd(io.StringIO(EXCLUDE_TEST_CONFIG))

    # eam: no attribute, mpassi: not in config, mpasso: set false
    for compname in ("eam", "mpassi", "mpasso"):
        assert not archiver.exclude_testing(compname)

    # cpl: set true
    assert archiver.exclude_testing("cpl")

def test_match_files(self):
archiver = ArchiveBase()

archiver.read_fd(io.StringIO(TEST_CONFIG_ARCHIVE_XML))
archiver.read_fd(io.StringIO(TEST_CONFIG))

fail_files = [
"othername.eam.unique.name.unique.0001-01-01-0000.nc", # casename mismatch
"casename.satm.unique.name.unique.0001-01-01-0000.nc", # model (component?) mismatch
"casename.eam.0001-01-01-0000.nc", # missing hist_file_extension
"casename.eam.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc",
]

test_files = [
"casename.eam.0001-01-01-0000.nc",
"casename.eam1.unique.name.unique.0001-01-01-0000.nc",
"casename.eam1_.unique.name.unique.0001-01-01-0000.nc",
"casename.eam_.unique.name.unique.0001-01-01-0000.nc",
"casename.eam1990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam_1990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam1_1990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam11990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.0001-01-01-0000.nc.base",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc.base",
]

with self._setup_environment(test_files) as temp_dir:
with self._setup_environment(fail_files + test_files) as temp_dir:
hist_files = archiver.get_all_hist_files(
"casename", "eam", from_dir=temp_dir
)

test_files.sort()
hist_files.sort()

assert len(hist_files) == 4
assert len(hist_files) == len(test_files)

# assert all match except first
for x, y in zip(test_files[1:], hist_files):
for x, y in zip(test_files, hist_files):
assert x == y, f"{x} != {y}"

def test_get_all_hist_files_with_suffix(self):
def test_extension_included(self):
archiver = ArchiveBase()

archiver.read_fd(io.StringIO(TEST_CONFIG_ARCHIVE_XML))
archiver.read_fd(io.StringIO(EXACT_TEST_CONFIG))

fail_files = [
"othername.eam.unique.name.unique.0001-01-01-0000.nc", # casename mismatch
"casename.satm.unique.name.unique.0001-01-01-0000.nc", # model (component?) mismatch
"casename.eam.0001-01-01-0000.nc", # missing hist_file_extension
"casename.eam.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.0001-01-01-0000.nc.base",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc.base",
]

test_files = [
"casename.eam.0001-01-01-0000.nc",
"casename.eam1.unique.name.unique.nc",
"casename.eam1_.unique.name.unique.nc",
"casename.eam_.unique.name.unique.nc",
"casename.eam1990.unique.name.unique.nc",
"casename.eam_1990.unique.name.unique.nc",
"casename.eam1_1990.unique.name.unique.nc",
"casename.eam11990.unique.name.unique.nc",
"casename.eam.unique.name.unique.nc",
]

with self._setup_environment(fail_files + test_files) as temp_dir:
hist_files = archiver.get_all_hist_files(
"casename", "eam", suffix="nc", from_dir=temp_dir
)

test_files.sort()
hist_files.sort()

assert len(hist_files) == len(test_files)

# assert every expected file matches, in sorted order
for x, y in zip(test_files, hist_files):
assert x == y, f"{x} != {y}"

def test_suffix(self):
archiver = ArchiveBase()

archiver.read_fd(io.StringIO(TEST_CONFIG))

fail_files = [
"othername.eam.unique.name.unique.0001-01-01-0000.nc", # casename mismatch
"casename.satm.unique.name.unique.0001-01-01-0000.nc", # model (component?) mismatch
"casename.eam.0001-01-01-0000.nc", # missing hist_file_extension
"casename.eam.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc",
# ensure these do not match when suffix is provided
"casename.eam1.unique.name.unique.0001-01-01-0000.nc",
"casename.eam1_.unique.name.unique.0001-01-01-0000.nc",
"casename.eam_.unique.name.unique.0001-01-01-0000.nc",
"casename.eam1990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam_1990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam1_1990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam11990.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.0001-01-01-0000.nc",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc",
]

test_files = [
"casename.eam.unique.name.unique.0001-01-01-0000.nc.base",
"casename.eam.unique.name.unique.some.extra.0001-01-01-0000.nc.base",
]

with self._setup_environment(test_files) as temp_dir:
with self._setup_environment(fail_files + test_files) as temp_dir:
hist_files = archiver.get_all_hist_files(
"casename", "eam", suffix="base", from_dir=temp_dir
)

assert len(hist_files) == 2
assert len(hist_files) == len(test_files)

hist_files.sort()
test_files.sort()

assert test_files[3] in hist_files
assert test_files[4] in hist_files
for x, y in zip(hist_files, test_files):
assert x == y, f"{x} != {y}"

0 comments on commit 9cbc1b8

Please sign in to comment.