Skip to content

Commit

Permalink
Merge branch 'azamat/tests/check-tput' into master (PR #1770)
Browse files Browse the repository at this point in the history
Add mach-specific throughput checking to test machines

Throughput comparison was previously done with a fixed 25% tolerance
(deviation) from baseline. This PR adds a machine-specific throughput
tolerance, set to 10% for all test machines. This should enable detection
of performance regressions greater than 10% on machines and tests that
have baselines (e.g. melvin, skybridge, chama).

[BFB]

* origin/azamat/tests/check-tput:
  Add mach-specific throughput checking to test machines
  • Loading branch information
jgfouca committed Sep 12, 2017
2 parents 5777176 + c711000 commit 1d38402
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 2 deletions.
11 changes: 11 additions & 0 deletions config/acme/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
<BASELINE_ROOT>/project/projectdirs/acme/baselines</BASELINE_ROOT>
<CCSM_CPRNC>/project/projectdirs/acme/tools/cprnc.edison/cprnc</CCSM_CPRNC>
<SAVE_TIMING_DIR>/project/projectdirs/$PROJECT</SAVE_TIMING_DIR>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="default">
<executable>srun</executable>
<arguments>
Expand Down Expand Up @@ -208,6 +209,7 @@
<PROJECT>acme</PROJECT>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="default">
<executable>srun</executable>
<arguments>
Expand Down Expand Up @@ -348,6 +350,7 @@
<PROJECT>acme</PROJECT>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="default">
<executable>srun</executable>
<arguments>
Expand Down Expand Up @@ -558,6 +561,7 @@
<MAX_TASKS_PER_NODE>64</MAX_TASKS_PER_NODE>
<PES_PER_NODE>64</PES_PER_NODE>
<BATCH_SYSTEM>none</BATCH_SYSTEM>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="default">
<executable>mpirun</executable>
<arguments>
Expand Down Expand Up @@ -742,6 +746,7 @@
<PIO_BUFFER_SIZE_LIMIT>1</PIO_BUFFER_SIZE_LIMIT>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PROJECT>fy150001</PROJECT>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>

<mpirun mpilib="default">
<executable>mpiexec</executable>
Expand Down Expand Up @@ -810,6 +815,7 @@
<PIO_BUFFER_SIZE_LIMIT>1</PIO_BUFFER_SIZE_LIMIT>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PROJECT>fy150001</PROJECT>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>

<mpirun mpilib="default">
<executable>mpiexec</executable>
Expand Down Expand Up @@ -943,6 +949,7 @@
<PES_PER_NODE>16</PES_PER_NODE>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PROJECT>ACME</PROJECT>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="mvapich">
<executable>mpiexec</executable>
<arguments>
Expand Down Expand Up @@ -1150,6 +1157,7 @@
<PROJECT>acme</PROJECT>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="default">
<executable>mpirun</executable>
<arguments>
Expand Down Expand Up @@ -1227,6 +1235,7 @@
<MAX_TASKS_PER_NODE>64</MAX_TASKS_PER_NODE>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PROJECT>ClimateEnergy_2</PROJECT>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>
<mpirun mpilib="default">
<executable>/usr/bin/runjob</executable>
Expand Down Expand Up @@ -1444,6 +1453,7 @@
<MAX_TASKS_PER_NODE>64</MAX_TASKS_PER_NODE>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PROJECT>ClimateEnergy_2</PROJECT>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>
<mpirun mpilib="default">
<executable>/usr/bin/runjob</executable>
Expand Down Expand Up @@ -1990,6 +2000,7 @@
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<PROJECT>cli115</PROJECT>
<PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>
<mpirun mpilib="default">
<executable args="default">aprun</executable>
<arguments>
Expand Down
8 changes: 6 additions & 2 deletions scripts/lib/CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,14 @@ def _compare_baseline(self):
#comparing ypd so bigger is better
if baseline is not None and current is not None:
diff = (baseline - current)/baseline
if(diff < 0.25):
tolerance = self._case.get_value("TEST_TPUT_TOLERANCE")
if tolerance is None:
tolerance = 0.25
expect(tolerance > 0.0, "Bad value for throughput tolerance in test")
if diff < tolerance:
self._test_status.set_status(THROUGHPUT_PHASE, TEST_PASS_STATUS)
else:
comment = "Error: Computation time increase > 25% from baseline"
# Parenthesize the product: "%" and "*" have equal precedence and associate
# left-to-right, so without parentheses the string is formatted with the raw
# fraction and then repeated 100 times instead of reporting a percentage.
comment = "Error: Computation time increase > %f pct from baseline" % (tolerance*100)
self._test_status.set_status(THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment)
append_testlog(comment)

Expand Down
1 change: 1 addition & 0 deletions scripts/lib/CIME/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ def _xml_phase(self, test):
envtest.set_value("GENERATE_BASELINE", self._baseline_gen_name is not None)
envtest.set_value("COMPARE_BASELINE", self._baseline_cmp_name is not None)
envtest.set_value("CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC", resolved=False))
envtest.set_value("TEST_TPUT_TOLERANCE", self._machobj.get_value("TEST_TPUT_TOLERANCE", resolved=False))

# Add the test instructions from config_test to env_test in the case
config_test = Tests()
Expand Down
8 changes: 8 additions & 0 deletions src/drivers/mct/cime_config/config_component.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2367,6 +2367,14 @@
<desc>Expected relative memory usage growth for test</desc>
</entry>

<!-- TEST_TPUT_TOLERANCE: fractional throughput tolerance for baseline comparison.
     The throughput check passes while (baseline - current)/baseline is below
     this value, so 0.25 allows up to a 25% drop relative to the baseline. -->
<entry id="TEST_TPUT_TOLERANCE">
<type>real</type>
<default_value>0.25</default_value>
<group>test</group>
<file>env_test.xml</file>
<desc>Expected throughput deviation</desc>
</entry>

<entry id="GENERATE_BASELINE">
<type>logical</type>
<valid_values>TRUE,FALSE</valid_values>
Expand Down

0 comments on commit 1d38402

Please sign in to comment.