Add scripts for using git bisect

A config file determines the "good" and "bad" commits, and provides the paths to the MPAS code, the load script, etc. It also provides the commands for building the code, setting up the test case(s) and running them. A driver script sets things up and makes the calls to `git bisect`, passing along the `bisect/bisect_step.py` script for running each step of the bisection. The `bisect/bisect_step.py` script updates and builds the MPAS code, then sets up the compass test case(s) and runs compass.
MPAS-Dev · Jul 6, 2022 · 2b19d91 · 2b19d91
1 parent 8f0e817
commit 2b19d91
Show file tree

Hide file tree

Showing 4 changed files with 278 additions and 0 deletions.
diff --git a/utils/bisect/README.md b/utils/bisect/README.md
@@ -0,0 +1,36 @@
+Using `git bisect`
+====================
+
+The command `git bisect` is a handy tool for finding the first commit that
+breaks the code in some way.  `git bisect run` can call a script that
+succeeds when a given commit is "good" but fails when it is "bad".  The script
+`bisect_step.py` provided here is one such script.
+
+To further encapsulate the process of using `git bisect`, we provide a driver
+script `bisect.py` that makes use of config options in a file similar to
+`example.cfg`.
+
+Instructions
+------------
+
+1. Copy `example.cfg` to the base of the branch:
+   ```shell
+   cp utils/bisect/example.cfg bisect.cfg
+   ```
+2. Modify the config options with the appropriate "good" and "bad" E3SM commit
+   hashes or tags.
+
+3. Modify the various paths and commands as needed.
+
+4. In a batch job or interactive session on a compute node, run:
+   ```shell
+   ./utils/bisect/bisect.py -f bisect.cfg
+   ```
+
+Note
+----
+
+Since the code will be compiled on a compute node, any baseline used for
+comparison should also be built on a compute node.  Otherwise, you may get
+non-bit-for-bit results simply because of where the code was compiled.  This
+has been seen with Intel on Anvil.
diff --git a/utils/bisect/bisect.py b/utils/bisect/bisect.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+import argparse
+import configparser
+import os
+import subprocess
+
+
+def bisect(good, bad, e3sm_path, load_script, config_file, first_parent):
+    """
+    The driver function for calling ``git bisect`` to find the first "bad"
+    commit between a known "good" and "bad" E3SM commit.
+
+    The function uses ``git bisect run`` to call ``bisect_step.py`` (the
+    script that sits next to this one in ``utils/bisect``) repeatedly to test
+    whether a given commit is good or bad.
+
+    Parameters
+    ----------
+    good : str
+        The hash or tag of a "good" E3SM commit that passes the test(s)
+        specified in the config file
+    bad : str
+        The hash or tag of a "bad" E3SM commit that fails the test(s)
+    e3sm_path : str
+        The path to the E3SM branch to test.  If you are pointing to the
+        ``E3SM-Project`` or ``MALI-Dev`` submodules, make sure they have been
+        initialized with ``git submodule update --init``.
+    load_script : str
+        The relative or absolute path to the load script used to activate
+        the compass conda environment and set environment variables used to
+        build the MPAS component to test.
+    config_file : str
+        The relative or absolute path to a config file containing config
+        options similar to ``utils/bisect/example.cfg`` that control the
+        bisection process.
+    first_parent : bool
+        Whether to only follow the first parent for merge commits.  This is
+        typically desirable because there may be broken commits within a branch
+        that are fixed by the time the branch is merged.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If the shell that runs the chain of commands exits with a nonzero
+        status
+    """
+
+    e3sm_path = os.path.abspath(e3sm_path)
+    load_script = os.path.abspath(load_script)
+    config_file = os.path.abspath(config_file)
+
+    # find the step script next to this file instead of assuming that the
+    # current directory is the root of the compass branch
+    step_script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                               'bisect_step.py')
+
+    flags = '--first-parent' if first_parent else ''
+
+    commands = f'source {load_script}; ' \
+               f'cd {e3sm_path}; ' \
+               f'git bisect start {flags}; ' \
+               f'git bisect good {good}; ' \
+               f'git bisect bad {bad}; ' \
+               f'git bisect run {step_script}' \
+               f'  -f {config_file}'
+    print('\nRunning:')
+    print_commands = commands.replace('; ', '\n  ')
+    print(f'  {print_commands}\n\n')
+    # ``source`` is not available in every ``/bin/sh`` (the default shell for
+    # ``shell=True``), so run the commands with bash explicitly
+    subprocess.check_call(commands, shell=True, executable='/bin/bash')
+
+
+def main():
+    """
+    Parse the command-line arguments, read the ``[bisect]`` section of the
+    config file and pass its options on to ``bisect()``.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the config file given with ``-f`` does not exist
+    KeyError
+        If the config file has no ``[bisect]`` section or a required option
+        is missing from it
+    """
+    parser = argparse.ArgumentParser(
+        description='Use "git bisect" to find the first E3SM commit for which '
+                    'a given test fails')
+    parser.add_argument("-f", "--config_file", dest="config_file",
+                        required=True,
+                        help="Configuration file with bisect options",
+                        metavar="FILE")
+
+    args = parser.parse_args()
+
+    config = configparser.ConfigParser(
+        interpolation=configparser.ExtendedInterpolation())
+    # open the file ourselves: ConfigParser.read() silently skips files it
+    # cannot open, which would only show up later as a missing section
+    with open(args.config_file) as config_handle:
+        config.read_file(config_handle)
+
+    section = config['bisect']
+
+    bisect(good=section['good'], bad=section['bad'],
+           e3sm_path=section['e3sm_path'],
+           load_script=section['load_script'],
+           config_file=args.config_file,
+           # treat a missing option as False rather than passing None
+           first_parent=section.getboolean('first_parent', fallback=False))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/bisect/bisect_step.py b/utils/bisect/bisect_step.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+import argparse
+import configparser
+import os
+import subprocess
+
+
+def run(launch_path, mpas_path, work_base, load_script, make_command,
+        setup_command, run_command):
+    """
+    This function runs a single step in the bisection process.  It is typically
+    called through ``git bisect run`` by ``utils/bisect/bisect.py`` but could
+    be called on its own for testing purposes.
+
+    The MPAS component is cleaned and rebuilt, then the compass test case(s)
+    are set up in a work directory named after the current commit hash and
+    run.  The commands are chained with ``;`` in a single ``bash`` shell, so
+    the exit status that is checked is that of the final command
+    (``run_command``).
+
+    Parameters
+    ----------
+    launch_path : str
+        The path from which relative paths in the config file are defined,
+        typically the root of the compass branch where the config file
+        resides and where ``utils/bisect/bisect.py`` was called.
+    mpas_path : str
+        The relative or absolute path to the mpas component to be built.
+    work_base : str
+        The base directory for creating work directories for testing the code.
+        Subdirectories called ``e3sm_hash_<hash>`` will be created with each
+        E3SM commit hash that is tested.
+    load_script : str
+        The relative or absolute path to the load script used to activate
+        the compass conda environment and set environment variables used to
+        build the MPAS component to test.
+    make_command : str
+        The command to use to build the MPAS component
+    setup_command : str
+        The command to use to set up the compass test case(s)
+    run_command : str
+        The command (typically just ``compass run``) used to run the compass
+        test case(s)
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If the shell that determines the commit hash, or the shell that
+        builds, sets up and runs the test case(s), exits with a nonzero status
+    """
+
+    mpas_path = to_abs(mpas_path, launch_path)
+    work_base = to_abs(work_base, launch_path)
+    load_script = to_abs(load_script, launch_path)
+
+    # both command chains rely on bash features (``source`` and ``>&``) that
+    # the default ``/bin/sh`` used for ``shell=True`` may not support
+    shell = '/bin/bash'
+
+    commands = f'cd {mpas_path}; ' \
+               f'source {load_script}; ' \
+               f'git rev-parse --short HEAD'
+    git_hash = subprocess.check_output(
+        commands, shell=True, executable=shell).decode('utf-8').strip('\n')
+    # the load script may also print to stdout, so keep only the last line
+    git_hash = git_hash.split('\n')[-1]
+
+    work_path = os.path.join(work_base, f'e3sm_hash_{git_hash}')
+
+    # the directory may be left over from an earlier test of the same commit
+    os.makedirs(work_path, exist_ok=True)
+
+    os.chdir(mpas_path)
+    commands = f'source {load_script}; ' \
+               f'git submodule update --init --recursive; ' \
+               f'make clean >& {work_path}/clean.log; ' \
+               f'{make_command} >& {work_path}/make.log; ' \
+               f'{setup_command} -p {mpas_path} -w {work_path}; ' \
+               f'cd {work_path}; ' \
+               f'{run_command}'
+    print('\nRunning:')
+    print_commands = commands.replace('; ', '\n  ')
+    print(f'  {print_commands}\n\n')
+    subprocess.check_call(commands, shell=True, executable=shell)
+
+
+def to_abs(path, launch_path):
+    """
+    Convert a relative path to an absolute path
+
+    Parameters
+    ----------
+    path : str
+        A relative or absolute path
+    launch_path : str
+        The base path to use to convert relative paths to absolute paths.  If
+        it is itself relative (or empty), it is interpreted with respect to
+        the current working directory.
+
+    Returns
+    -------
+    path : str
+        The original ``path`` as an absolute path.  A ``path`` that is already
+        absolute is returned unchanged.
+    """
+    if not os.path.isabs(path):
+        # abspath() both normalizes the joined path and anchors it at the
+        # current directory if ``launch_path`` was not absolute, so the result
+        # is always absolute
+        path = os.path.abspath(os.path.join(launch_path, path))
+    return path
+
+
+def main():
+    """
+    Parse the command-line arguments, read the ``[bisect]`` section of the
+    config file and run one bisection step with its options.  Relative paths
+    in the config file are taken to be relative to the directory containing
+    the config file.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the config file given with ``-f`` does not exist
+    KeyError
+        If the config file has no ``[bisect]`` section or a required option
+        is missing from it
+    """
+    parser = argparse.ArgumentParser(
+        description='Used internally by "git bisect run" to find the first '
+                    'E3SM commit for which a given test fails')
+    parser.add_argument("-f", "--config_file", dest="config_file",
+                        required=True,
+                        help="Configuration file with bisect options",
+                        metavar="FILE")
+
+    args = parser.parse_args()
+
+    config = configparser.ConfigParser(
+        interpolation=configparser.ExtendedInterpolation())
+    # open the file ourselves: ConfigParser.read() silently skips files it
+    # cannot open, which would only show up later as a missing section
+    with open(args.config_file) as config_handle:
+        config.read_file(config_handle)
+
+    # make the config path absolute first so that the launch path is not an
+    # empty or relative directory when the script is given a relative path
+    launch_path = os.path.dirname(os.path.abspath(args.config_file))
+
+    section = config['bisect']
+    run(launch_path=launch_path, mpas_path=section['mpas_path'],
+        work_base=section['work_base'], load_script=section['load_script'],
+        make_command=section['make_command'],
+        setup_command=section['setup_command'],
+        run_command=section['run_command'])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/bisect/example.cfg b/utils/bisect/example.cfg
@@ -0,0 +1,30 @@
+# config options related to using "git bisect" to find the first E3SM commit that
+# causes a set of one or more tests to fail
+[bisect]
+
+# The hash or tag of a good E3SM commit where the tests pass
+good = 44814ae
+# The hash or tag of a bad E3SM commit where the tests fail
+bad = 7b87d1f
+# whether to only follow the first parent for merge commits.  This is typically
+# desirable because there may be broken commits within a branch that are fixed
+# by the time the branch is merged.
+first_parent = True
+# the absolute or relative path to the base of the E3SM branch you want to build
+e3sm_path = E3SM-Project
+# the absolute or relative path to the MPAS model directory you want to build
+mpas_path = E3SM-Project/components/mpas-ocean
+# the absolute or relative path for test results (subdirectories will be
+# created within this path for each git hash)
+work_base = /lcrc/group/e3sm/ac.xylar/compass_1.0/anvil/test_20220408/bisect
+# the make command to run to build the MPAS model
+make_command = make intel-mpi
+# the command to set up one or more test cases or a test suite
+# note: the mpas model and work directories will be appended automatically so
+#       don't include -p or -w flags
+setup_command = compass setup -n 39 -b /lcrc/group/e3sm/ac.xylar/compass_1.0/anvil/test_20220318/pr_baseline
+# the absolute or relative path to the load script used to activate the
+# compass environment
+load_script = load_compass_bisect_anvil_intel_impi.sh
+# the command to run compass within the work directory
+run_command = compass run