Merge pull request #354 from xylar/add_compass_bisect_script

Add scripts for using `git bisect`
MPAS-Dev · Oct 13, 2022 · a64342f · a64342f
2 parents a402621 + e5d48a0
commit a64342f
Show file tree

Hide file tree

Showing 4 changed files with 285 additions and 0 deletions.
diff --git a/utils/bisect/README.md b/utils/bisect/README.md
@@ -0,0 +1,36 @@
+Using `git bisect`
+====================
+
+The command `git bisect` is a handy tool for finding the first commit that
+breaks the code in some way.  `git bisect run` can call a script that
+succeeds when a given commit is "good" but fails when it is "bad".  The script
+`bisect_step.py` provided here is one such script.
+
+To further encapsulate the process of using `git bisect`, we provide a driver
+script `bisect.py` that makes use of config options in a file similar to
+`example.cfg`.
+
+Instructions
+------------
+
+1. Copy `example.cfg` to the base of the branch:
+   ```shell
+   cp utils/bisect/example.cfg bisect.cfg
+   ```
+2. Set the `good` and `bad` config options to the appropriate E3SM commit
+   hashes or tags.
+
+3. Modify the various paths and commands as needed.
+
+4. In a batch job or interactive session on a compute node, run:
+   ```shell
+   ./utils/bisect/bisect.py -f bisect.cfg
+   ```
+
+Note
+----
+
+Since the code will be compiled on a compute node, any baseline used for
+comparison should also be built on a compute node.  Otherwise, you may get
+non-bit-for-bit results simply because of where the code was compiled.  This
+has been seen with Intel on Anvil.
diff --git a/utils/bisect/bisect.py b/utils/bisect/bisect.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+import argparse
+import configparser
+import os
+import subprocess
+
+
+def bisect(good, bad, e3sm_path, load_script, config_file, first_parent):
+    """
+    The driver function for calling ``git bisect`` to find the first "bad"
+    commit between a known "good" and "bad" E3SM commit.
+
+    The function uses ``git bisect run`` to call
+    ``utils/bisect/bisect_step.py`` repeatedly to test whether a given commit
+    is good or bad.  The step script is located relative to the current
+    working directory, so this function must be called from the root of the
+    compass branch.
+
+    Parameters
+    ----------
+    good : str
+        The hash or tag of a "good" E3SM commit that passes the test(s)
+        specified in the config file
+    bad : str
+        The hash or tag of a "bad" E3SM commit that fails the test(s)
+    e3sm_path : str
+        The path to the E3SM branch to test.  If you are pointing to the
+        ``E3SM-Project`` or ``MALI-Dev`` submodules, make sure they have been
+        initialized with ``git submodule update --init``.
+    load_script : str
+        The relative or absolute path to the load script used to activate
+        the compass conda environment and set environment variables used to
+        build the MPAS component to test.  This function does not use it
+        directly; ``bisect_step.py`` reads it from the config file.
+    config_file : str
+        The relative or absolute path to a config file containing config
+        options similar to ``utils/bisect/example.cfg`` that control the
+        bisection process.
+    first_parent : bool
+        Whether to only follow the first parent for merge commits.  This is
+        typically desirable because there may be broken commits within a branch
+        that are fixed by the time the branch is merged.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If any of the ``git bisect`` commands exits with a nonzero status
+    """
+
+    e3sm_path = os.path.abspath(e3sm_path)
+    config_file = os.path.abspath(config_file)
+    step_script = os.path.join(os.getcwd(), 'utils', 'bisect',
+                               'bisect_step.py')
+
+    start = ['git', 'bisect', 'start']
+    if first_parent:
+        start.append('--first-parent')
+
+    # Each command is run on its own, without a shell and inside e3sm_path,
+    # so that a failure (e.g. a missing directory or an unknown revision)
+    # stops the process instead of being silently ignored, and so that paths
+    # containing spaces are passed through intact.  ``good`` and ``bad`` are
+    # split on whitespace to mimic the word splitting a shell would do.
+    commands = [start,
+                ['git', 'bisect', 'good'] + good.split(),
+                ['git', 'bisect', 'bad'] + bad.split(),
+                ['git', 'bisect', 'run', step_script, '-f', config_file]]
+
+    print('\n')
+    print(72*'-')
+    print('Bisect Initialization')
+    print(72*'-')
+    print('\nRunning:')
+    print(f'  cd {e3sm_path}')
+    for args in commands:
+        print(f'  {" ".join(args)}')
+    # flush so the header appears before any output from the git commands
+    print('\n', flush=True)
+    for args in commands:
+        subprocess.check_call(args, cwd=e3sm_path)
+
+
+def main():
+    """
+    Parse the command line and the config file, then start the bisection.
+
+    The ``[bisect]`` section of the config file must provide the ``good``,
+    ``bad``, ``e3sm_path`` and ``load_script`` options.  ``first_parent`` is
+    optional and is treated as ``False`` when absent.
+    """
+    parser = argparse.ArgumentParser(
+        description='Use "git bisect" to find the first E3SM commit for which '
+                    'a given test fails')
+    parser.add_argument("-f", "--config_file", dest="config_file",
+                        required=True,
+                        help="Configuration file with bisect options",
+                        metavar="FILE")
+
+    args = parser.parse_args()
+
+    config = configparser.ConfigParser(
+        interpolation=configparser.ExtendedInterpolation())
+    # ConfigParser.read() silently skips files it cannot open, which would
+    # only surface later as a confusing KeyError, so open the file explicitly
+    with open(args.config_file) as config_handle:
+        config.read_file(config_handle)
+
+    section = config['bisect']
+
+    bisect(good=section['good'], bad=section['bad'],
+           e3sm_path=section['e3sm_path'],
+           load_script=section['load_script'],
+           config_file=args.config_file,
+           first_parent=section.getboolean('first_parent', fallback=False))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/bisect/bisect_step.py b/utils/bisect/bisect_step.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+import argparse
+import configparser
+import os
+import subprocess
+
+
+def run(launch_path, mpas_path, work_base, load_script, make_command,
+        setup_command, run_command):
+    """
+    This function runs a single step in the bisection process.  It is typically
+    called through ``git bisect run`` within the ``utils/bisect/bisect.py`` but
+    could be called on its own for testing purposes.
+
+    The function changes the current working directory to ``mpas_path``.
+
+    Parameters
+    ----------
+    launch_path : str
+        The path from which relative paths in the config file are defined,
+        typically the root of the compass branch where the config file
+        resides and where ``utils/bisect/bisect.py`` was called.
+    mpas_path : str
+        The relative or absolute path to the mpas component to be built.
+    work_base : str
+        The base directory for creating work directories for testing the code.
+        Subdirectories called ``e3sm_hash_<hash>`` will be created for each
+        E3SM commit hash that is tested.
+    load_script : str
+        The relative or absolute path to the load script used to activate
+        the compass conda environment and set environment variables used to
+        build the MPAS component to test.
+    make_command : str
+        The command to use to build the MPAS component
+    setup_command : str
+        The command to use to set up the compass test case(s)
+    run_command : str
+        The command (typically just ``compass run``) used to run the compass
+        test case(s)
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If the git hash cannot be determined or if the last command in the
+        build/setup/run sequence exits with a nonzero status
+    """
+
+    mpas_path = to_abs(mpas_path, launch_path)
+    work_base = to_abs(work_base, launch_path)
+    load_script = to_abs(load_script, launch_path)
+
+    # ask git directly within the component directory rather than going
+    # through a shell ``cd`` whose failure would go unnoticed
+    git_hash = subprocess.check_output(
+        ['git', 'rev-parse', '--short', 'HEAD'],
+        cwd=mpas_path).decode('utf-8').strip()
+
+    work_path = os.path.join(work_base, f'e3sm_hash_{git_hash}')
+    os.makedirs(work_path, exist_ok=True)
+
+    os.chdir(mpas_path)
+    commands = f'source {load_script}; ' \
+               f'git submodule update --init --recursive; ' \
+               f'make clean >& {work_path}/clean.log; ' \
+               f'{make_command} >& {work_path}/make.log; ' \
+               f'{setup_command} -p {mpas_path} -w {work_path}; ' \
+               f'cd {work_path}; ' \
+               f'{run_command}'
+    print('\n')
+    print(72*'-')
+    print('Bisect Step')
+    print(72*'-')
+    print('\nRunning:')
+    print_commands = commands.replace('; ', '\n  ')
+    # flush so the header appears before any output from the commands
+    print(f'  {print_commands}\n\n', flush=True)
+    # ``source`` and ``>&`` are bash syntax, whereas ``shell=True`` runs
+    # /bin/sh by default (not bash on every system), so ask for bash
+    subprocess.check_call(commands, shell=True, executable='/bin/bash')
+
+
+def to_abs(path, launch_path):
+    """
+    Convert a relative path to an absolute path
+
+    Parameters
+    ----------
+    path : str
+        A relative or absolute path
+    launch_path : str
+        The base path to use to convert relative paths to absolute paths.  If
+        it is itself relative (or empty), it is interpreted relative to the
+        current working directory.
+
+    Returns
+    -------
+    path : str
+        The original ``path`` as an absolute path.  A ``path`` that is already
+        absolute is returned unchanged.
+    """
+    if os.path.isabs(path):
+        return path
+    # abspath() normalizes just like normpath() but additionally anchors the
+    # result at the current working directory when launch_path is relative,
+    # so the return value is always absolute
+    return os.path.abspath(os.path.join(launch_path, path))
+
+
+def main():
+    """
+    Parse the command line and the config file, then run one bisection step.
+
+    Relative paths in the ``[bisect]`` section of the config file are
+    interpreted relative to the directory containing the config file.
+    """
+    parser = argparse.ArgumentParser(
+        description='Used internally by "git bisect run" to find the first '
+                    'E3SM commit for which a given test fails')
+    parser.add_argument("-f", "--config_file", dest="config_file",
+                        required=True,
+                        help="Configuration file with bisect options",
+                        metavar="FILE")
+
+    args = parser.parse_args()
+
+    # make the path absolute first: the directory name of a bare file name
+    # such as "bisect.cfg" is the empty string, which would leave the paths
+    # from the config file relative
+    config_file = os.path.abspath(args.config_file)
+
+    config = configparser.ConfigParser(
+        interpolation=configparser.ExtendedInterpolation())
+    # ConfigParser.read() silently skips files it cannot open, which would
+    # only surface later as a confusing KeyError, so open the file explicitly
+    with open(config_file) as config_handle:
+        config.read_file(config_handle)
+
+    launch_path = os.path.dirname(config_file)
+
+    section = config['bisect']
+    run(launch_path=launch_path, mpas_path=section['mpas_path'],
+        work_base=section['work_base'], load_script=section['load_script'],
+        make_command=section['make_command'],
+        setup_command=section['setup_command'],
+        run_command=section['run_command'])
+    print('\n')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/bisect/example.cfg b/utils/bisect/example.cfg
@@ -0,0 +1,30 @@
+# config options related to using "git bisect" to find the first E3SM commit that
+# causes a set of one or more tests to fail
+[bisect]
+
+# The hash or tag of a good E3SM commit where the tests pass
+good = 44814ae
+# The hash or tag of a bad E3SM commit where the tests fail
+bad = 7b87d1f
+# whether to only follow the first parent for merge commits.  This is typically
+# desirable because there may be broken commits within a branch that are fixed
+# by the time the branch is merged.
+first_parent = True
+# the absolute or relative path to the base of the E3SM branch you want to build
+e3sm_path = E3SM-Project
+# the absolute or relative path to the MPAS model directory you want to build
+mpas_path = E3SM-Project/components/mpas-ocean
+# the absolute or relative path for test results (subdirectories will be
+# created within this path for each git hash)
+work_base = /lcrc/group/e3sm/ac.xylar/compass_1.0/anvil/test_20220408/bisect
+# the make command to run to build the MPAS model
+make_command = make intel-mpi
+# the command to set up one or more test cases or a test suite
+# note: the mpas model and work directories will be appended automatically so
+#       don't include -p or -w flags
+setup_command = compass setup --copy_executable -n 39 -b /lcrc/group/e3sm/ac.xylar/compass_1.0/anvil/test_20220318/pr_baseline
+# the absolute or relative path to the load script used to activate the
+# compass environment
+load_script = load_compass_bisect_anvil_intel_impi.sh
+# the command to run compass within the work directory
+run_command = compass run