cat-log: change interface for scheduler logs to include the dirname

* Support specifying the job submit number in the ID.
* Support specifying the most recent workflow log file via a one-letter filename, as with job logs.
cylc · Apr 6, 2023 · 8c09b9a · 8c09b9a
1 parent 803bfc1
commit 8c09b9a
Show file tree

Hide file tree

Showing 5 changed files with 130 additions and 47 deletions.
diff --git a/cylc/flow/scripts/cat_log.py b/cylc/flow/scripts/cat_log.py
@@ -45,14 +45,20 @@
   $ cylc cat-log foo
 
   # Print specific workflow log:
-  $ cylc cat-log foo -f 02-start-01.log
+  $ cylc cat-log foo -f scheduler/02-start-01.log
 
   # Print task stdout:
   $ cylc cat-log foo//2020/bar
   $ cylc cat-log -f o foo//2020/bar
 
   # Print task stderr:
   $ cylc cat-log -f e foo//2020/bar
+
+  # Print a custom file in a job's log directory:
+  $ cylc cat-log -f my-log-file foo//2020/bar
+
+  # Follow a log file:
+  $ cylc cat-log foo//2020/bar -m f
 """
 
 import os
@@ -68,7 +74,6 @@
 import cylc.flow.flags
 from cylc.flow.hostuserutil import is_remote_platform
 from cylc.flow.id_cli import parse_id
-from cylc.flow.loggingutil import LOG_FILE_EXTENSION
 from cylc.flow.option_parsers import (
     ID_MULTI_ARG_DOC,
     CylcOptionParser as COP,
@@ -79,8 +84,8 @@
     get_remote_workflow_run_job_dir,
     get_workflow_run_job_dir,
     get_workflow_run_pub_db_path,
-    get_workflow_run_scheduler_log_dir,
-    get_workflow_run_scheduler_log_path)
+    get_workflow_run_dir,
+)
 from cylc.flow.remote import remote_cylc_cmd, watch_and_kill
 from cylc.flow.rundb import CylcWorkflowDAO
 from cylc.flow.task_job_logs import (
@@ -93,6 +98,37 @@
     from optparse import Values
 
 
+WORKFLOW_LOG_OPTS = {
+    'c': ('workflow configuration file (raw)', r'config/*-start-*.cylc'),
+    'p': (
+        'workflow configuration file (processed)',
+        r'config/flow-processed.cylc'
+    ),
+    'i': ('install log', r'install/*-*install.log'),
+    's': ('scheduler log', r'scheduler/*-*start*.log'),
+}
+
+
+# add workflow and job log file options to the CLI help output
+__doc__ += r'''
+
+Log Files:
+  Select the log file to view with the --file option.
+  Either provide the file path or use one of the short options:
+
+  Job Logs:
+''' + '    ' + '\n    '.join(
+    f'{key:4} {value}'
+    for key, value in JOB_LOG_OPTS.items()
+) + '''
+
+  Workflow Logs:
+''' + '    ' + '\n    '.join(
+    f'{key:4} {value[0]}'
+    for key, value in WORKFLOW_LOG_OPTS.items()
+) + '\n\n  Use "--mode=l" to list available log files for a workflow/job.'
+
+
 # Immortal tail-follow processes on job hosts can be cleaned up by killing
 # my subprocesses if my PPID or PPPID changes (due to parent ssh connection
 # dying). This works even if the sshd-invoked
@@ -267,11 +303,16 @@ def get_option_parser() -> COP:
 
     parser.add_option(
         "-f", "--file",
-        help="  Job log: %s; default o(out)." % (
-             ', '.join(['%s(%s)' % (i, j)
-                       for i, j in JOB_LOG_OPTS.items()])) +
-             "  Or <filename> for custom (and standard) job logs.",
-        metavar="LOG", action="store", default=None, dest="filename")
+        help=(
+            'The file to view. Default for job logs "out", default for'
+            ' workflow logs "scheduler/log". See "Log Files" above for'
+            ' possible values.'
+        ),
+        metavar="LOG",
+        action="store",
+        default=None,
+        dest="filename",
+    )
 
     parser.add_option(
         "-m", "--mode",
@@ -284,7 +325,7 @@ def get_option_parser() -> COP:
         "-r", "--rotation",
         help="Workflow log integer rotation number. 0 for current, 1 for "
         "next oldest, etc.",
-        metavar="INT", action="store", dest="rotation_num")
+        metavar="INT", action="store", dest="rotation_num", type=int)
 
     parser.add_option(
         "-o", "--force-remote",
@@ -295,7 +336,7 @@ def get_option_parser() -> COP:
     parser.add_option(
         "-s", "--submit-number", "-t", "--try-number",
         help="Job submit number (default=%s, i.e. latest)." % NN,
-        metavar="INT", action="store", dest="submit_num", default=NN)
+        metavar="INT", action="store", dest="submit_num", default=None)
 
     parser.add_option(
         "--remote-arg",
@@ -371,28 +412,47 @@ def main(
     except KeyError:
         mode = options.mode
 
+    if tokens and tokens.get('cycle') and not tokens.get('task'):
+        print('Please provide a workflow, task or job ID', file=sys.stderr)
+        sys.exit(1)
+
     if not tokens or not tokens.get('task'):
-        logpath = get_workflow_run_scheduler_log_path(workflow_id)
-        if options.rotation_num:
-            log_dir = Path(logpath).parent
-            logs = glob(f'{log_dir}/*{LOG_FILE_EXTENSION}')
-            logs.sort(key=os.path.getmtime, reverse=True)
+        # no task provided - user has requested a workflow log
+        log_dir: str = get_workflow_run_dir(workflow_id, 'log')
+        file_name: str = options.filename or 's'
+        log_file_path: Path
+
+        if mode == 'list-dir':
+            # list workflow logs
+            print('\n'.join(sorted(
+                str(path.relative_to(log_dir))
+                for dirpath in {
+                    # set of log/<x> directories to scan for files in
+                    Path(log_dir, _file_name).parent
+                    for _, _file_name in WORKFLOW_LOG_OPTS.values()
+                }
+                for path in dirpath.iterdir()
+                # strip out file aliases such as scheduler/log
+                if not path.is_symlink()
+            )))
+            return
+
+        if file_name in WORKFLOW_LOG_OPTS:
+            rotation_number = options.rotation_num or 0
+            pattern = WORKFLOW_LOG_OPTS[file_name][1]
+            logs = list(reversed(glob(str(Path(log_dir, pattern)))))
             try:
-                logpath = logs[int(options.rotation_num)]
+                log_file_path = Path(logs[rotation_number])
             except IndexError:
-                raise InputError(
-                    "max rotation %d" % (len(logs) - 1))
-        # Cat workflow logs, local only.
-        if options.filename is not None and not options.rotation_num:
-            logpath = os.path.join(get_workflow_run_scheduler_log_dir(
-                workflow_id), str(options.filename))
+                raise InputError("max rotation %d" % (len(logs) - 1))
+        else:
+            log_file_path = Path(log_dir, file_name)
+
         tail_tmpl = os.path.expandvars(
             get_platform()["tail command template"]
         )
-        out = view_log(logpath, mode, tail_tmpl, color=color)
-        if out == 1:
-            sys.exit(1)
-        return
+        out = view_log(log_file_path, mode, tail_tmpl, color=color)
+        sys.exit(out)
 
     else:
         # Cat task job logs, may be on workflow or job host.
@@ -401,11 +461,13 @@ def main(
                 "only workflow (not job) logs get rotated")
         task = tokens['task']
         point = tokens['cycle']
-        if options.submit_num != NN:
+
+        submit_num = options.submit_num or tokens.get('job') or NN
+        if submit_num != NN:
             try:
-                options.submit_num = "%02d" % int(options.submit_num)
+                submit_num = "%02d" % int(submit_num)
             except ValueError:
-                parser.error("Illegal submit number: %s" % options.submit_num)
+                parser.error("Illegal submit number: %s" % submit_num)
         if options.filename is None:
             options.filename = JOB_LOG_OUT
         else:
@@ -414,7 +476,7 @@ def main(
                 options.filename = JOB_LOG_OPTS[options.filename]
                 # KeyError: Is already long form (standard log, or custom).
         platform_name, job_runner_name, live_job_id = get_task_job_attrs(
-            workflow_id, point, task, options.submit_num)
+            workflow_id, point, task, submit_num)
         platform = get_platform(platform_name)
         batchview_cmd = None
         if live_job_id is not None:
@@ -445,7 +507,7 @@ def main(
                             and live_job_id is None)
         if log_is_remote and (not log_is_retrieved or options.force_remote):
             logpath = os.path.normpath(get_remote_workflow_run_job_dir(
-                workflow_id, point, task, options.submit_num,
+                workflow_id, point, task, submit_num,
                 options.filename))
             tail_tmpl = platform["tail command template"]
             # Reinvoke the cat-log command on the remote account.
@@ -470,7 +532,7 @@ def main(
         else:
             # Local task job or local job log.
             logpath = os.path.normpath(get_workflow_run_job_dir(
-                workflow_id, point, task, options.submit_num,
+                workflow_id, point, task, submit_num,
                 options.filename))
             tail_tmpl = os.path.expandvars(platform["tail command template"])
             out = view_log(logpath, mode, tail_tmpl, batchview_cmd,

diff --git a/tests/functional/cylc-cat-log/00-local.t b/tests/functional/cylc-cat-log/00-local.t
@@ -18,7 +18,7 @@
 # Test "cylc cat-log" on the workflow host.
 . "$(dirname "$0")/test_header"
 #-------------------------------------------------------------------------------
-set_test_number 31
+set_test_number 37
 install_workflow "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
 #-------------------------------------------------------------------------------
 TEST_NAME="${TEST_NAME_BASE}-validate"
@@ -32,11 +32,33 @@ contains_ok "${TEST_NAME}.stdout" "${WORKFLOW_RUN_DIR}/log/scheduler/log"
 #-------------------------------------------------------------------------------
 TEST_NAME=${TEST_NAME_BASE}-workflow-log-ok
 LOG_DIR="$(dirname "$(cylc cat-log -m p "${WORKFLOW_NAME}")")"
+echo "This is file 02-restart-02.log" > "${LOG_DIR}/02-restart-02.log"
 echo "This is file 03-restart-02.log" > "${LOG_DIR}/03-restart-02.log"
-run_ok "${TEST_NAME}" cylc cat-log -f 03-restart-02.log "${WORKFLOW_NAME}"
+# it should accept file paths relative to the scheduler log directory
+run_ok "${TEST_NAME}" cylc cat-log -f scheduler/03-restart-02.log "${WORKFLOW_NAME}"
 contains_ok "${TEST_NAME}.stdout" - << __END__
 This is file 03-restart-02.log
 __END__
+# it should pick the latest log file if no rotation number is provided
+run_ok "${TEST_NAME}" cylc cat-log -f s "${WORKFLOW_NAME}"
+contains_ok "${TEST_NAME}.stdout" - << __END__
+This is file 03-restart-02.log
+__END__
+# it should apply rotation number to scheduler log files
+run_ok "${TEST_NAME}" cylc cat-log -f s -r 1 "${WORKFLOW_NAME}"
+contains_ok "${TEST_NAME}.stdout" - << __END__
+This is file 02-restart-02.log
+__END__
+# it should list workflow log files (config, install and scheduler)
+run_ok "${TEST_NAME}" cylc cat-log -m l "${WORKFLOW_NAME}"
+cmp_ok "${TEST_NAME}.stdout" - << __END__
+config/01-start-01.cylc
+config/flow-processed.cylc
+install/01-install.log
+scheduler/01-start-01.log
+scheduler/02-restart-02.log
+scheduler/03-restart-02.log
+__END__
 #-------------------------------------------------------------------------------
 TEST_NAME=${TEST_NAME_BASE}-task-out
 run_ok "${TEST_NAME}" cylc cat-log -f o "${WORKFLOW_NAME}//1/a-task"

diff --git a/tests/functional/cylc-cat-log/00-local/flow.cylc b/tests/functional/cylc-cat-log/00-local/flow.cylc
@@ -10,12 +10,12 @@
 [runtime]
     [[a-task]]
         script = """
-# Write to task stdout log
-echo "the quick brown fox"
-# Write to task stderr log
-echo "jumped over the lazy dog" >&2
-# Write to a custom log file
-echo "drugs and money" > ${CYLC_TASK_LOG_ROOT}.custom-log
-# Generate a warning message in the workflow log.
-cylc message -p WARNING 'marmite and squashed bananas'
-"""
+            # Write to task stdout log
+            echo "the quick brown fox"
+            # Write to task stderr log
+            echo "jumped over the lazy dog" >&2
+            # Write to a custom log file
+            echo "drugs and money" > ${CYLC_TASK_LOG_ROOT}.custom-log
+            # Generate a warning message in the workflow log.
+            cylc message -p WARNING 'marmite and squashed bananas'
+        """
diff --git a/tests/functional/cylc-cat-log/06-log-rotation.t b/tests/functional/cylc-cat-log/06-log-rotation.t
@@ -21,7 +21,7 @@ set_test_number 1
 init_workflow "${TEST_NAME_BASE}" '/dev/null'
 
 # Populate its cylc-run dir with empty log files.
-LOG_DIR="$(dirname "$(cylc cat-log -m p "${WORKFLOW_NAME}")")"
+LOG_DIR="$HOME/cylc-run/$WORKFLOW_NAME/log/scheduler"
 mkdir -p "${LOG_DIR}"
 touch -t '201001011200.00' "${LOG_DIR}/01-start-01.log"
 touch -t '201001011200.01' "${LOG_DIR}/02-start-01.log"

diff --git a/tests/functional/job-submission/16-timeout.t b/tests/functional/job-submission/16-timeout.t
@@ -40,8 +40,7 @@ workflow_run_ok "${TEST_NAME_BASE}-workflow-run" \
 cylc cat-log "${WORKFLOW_NAME}" \
     | grep -E -m 1 -A 2 "ERROR - \[jobs-submit cmd\]" \
        | sed -e 's/^.* \(ERROR\)/\1/' > log
-
-WORKFLOW_LOG_DIR=$(cylc cat-log -m p "${WORKFLOW_NAME}")
+WORKFLOW_LOG_DIR=$(cylc cat-log -m p "${WORKFLOW_NAME}" | sed 's/01-start-01\.//')
 JOB_LOG_DIR="${WORKFLOW_LOG_DIR%scheduler/log}"
 JOB_LOG_DIR="${JOB_LOG_DIR/$HOME/\$HOME}"