From 8cc4fc3351cfc607e2356eb3bc5ebf2b460467ec Mon Sep 17 00:00:00 2001 From: Mel Hall <37735232+datamel@users.noreply.github.com> Date: Tue, 14 Mar 2023 09:42:35 +0000 Subject: [PATCH 1/2] cat-log: support other workflow files * Update cat log to request specific workflow log file with -f option --- CHANGES.md | 6 ++++++ cylc/flow/scripts/cat_log.py | 12 ++++++++---- tests/functional/cylc-cat-log/00-local.t | 10 ++++++---- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 014dc887841..4ca2902a059 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,8 +10,14 @@ creating a new release entry be sure to copy & paste the span tag with the updated. Only the first match gets replaced, so it's fine to leave the old ones in. --> ------------------------------------------------------------------------------- + ## __cylc-8.1.3 (Upcoming)__ +### Enhancements + +[#5414](https://github.com/cylc/cylc-flow/pull/5414) - +Enable cat-log to view workflow logs with -f option. + ### Fixes [5398](https://github.com/cylc/cylc-flow/pull/5398) - Fix platform from diff --git a/cylc/flow/scripts/cat_log.py b/cylc/flow/scripts/cat_log.py index eb9747dc89d..b10217c2123 100755 --- a/cylc/flow/scripts/cat_log.py +++ b/cylc/flow/scripts/cat_log.py @@ -44,6 +44,9 @@ # Print workflow log: $ cylc cat-log foo + # Print specific workflow log: + $ cylc cat-log foo -f 02-start-01.log + # Print task stdout: $ cylc cat-log foo//2020/bar $ cylc cat-log -f o foo//2020/bar @@ -76,6 +79,7 @@ get_remote_workflow_run_job_dir, get_workflow_run_job_dir, get_workflow_run_pub_db_path, + get_workflow_run_scheduler_log_dir, get_workflow_run_scheduler_log_path) from cylc.flow.remote import remote_cylc_cmd, watch_and_kill from cylc.flow.rundb import CylcWorkflowDAO @@ -368,10 +372,6 @@ def main( mode = options.mode if not tokens or not tokens.get('task'): - # Cat workflow logs, local only. 
- if options.filename is not None: - raise InputError("The '-f' option is for job logs only.") - logpath = get_workflow_run_scheduler_log_path(workflow_id) if options.rotation_num: log_dir = Path(logpath).parent @@ -382,6 +382,10 @@ def main( except IndexError: raise InputError( "max rotation %d" % (len(logs) - 1)) + # Cat workflow logs, local only. + if options.filename is not None and not options.rotation_num: + logpath = os.path.join(get_workflow_run_scheduler_log_dir( + workflow_id), str(options.filename)) tail_tmpl = os.path.expandvars( get_platform()["tail command template"] ) diff --git a/tests/functional/cylc-cat-log/00-local.t b/tests/functional/cylc-cat-log/00-local.t index c427c13c1b5..44134052e6f 100755 --- a/tests/functional/cylc-cat-log/00-local.t +++ b/tests/functional/cylc-cat-log/00-local.t @@ -30,10 +30,12 @@ TEST_NAME=${TEST_NAME_BASE}-workflow-log-log run_ok "${TEST_NAME}" cylc cat-log "${WORKFLOW_NAME}" contains_ok "${TEST_NAME}.stdout" "${WORKFLOW_RUN_DIR}/log/scheduler/log" #------------------------------------------------------------------------------- -TEST_NAME=${TEST_NAME_BASE}-workflow-log-fail -run_fail "${TEST_NAME}" cylc cat-log -f e "${WORKFLOW_NAME}" -contains_ok "${TEST_NAME}.stderr" - << __END__ -InputError: The '-f' option is for job logs only. 
+TEST_NAME=${TEST_NAME_BASE}-workflow-log-ok +LOG_DIR="$(dirname "$(cylc cat-log -m p "${WORKFLOW_NAME}")")" +echo "This is file 03-restart-02.log" > "${LOG_DIR}/03-restart-02.log" +run_ok "${TEST_NAME}" cylc cat-log -f 03-restart-02.log "${WORKFLOW_NAME}" +contains_ok "${TEST_NAME}.stdout" - << __END__ +This is file 03-restart-02.log __END__ #------------------------------------------------------------------------------- TEST_NAME=${TEST_NAME_BASE}-task-out From e01ce1b75373e3d9c2f78c46295f03f7bc05f9f5 Mon Sep 17 00:00:00 2001 From: Oliver Sanders Date: Thu, 30 Mar 2023 14:48:36 +0100 Subject: [PATCH 2/2] cat-log: change interface for scheduler logs to include the dirname * Support specifying the job submit number in the ID. * Support specifying the most recent workflow log file via a one letter filename as with job logs. * Add --prepend-path option which prints the host:filepath before cat or tail operations. This is used by the UI to display the host:path where the log file is located. --- CHANGES.md | 5 +- cylc/flow/scripts/cat_log.py | 193 ++++++++++++++---- tests/functional/cylc-cat-log/00-local.t | 32 ++- .../cylc-cat-log/00-local/flow.cylc | 18 +- .../functional/cylc-cat-log/06-log-rotation.t | 2 +- tests/functional/cylc-clean/06-nfs.t | 2 +- tests/functional/job-submission/16-timeout.t | 3 +- 7 files changed, 194 insertions(+), 61 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4ca2902a059..a4aa28b9954 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,8 +15,9 @@ ones in. --> ### Enhancements -[#5414](https://github.com/cylc/cylc-flow/pull/5414) - -Enable cat-log to view workflow logs with -f option. +[#5453](https://github.com/cylc/cylc-flow/pull/5453) - `cylc cat-log` can now +list and view workflow log files including install logs and workflow +configuration files. 
### Fixes diff --git a/cylc/flow/scripts/cat_log.py b/cylc/flow/scripts/cat_log.py index b10217c2123..5b2223bacb0 100755 --- a/cylc/flow/scripts/cat_log.py +++ b/cylc/flow/scripts/cat_log.py @@ -45,7 +45,7 @@ $ cylc cat-log foo # Print specific workflow log: - $ cylc cat-log foo -f 02-start-01.log + $ cylc cat-log foo -f scheduler/02-start-01.log # Print task stdout: $ cylc cat-log foo//2020/bar @@ -53,6 +53,12 @@ # Print task stderr: $ cylc cat-log -f e foo//2020/bar + + # Print a custom file in a job's log directory: + $ cylc cat-log -f my-log-file foo//2020/bar + + # Follow a log file: + $ cylc cat-log foo//2020/bar -m f """ import os @@ -68,7 +74,6 @@ import cylc.flow.flags from cylc.flow.hostuserutil import is_remote_platform from cylc.flow.id_cli import parse_id -from cylc.flow.loggingutil import LOG_FILE_EXTENSION from cylc.flow.option_parsers import ( ID_MULTI_ARG_DOC, CylcOptionParser as COP, @@ -79,8 +84,8 @@ get_remote_workflow_run_job_dir, get_workflow_run_job_dir, get_workflow_run_pub_db_path, - get_workflow_run_scheduler_log_dir, - get_workflow_run_scheduler_log_path) + get_workflow_run_dir, +) from cylc.flow.remote import remote_cylc_cmd, watch_and_kill from cylc.flow.rundb import CylcWorkflowDAO from cylc.flow.task_job_logs import ( @@ -93,6 +98,37 @@ from optparse import Values +WORKFLOW_LOG_OPTS = { + 'c': ('workflow configuration file (raw)', r'config/*-start-*.cylc'), + 'p': ( + 'workflow configuration file (processed)', + r'config/flow-processed.cylc' + ), + 'i': ('install log', r'install/*-*install.log'), + 's': ('scheduler log', r'scheduler/*-*start*.log'), +} + + +# add workflow and job log file options to the CLI help output +__doc__ += r''' + +Log Files: + Select the log file to view with the --file option. 
+ Either provide the file path or use one of the short options: + + Job Logs: +''' + ' ' + '\n '.join( + f'{key:4} {value}' + for key, value in JOB_LOG_OPTS.items() +) + ''' + + Workflow Logs: +''' + ' ' + '\n '.join( + f'{key:4} {value[0]}' + for key, value in WORKFLOW_LOG_OPTS.items() +) + '\n\n Use "--mode=l" to list available log files for a workflow/job.' + + # Immortal tail-follow processes on job hosts can be cleaned up by killing # my subprocesses if my PPID or PPPID changes (due to parent ssh connection # dying). This works even if the sshd-invoked @@ -202,8 +238,15 @@ def _check_fs_path(path): ) -def view_log(logpath, mode, tailer_tmpl, batchview_cmd=None, remote=False, - color=False): +def view_log( + logpath, + mode, + tailer_tmpl, + batchview_cmd=None, + remote=False, + color=False, + prepend_path=False, +): """View (by mode) local log file. This is only called on the file host. batchview_cmd is a job-runner-specific job stdout or stderr cat or tail @@ -218,19 +261,22 @@ def view_log(logpath, mode, tailer_tmpl, batchview_cmd=None, remote=False, # Print location even if the workflow does not exist yet. print(logpath) return 0 - elif not os.path.exists(logpath) and batchview_cmd is None: + if not os.path.exists(logpath) and batchview_cmd is None: # Note: batchview_cmd may not need to have access to logpath, so don't # test for existence of path if it is set. sys.stderr.write('file not found: %s\n' % logpath) return 1 - elif mode == 'print-dir': + if mode == 'print-dir': print(os.path.dirname(logpath)) return 0 - elif mode == 'list-dir': + if mode == 'list-dir': for entry in sorted(os.listdir(os.path.dirname(logpath))): print(entry) return 0 - elif mode == 'cat': + if prepend_path: + from cylc.flow.hostuserutil import get_host + print(f'# {get_host()}:{logpath}') + if mode == 'cat': # print file contents to stdout. 
if batchview_cmd is not None: cmd = shlex.split(batchview_cmd) @@ -244,7 +290,7 @@ def view_log(logpath, mode, tailer_tmpl, batchview_cmd=None, remote=False, # * batchview command is user configurable colorise_cat_log(proc1, color=color) return 0 - elif mode == 'tail': + if mode == 'tail': if batchview_cmd is not None: cmd = batchview_cmd else: @@ -267,11 +313,16 @@ def get_option_parser() -> COP: parser.add_option( "-f", "--file", - help=" Job log: %s; default o(out)." % ( - ', '.join(['%s(%s)' % (i, j) - for i, j in JOB_LOG_OPTS.items()])) + - " Or for custom (and standard) job logs.", - metavar="LOG", action="store", default=None, dest="filename") + help=( + 'The file to view. Default for job logs "out", default for' + ' workflow logs "scheduler/log". See "Log Files" above for' + ' possible values.' + ), + metavar="LOG", + action="store", + default=None, + dest="filename", + ) parser.add_option( "-m", "--mode", @@ -284,7 +335,7 @@ def get_option_parser() -> COP: "-r", "--rotation", help="Workflow log integer rotation number. 0 for current, 1 for " "next oldest, etc.", - metavar="INT", action="store", dest="rotation_num") + metavar="INT", action="store", dest="rotation_num", type=int) parser.add_option( "-o", "--force-remote", @@ -295,13 +346,20 @@ def get_option_parser() -> COP: parser.add_option( "-s", "--submit-number", "-t", "--try-number", help="Job submit number (default=%s, i.e. latest)." 
% NN, - metavar="INT", action="store", dest="submit_num", default=NN) + metavar="INT", action="store", dest="submit_num", default=None) parser.add_option( "--remote-arg", help="(for internal use: continue processing on job host)", action="append", dest="remote_args") + parser.add_option( + '--prepend-path', + help='Prepend the file path to the output in the format <host>:<path>', + action='store_true', + default=False, + ) + return parser @@ -357,8 +415,15 @@ def main( batchview_cmd = options.remote_args[3] except IndexError: batchview_cmd = None - res = view_log(logpath, mode, tail_tmpl, batchview_cmd, remote=True, - color=color) + res = view_log( + logpath, + mode, + tail_tmpl, + batchview_cmd, + remote=True, + color=color, + prepend_path=options.prepend_path, + ) if res == 1: sys.exit(res) return @@ -371,28 +436,58 @@ def main( except KeyError: mode = options.mode + if tokens and tokens.get('cycle') and not tokens.get('task'): + print('Please provide a workflow, task or job ID', file=sys.stderr) + sys.exit(1) + if not tokens or not tokens.get('task'): - logpath = get_workflow_run_scheduler_log_path(workflow_id) - if options.rotation_num: - log_dir = Path(logpath).parent - logs = glob(f'{log_dir}/*{LOG_FILE_EXTENSION}') - logs.sort(key=os.path.getmtime, reverse=True) + # no task provided - user has requested a workflow log + log_dir: str = get_workflow_run_dir(workflow_id, 'log') + file_name: str = options.filename or 's' + log_file_path: Path + + if mode == 'list-dir': + # list workflow logs + print('\n'.join(sorted( + str(path.relative_to(log_dir)) + for dirpath in { + # set of log/ directories to scan for files in + Path(log_dir, _file_name).parent + for _, _file_name in WORKFLOW_LOG_OPTS.values() + } + for path in dirpath.iterdir() + # strip out file aliases such as scheduler/log + if not path.is_symlink() + ))) + return + + if file_name in WORKFLOW_LOG_OPTS: + rotation_number = options.rotation_num or 0 + pattern = WORKFLOW_LOG_OPTS[file_name][1] + logs = sorted( + 
glob( + str(Path(log_dir, pattern)) + ), + reverse=True + ) try: - logpath = logs[int(options.rotation_num)] + log_file_path = Path(logs[rotation_number]) except IndexError: - raise InputError( - "max rotation %d" % (len(logs) - 1)) - # Cat workflow logs, local only. - if options.filename is not None and not options.rotation_num: - logpath = os.path.join(get_workflow_run_scheduler_log_dir( - workflow_id), str(options.filename)) + raise InputError("max rotation %d" % (len(logs) - 1)) + else: + log_file_path = Path(log_dir, file_name) + tail_tmpl = os.path.expandvars( get_platform()["tail command template"] ) - out = view_log(logpath, mode, tail_tmpl, color=color) - if out == 1: - sys.exit(1) - return + out = view_log( + log_file_path, + mode, + tail_tmpl, + color=color, + prepend_path=options.prepend_path, + ) + sys.exit(out) else: # Cat task job logs, may be on workflow or job host. @@ -401,11 +496,13 @@ def main( "only workflow (not job) logs get rotated") task = tokens['task'] point = tokens['cycle'] - if options.submit_num != NN: + + submit_num = options.submit_num or tokens.get('job') or NN + if submit_num != NN: try: - options.submit_num = "%02d" % int(options.submit_num) + submit_num = "%02d" % int(submit_num) except ValueError: - parser.error("Illegal submit number: %s" % options.submit_num) + parser.error("Illegal submit number: %s" % submit_num) if options.filename is None: options.filename = JOB_LOG_OUT else: @@ -414,7 +511,7 @@ def main( options.filename = JOB_LOG_OPTS[options.filename] # KeyError: Is already long form (standard log, or custom). 
platform_name, job_runner_name, live_job_id = get_task_job_attrs( - workflow_id, point, task, options.submit_num) + workflow_id, point, task, submit_num) platform = get_platform(platform_name) batchview_cmd = None if live_job_id is not None: @@ -445,7 +542,7 @@ def main( and live_job_id is None) if log_is_remote and (not log_is_retrieved or options.force_remote): logpath = os.path.normpath(get_remote_workflow_run_job_dir( - workflow_id, point, task, options.submit_num, + workflow_id, point, task, submit_num, options.filename)) tail_tmpl = platform["tail command template"] # Reinvoke the cat-log command on the remote account. @@ -454,6 +551,8 @@ def main( cmd.append('--remote-arg=%s' % shlex.quote(item)) if batchview_cmd: cmd.append('--remote-arg=%s' % shlex.quote(batchview_cmd)) + if options.prepend_path: + cmd.append('--prepend-path') cmd.append(workflow_id) # TODO: Add Intelligent Host selection to this with suppress(KeyboardInterrupt): @@ -470,9 +569,15 @@ def main( else: # Local task job or local job log. logpath = os.path.normpath(get_workflow_run_job_dir( - workflow_id, point, task, options.submit_num, + workflow_id, point, task, submit_num, options.filename)) tail_tmpl = os.path.expandvars(platform["tail command template"]) - out = view_log(logpath, mode, tail_tmpl, batchview_cmd, - color=color) + out = view_log( + logpath, + mode, + tail_tmpl, + batchview_cmd, + color=color, + prepend_path=options.prepend_path, + ) sys.exit(out) diff --git a/tests/functional/cylc-cat-log/00-local.t b/tests/functional/cylc-cat-log/00-local.t index 44134052e6f..bc0e97f29c4 100755 --- a/tests/functional/cylc-cat-log/00-local.t +++ b/tests/functional/cylc-cat-log/00-local.t @@ -18,7 +18,7 @@ # Test "cylc cat-log" on the workflow host. . 
"$(dirname "$0")/test_header" #------------------------------------------------------------------------------- -set_test_number 31 +set_test_number 40 install_workflow "${TEST_NAME_BASE}" "${TEST_NAME_BASE}" #------------------------------------------------------------------------------- TEST_NAME="${TEST_NAME_BASE}-validate" @@ -32,11 +32,33 @@ contains_ok "${TEST_NAME}.stdout" "${WORKFLOW_RUN_DIR}/log/scheduler/log" #------------------------------------------------------------------------------- TEST_NAME=${TEST_NAME_BASE}-workflow-log-ok LOG_DIR="$(dirname "$(cylc cat-log -m p "${WORKFLOW_NAME}")")" +echo "This is file 02-restart-02.log" > "${LOG_DIR}/02-restart-02.log" echo "This is file 03-restart-02.log" > "${LOG_DIR}/03-restart-02.log" -run_ok "${TEST_NAME}" cylc cat-log -f 03-restart-02.log "${WORKFLOW_NAME}" +# it should accept file paths relative to the scheduler log directory +run_ok "${TEST_NAME}" cylc cat-log -f scheduler/03-restart-02.log "${WORKFLOW_NAME}" contains_ok "${TEST_NAME}.stdout" - << __END__ This is file 03-restart-02.log __END__ +# it should pick the latest scheduler log file if no rotation number is provided +run_ok "${TEST_NAME}" cylc cat-log --file s "${WORKFLOW_NAME}" +contains_ok "${TEST_NAME}.stdout" - << __END__ +This is file 03-restart-02.log +__END__ +# it should apply rotation number to scheduler log files +run_ok "${TEST_NAME}" cylc cat-log -f s -r 1 "${WORKFLOW_NAME}" +contains_ok "${TEST_NAME}.stdout" - << __END__ +This is file 02-restart-02.log +__END__ +# it should list scheduler log files +run_ok "${TEST_NAME}" cylc cat-log -m l "${WORKFLOW_NAME}" +cmp_ok "${TEST_NAME}.stdout" - << __END__ +config/01-start-01.cylc +config/flow-processed.cylc +install/01-install.log +scheduler/01-start-01.log +scheduler/02-restart-02.log +scheduler/03-restart-02.log +__END__ #------------------------------------------------------------------------------- TEST_NAME=${TEST_NAME_BASE}-task-out run_ok "${TEST_NAME}" cylc cat-log -f o 
"${WORKFLOW_NAME}//1/a-task" @@ -114,5 +136,11 @@ grep_ok "${WORKFLOW_NAME}/log/job/1/a-task/NN/job$" "${TEST_NAME}.stdout" TEST_NAME=${TEST_NAME_BASE}-un-norm-path run_fail "${TEST_NAME}" cylc cat-log -f j/../02/j "${WORKFLOW_NAME}//1/a-task" grep_ok 'InputError' "${TEST_NAME}.stderr" +#------------------------------------------------------------------------------- +TEST_NAME=${TEST_NAME_BASE}-prepend-path +run_ok "${TEST_NAME}-get-path" cylc cat-log -m p "${WORKFLOW_NAME}//1/a-task" +run_ok "${TEST_NAME}" cylc cat-log --prepend-path "${WORKFLOW_NAME}//1/a-task" +grep_ok "#.*$(cat "${TEST_NAME}-get-path.stdout")" "${TEST_NAME}.stdout" +#------------------------------------------------------------------------------- purge exit diff --git a/tests/functional/cylc-cat-log/00-local/flow.cylc b/tests/functional/cylc-cat-log/00-local/flow.cylc index 57acf58d2b6..383443109a0 100644 --- a/tests/functional/cylc-cat-log/00-local/flow.cylc +++ b/tests/functional/cylc-cat-log/00-local/flow.cylc @@ -10,12 +10,12 @@ [runtime] [[a-task]] script = """ -# Write to task stdout log -echo "the quick brown fox" -# Write to task stderr log -echo "jumped over the lazy dog" >&2 -# Write to a custom log file -echo "drugs and money" > ${CYLC_TASK_LOG_ROOT}.custom-log -# Generate a warning message in the workflow log. 
+ cylc message -p WARNING 'marmite and squashed bananas' + """ diff --git a/tests/functional/cylc-cat-log/06-log-rotation.t b/tests/functional/cylc-cat-log/06-log-rotation.t index 888bb16fbc9..cff6e88f423 100755 --- a/tests/functional/cylc-cat-log/06-log-rotation.t +++ b/tests/functional/cylc-cat-log/06-log-rotation.t @@ -21,7 +21,7 @@ set_test_number 1 init_workflow "${TEST_NAME_BASE}" '/dev/null' # Populate its cylc-run dir with empty log files. -LOG_DIR="$(dirname "$(cylc cat-log -m p "${WORKFLOW_NAME}")")" +LOG_DIR="$HOME/cylc-run/$WORKFLOW_NAME/log/scheduler" mkdir -p "${LOG_DIR}" touch -t '201001011200.00' "${LOG_DIR}/01-start-01.log" touch -t '201001011200.01' "${LOG_DIR}/02-start-01.log" diff --git a/tests/functional/cylc-clean/06-nfs.t b/tests/functional/cylc-clean/06-nfs.t index 991242cea25..e9d94da2d4b 100644 --- a/tests/functional/cylc-clean/06-nfs.t +++ b/tests/functional/cylc-clean/06-nfs.t @@ -45,7 +45,7 @@ init_workflow "${TEST_NAME_BASE}" <<< '# blank workflow' WORKFLOW_LOG_DIR="${WORKFLOW_RUN_DIR}/log/scheduler" mkdir -p "$WORKFLOW_LOG_DIR" LOG_STUFF='foo bar baz' -echo "${LOG_STUFF}" > "${WORKFLOW_LOG_DIR}/log" +echo "${LOG_STUFF}" > "${WORKFLOW_LOG_DIR}/01-start-01.log" # start cat-log running - this runs "tail -f" cylc cat-log -m t "$WORKFLOW_NAME" > out 2>err & PID="$!" diff --git a/tests/functional/job-submission/16-timeout.t b/tests/functional/job-submission/16-timeout.t index c7eefb7af81..266b04b0164 100755 --- a/tests/functional/job-submission/16-timeout.t +++ b/tests/functional/job-submission/16-timeout.t @@ -40,8 +40,7 @@ workflow_run_ok "${TEST_NAME_BASE}-workflow-run" \ cylc cat-log "${WORKFLOW_NAME}" \ | grep -E -m 1 -A 2 "ERROR - \[jobs-submit cmd\]" \ | sed -e 's/^.* \(ERROR\)/\1/' > log - -WORKFLOW_LOG_DIR=$(cylc cat-log -m p "${WORKFLOW_NAME}") +WORKFLOW_LOG_DIR=$(cylc cat-log -m p "${WORKFLOW_NAME}" | sed 's/01-start-01\.//') JOB_LOG_DIR="${WORKFLOW_LOG_DIR%scheduler/log}" JOB_LOG_DIR="${JOB_LOG_DIR/$HOME/\$HOME}"