cat-log: change interface for scheduler logs to include the dirname
* Support specifying the job submit number in the ID.
* Support specifying the most recent workflow log file via a
  one-letter filename, as with job logs.
* Add a --prepend-path option which prints the host:filepath before
  cat or tail operations (examples below). This is used by the UI to
  display the host:path where the log file is located.
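
  For illustration, the new behaviour can be exercised roughly as follows
  (workflow/task IDs are hypothetical; the one-letter file options, the
  list mode and the --prepend-path flag come from the diff below):

    # job stderr for submit number 02, giving the submit number in the ID:
    $ cylc cat-log -f e myflow//2020/bar/02
    # the install log via its one-letter alias (s=scheduler, i=install,
    # c=raw config, p=processed config):
    $ cylc cat-log -f i myflow
    # list the available workflow log files:
    $ cylc cat-log -m l myflow
    # print host:path before the scheduler log contents:
    $ cylc cat-log --prepend-path myflow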
oliver-sanders committed Apr 19, 2023
1 parent 8cc4fc3 commit dd8cfff
Showing 7 changed files with 196 additions and 61 deletions.
5 changes: 3 additions & 2 deletions CHANGES.md
@@ -15,8 +15,9 @@ ones in. -->

### Enhancements

[#5414](https://github.com/cylc/cylc-flow/pull/5414) -
Enable cat-log to view workflow logs with -f option.
[#5453](https://github.com/cylc/cylc-flow/pull/5453) - `cylc cat-log` can now
list and view workflow log files including install logs and workflow
configuration files.

### Fixes

195 changes: 151 additions & 44 deletions cylc/flow/scripts/cat_log.py
@@ -45,14 +45,20 @@
$ cylc cat-log foo
# Print specific workflow log:
$ cylc cat-log foo -f 02-start-01.log
$ cylc cat-log foo -f scheduler/02-start-01.log
# Print task stdout:
$ cylc cat-log foo//2020/bar
$ cylc cat-log -f o foo//2020/bar
# Print task stderr:
$ cylc cat-log -f e foo//2020/bar
# Print a custom file in a job's log directory:
$ cylc cat-log -f my-log-file foo//2020/bar
# Follow a log file:
$ cylc cat-log foo//2020/bar -m f
"""

import os
@@ -68,7 +74,6 @@
import cylc.flow.flags
from cylc.flow.hostuserutil import is_remote_platform
from cylc.flow.id_cli import parse_id
from cylc.flow.loggingutil import LOG_FILE_EXTENSION
from cylc.flow.option_parsers import (
ID_MULTI_ARG_DOC,
CylcOptionParser as COP,
@@ -79,8 +84,8 @@
get_remote_workflow_run_job_dir,
get_workflow_run_job_dir,
get_workflow_run_pub_db_path,
get_workflow_run_scheduler_log_dir,
get_workflow_run_scheduler_log_path)
get_workflow_run_dir,
)
from cylc.flow.remote import remote_cylc_cmd, watch_and_kill
from cylc.flow.rundb import CylcWorkflowDAO
from cylc.flow.task_job_logs import (
@@ -93,6 +98,37 @@
from optparse import Values


WORKFLOW_LOG_OPTS = {
'c': ('workflow configuration file (raw)', r'config/*-start-*.cylc'),
'p': (
'workflow configuration file (processed)',
r'config/flow-processed.cylc'
),
'i': ('install log', r'install/*-*install.log'),
's': ('scheduler log', r'scheduler/*-*start*.log'),
}


# add workflow and job log file options to the CLI help output
__doc__ += r'''
Log Files:
Select the log file to view with the --file option.
Either provide the file path or use one of the short options:
Job Logs:
''' + ' ' + '\n '.join(
f'{key:4} {value}'
for key, value in JOB_LOG_OPTS.items()
) + '''
Workflow Logs:
''' + ' ' + '\n '.join(
f'{key:4} {value[0]}'
for key, value in WORKFLOW_LOG_OPTS.items()
) + '\n\n Use "--mode=l" to list available log files for a workflow/job.'


# Immortal tail-follow processes on job hosts can be cleaned up by killing
# my subprocesses if my PPID or PPPID changes (due to parent ssh connection
# dying). This works even if the sshd-invoked
@@ -202,8 +238,15 @@ def _check_fs_path(path):
)


def view_log(logpath, mode, tailer_tmpl, batchview_cmd=None, remote=False,
color=False):
def view_log(
logpath,
mode,
tailer_tmpl,
batchview_cmd=None,
remote=False,
color=False,
prepend_path=False,
):
"""View (by mode) local log file. This is only called on the file host.
batchview_cmd is a job-runner-specific job stdout or stderr cat or tail
@@ -218,19 +261,22 @@ def view_log(logpath, mode, tailer_tmpl, batchview_cmd=None, remote=False,
# Print location even if the workflow does not exist yet.
print(logpath)
return 0
elif not os.path.exists(logpath) and batchview_cmd is None:
if not os.path.exists(logpath) and batchview_cmd is None:
# Note: batchview_cmd may not need to have access to logpath, so don't
# test for existence of path if it is set.
sys.stderr.write('file not found: %s\n' % logpath)
return 1
elif mode == 'print-dir':
if mode == 'print-dir':
print(os.path.dirname(logpath))
return 0
elif mode == 'list-dir':
if mode == 'list-dir':
for entry in sorted(os.listdir(os.path.dirname(logpath))):
print(entry)
return 0
elif mode == 'cat':
if prepend_path:
from cylc.flow.hostuserutil import get_host
print(f'# {get_host()}:{logpath}')
if mode == 'cat':
# print file contents to stdout.
if batchview_cmd is not None:
cmd = shlex.split(batchview_cmd)
@@ -244,7 +290,7 @@ def view_log(logpath, mode, tailer_tmpl, batchview_cmd=None, remote=False,
# * batchview command is user configurable
colorise_cat_log(proc1, color=color)
return 0
elif mode == 'tail':
if mode == 'tail':
if batchview_cmd is not None:
cmd = batchview_cmd
else:
@@ -267,11 +313,16 @@ def get_option_parser() -> COP:

parser.add_option(
"-f", "--file",
help=" Job log: %s; default o(out)." % (
', '.join(['%s(%s)' % (i, j)
for i, j in JOB_LOG_OPTS.items()])) +
" Or <filename> for custom (and standard) job logs.",
metavar="LOG", action="store", default=None, dest="filename")
help=(
'The file to view. Default for job logs "out", default for'
' workflow logs "scheduler/log". See "Log Files" above for'
' possible values.'
),
metavar="LOG",
action="store",
default=None,
dest="filename",
)

parser.add_option(
"-m", "--mode",
@@ -284,7 +335,7 @@ def get_option_parser() -> COP:
"-r", "--rotation",
help="Workflow log integer rotation number. 0 for current, 1 for "
"next oldest, etc.",
metavar="INT", action="store", dest="rotation_num")
metavar="INT", action="store", dest="rotation_num", type=int)

parser.add_option(
"-o", "--force-remote",
@@ -295,13 +346,20 @@
parser.add_option(
"-s", "--submit-number", "-t", "--try-number",
help="Job submit number (default=%s, i.e. latest)." % NN,
metavar="INT", action="store", dest="submit_num", default=NN)
metavar="INT", action="store", dest="submit_num", default=None)

parser.add_option(
"--remote-arg",
help="(for internal use: continue processing on job host)",
action="append", dest="remote_args")

parser.add_option(
'--prepend-path',
help='Prepend the file path to the output in the format <host>:<path>',
action='store_true',
default=False,
)

return parser


@@ -357,8 +415,15 @@ def main(
batchview_cmd = options.remote_args[3]
except IndexError:
batchview_cmd = None
res = view_log(logpath, mode, tail_tmpl, batchview_cmd, remote=True,
color=color)
res = view_log(
logpath,
mode,
tail_tmpl,
batchview_cmd,
remote=True,
color=color,
prepend_path=options.prepend_path,
)
if res == 1:
sys.exit(res)
return
@@ -371,28 +436,60 @@ def main(
except KeyError:
mode = options.mode

if tokens and tokens.get('cycle') and not tokens.get('task'):
print('Please provide a workflow, task or job ID', file=sys.stderr)
sys.exit(1)

if not tokens or not tokens.get('task'):
logpath = get_workflow_run_scheduler_log_path(workflow_id)
if options.rotation_num:
log_dir = Path(logpath).parent
logs = glob(f'{log_dir}/*{LOG_FILE_EXTENSION}')
logs.sort(key=os.path.getmtime, reverse=True)
# no task provided - user has requested a workflow log
log_dir: str = get_workflow_run_dir(workflow_id, 'log')
file_name: str = options.filename or 's'
log_file_path: Path

if mode == 'list-dir':
# list workflow logs
print('\n'.join(sorted(
str(path.relative_to(log_dir))
for dirpath in {
# set of log/<x> directories to scan for files in
Path(log_dir, _file_name).parent
for _, _file_name in WORKFLOW_LOG_OPTS.values()
}
for path in dirpath.iterdir()
# strip out file aliases such as scheduler/log
if not path.is_symlink()
)))
return

if file_name in WORKFLOW_LOG_OPTS:
rotation_number = options.rotation_num or 0
pattern = WORKFLOW_LOG_OPTS[file_name][1]
logs = list(
sorted(
glob(
str(Path(log_dir, pattern))
),
reverse=True
)
)
try:
logpath = logs[int(options.rotation_num)]
log_file_path = Path(logs[rotation_number])
except IndexError:
raise InputError(
"max rotation %d" % (len(logs) - 1))
# Cat workflow logs, local only.
if options.filename is not None and not options.rotation_num:
logpath = os.path.join(get_workflow_run_scheduler_log_dir(
workflow_id), str(options.filename))
raise InputError("max rotation %d" % (len(logs) - 1))
else:
log_file_path = Path(log_dir, file_name)

tail_tmpl = os.path.expandvars(
get_platform()["tail command template"]
)
out = view_log(logpath, mode, tail_tmpl, color=color)
if out == 1:
sys.exit(1)
return
out = view_log(
log_file_path,
mode,
tail_tmpl,
color=color,
prepend_path=options.prepend_path,
)
sys.exit(out)

else:
# Cat task job logs, may be on workflow or job host.
@@ -401,11 +498,13 @@ def main(
"only workflow (not job) logs get rotated")
task = tokens['task']
point = tokens['cycle']
if options.submit_num != NN:

submit_num = options.submit_num or tokens.get('job') or NN
if submit_num != NN:
try:
options.submit_num = "%02d" % int(options.submit_num)
submit_num = "%02d" % int(submit_num)
except ValueError:
parser.error("Illegal submit number: %s" % options.submit_num)
parser.error("Illegal submit number: %s" % submit_num)
if options.filename is None:
options.filename = JOB_LOG_OUT
else:
Expand All @@ -414,7 +513,7 @@ def main(
options.filename = JOB_LOG_OPTS[options.filename]
# KeyError: Is already long form (standard log, or custom).
platform_name, job_runner_name, live_job_id = get_task_job_attrs(
workflow_id, point, task, options.submit_num)
workflow_id, point, task, submit_num)
platform = get_platform(platform_name)
batchview_cmd = None
if live_job_id is not None:
@@ -445,7 +544,7 @@ def main(
and live_job_id is None)
if log_is_remote and (not log_is_retrieved or options.force_remote):
logpath = os.path.normpath(get_remote_workflow_run_job_dir(
workflow_id, point, task, options.submit_num,
workflow_id, point, task, submit_num,
options.filename))
tail_tmpl = platform["tail command template"]
# Reinvoke the cat-log command on the remote account.
@@ -454,6 +553,8 @@ def main(
cmd.append('--remote-arg=%s' % shlex.quote(item))
if batchview_cmd:
cmd.append('--remote-arg=%s' % shlex.quote(batchview_cmd))
if options.prepend_path:
cmd.append('--prepend-path')
cmd.append(workflow_id)
# TODO: Add Intelligent Host selection to this
with suppress(KeyboardInterrupt):
@@ -470,9 +571,15 @@ def main(
else:
# Local task job or local job log.
logpath = os.path.normpath(get_workflow_run_job_dir(
workflow_id, point, task, options.submit_num,
workflow_id, point, task, submit_num,
options.filename))
tail_tmpl = os.path.expandvars(platform["tail command template"])
out = view_log(logpath, mode, tail_tmpl, batchview_cmd,
color=color)
out = view_log(
logpath,
mode,
tail_tmpl,
batchview_cmd,
color=color,
prepend_path=options.prepend_path,
)
sys.exit(out)
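
A rough sketch of the workflow-log selection logic introduced above (names
simplified, InputError swapped for ValueError; an illustration under
assumptions, not the merged code):

from glob import glob
from pathlib import Path

# One-letter aliases mapped to (description, glob pattern), mirroring
# WORKFLOW_LOG_OPTS in the diff above.
WORKFLOW_LOG_OPTS = {
    'c': ('workflow configuration file (raw)', r'config/*-start-*.cylc'),
    'p': ('workflow configuration file (processed)', r'config/flow-processed.cylc'),
    'i': ('install log', r'install/*-*install.log'),
    's': ('scheduler log', r'scheduler/*-*start*.log'),
}


def resolve_workflow_log(log_dir, file_name='s', rotation=0):
    """Return the workflow log file to view (sketch of the logic in main()).

    A one-letter alias expands to a glob pattern; the matches are sorted
    newest-first and the rotation number indexes into them. Anything else
    is treated as a literal path beneath the log directory.
    """
    if file_name in WORKFLOW_LOG_OPTS:
        pattern = WORKFLOW_LOG_OPTS[file_name][1]
        # Reverse lexical sort puts the most recent (highest-numbered) file first.
        logs = sorted(glob(str(Path(log_dir, pattern))), reverse=True)
        if rotation >= len(logs):
            raise ValueError(f'max rotation {len(logs) - 1}')
        return Path(logs[rotation])
    # Not an alias: use the given file name relative to the log directory.
    return Path(log_dir, file_name)

Here rotation 0 is the current file and higher numbers step back through
older rotations, matching the --rotation help text.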