From 70cd024e849f6c6ebc79fded7a7f541eff9029cf Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Mon, 15 Apr 2024 10:38:47 -0400 Subject: [PATCH] Improve logging to make it easier to re-run with the same configuration (#168) * Improve logging to make it easier to re-run with the same configuration * make sure the args log file is not overwritten * run update_supplement.update according to args, regardless of agasc_ids in args file --- agasc/scripts/update_mag_supplement.py | 93 +++++++++++++------ .../magnitudes/update_mag_supplement.py | 14 +++ 2 files changed, 79 insertions(+), 28 deletions(-) diff --git a/agasc/scripts/update_mag_supplement.py b/agasc/scripts/update_mag_supplement.py index 9d3eb0a..118b141 100755 --- a/agasc/scripts/update_mag_supplement.py +++ b/agasc/scripts/update_mag_supplement.py @@ -9,6 +9,7 @@ import logging import os from pathlib import Path +from pprint import pformat import pyyaks.logger import yaml @@ -112,8 +113,7 @@ def get_parser(): return parser -def main(): - +def get_args(): logger = logging.getLogger("agasc.supplement") the_parser = get_parser() args = the_parser.parse_args() @@ -163,11 +163,14 @@ def main(): star_obs_catalogs.load(args.stop) - # set the list of AGASC IDs from file if specified. If not, it will include all. - agasc_ids = [] - if args.agasc_id_file: - with open(args.agasc_id_file, "r") as f: - agasc_ids = [int(line.strip()) for line in f.readlines()] + if "agasc_ids" in file_args: + agasc_ids = file_args["agasc_ids"] + else: + # set the list of AGASC IDs from file if specified. If not, it will include all. + agasc_ids = [] + if args.agasc_id_file: + with open(args.agasc_id_file, "r") as f: + agasc_ids = [int(line.strip()) for line in f.readlines()] # update 'bad' and 'obs' tables in supplement agasc_ids += update_supplement.update(args) @@ -187,39 +190,73 @@ def main(): ) report_date = None - if args.report: + if "report_date" in file_args: + report_date = CxoTime(file_args["report_date"]) + elif args.report: report_date = CxoTime(args.stop) # the nominal date for reports is the first Monday after the stop date. # this is not perfect, because it needs to agree with nav_links in update_mag_supplement.do report_date += ((7 - report_date.datetime.weekday()) % 7) * u.day report_date = CxoTime(report_date.date[:8]) - args_log_file = args.output_dir / "call_args.yml" + args_log_file = get_next_file_name(args.output_dir / "call_args.yml") if not args.output_dir.exists(): args.output_dir.mkdir(parents=True) + + # there must be a better way to do this... + yaml_args = { + k: str(v) if issubclass(type(v), Path) else v for k, v in vars(args).items() + } + yaml_args["report_date"] = report_date.date + yaml_args["agasc_ids"] = agasc_ids + logger.info(f"Writing input arguments to {args_log_file}") with open(args_log_file, "w") as fh: - # there must be a better way to do this... - yaml_args = { - k: str(v) if issubclass(type(v), Path) else v for k, v in vars(args).items() - } yaml.dump(yaml_args, fh) - update_mag_supplement.do( - output_dir=args.output_dir, - reports_dir=args.reports_dir, - report_date=report_date, - agasc_ids=agasc_ids if agasc_ids else None, - multi_process=args.multi_process, - start=args.start, - stop=args.stop, - report=args.report, - include_bad=args.include_bad, - dry_run=args.dry_run, - no_progress=args.no_progress, - ) - if args.report and (args.reports_dir / f"{report_date.date[:8]}").exists(): + logger.info("Input arguments") + for line in pformat(yaml_args).split("\n"): + logger.info(line.rstrip()) + + return { + "output_dir": args.output_dir, + "reports_dir": args.reports_dir, + "report_date": report_date, + "agasc_ids": agasc_ids if agasc_ids else None, + "multi_process": args.multi_process, + "start": args.start, + "stop": args.stop, + "report": args.report, + "include_bad": args.include_bad, + "dry_run": args.dry_run, + "no_progress": args.no_progress, + "args_log_file": args_log_file, + } + + +def get_next_file_name(file_name): + if not file_name.exists(): + return file_name + i = 1 + while True: + new_file_name = file_name.with_suffix(f".{i}{file_name.suffix}") + if not new_file_name.exists(): + return new_file_name + i += 1 + + +def main(): + + args = get_args() + args_log_file = args.pop("args_log_file") + + update_mag_supplement.do(**args) + + if ( + args["report"] + and (args["reports_dir"] / f"{args['report_date'].date[:8]}").exists() + ): args_log_file.replace( - args.reports_dir / f"{report_date.date[:8]}" / args_log_file.name + args["reports_dir"] / f"{args['report_date'].date[:8]}" / args_log_file.name ) diff --git a/agasc/supplement/magnitudes/update_mag_supplement.py b/agasc/supplement/magnitudes/update_mag_supplement.py index fc90e64..76bc6c6 100755 --- a/agasc/supplement/magnitudes/update_mag_supplement.py +++ b/agasc/supplement/magnitudes/update_mag_supplement.py @@ -615,6 +615,20 @@ def do( # do the processing logger.info(f"Will process {len(agasc_ids)} stars on {len(stars_obs)} observations") logger.info(f"from {start} to {stop}") + + obs_times = CxoTime(star_obs_catalogs.STARS_OBS["mp_starcat_time"]) + latest = np.sort( + np.unique( + star_obs_catalogs.STARS_OBS[["mp_starcat_time", "obsid"]][ + obs_times > obs_times.max() - 1 * u.day + ] + ) + )[-10:] + logger.info("latest observations:") + for row in latest: + logger.info( + f" mp_starcat_time: {row['mp_starcat_time']}, OBSID {row['obsid']}" + ) if dry_run: return