Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve logging to make it easier to re-run with the same configuration #168

Merged
merged 3 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 67 additions & 30 deletions agasc/scripts/update_mag_supplement.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
from pathlib import Path
from pprint import pformat

import pyyaks.logger
import yaml
Expand Down Expand Up @@ -112,8 +113,7 @@ def get_parser():
return parser


def main():

def get_args():
logger = logging.getLogger("agasc.supplement")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that a this chunk of "main" is now in get_args, maybe logger should just be global?

Copy link
Contributor Author

@javierggt javierggt Apr 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not used anywhere else. I think it is better to not make things global until you have to.

the_parser = get_parser()
args = the_parser.parse_args()
Expand Down Expand Up @@ -163,14 +163,17 @@ def main():

star_obs_catalogs.load(args.stop)

# set the list of AGASC IDs from file if specified. If not, it will include all.
agasc_ids = []
if args.agasc_id_file:
with open(args.agasc_id_file, "r") as f:
agasc_ids = [int(line.strip()) for line in f.readlines()]
if "agasc_ids" in file_args:
agasc_ids = file_args["agasc_ids"]
else:
# set the list of AGASC IDs from file if specified. If not, it will include all.
agasc_ids = []
if args.agasc_id_file:
with open(args.agasc_id_file, "r") as f:
agasc_ids = [int(line.strip()) for line in f.readlines()]

# update 'bad' and 'obs' tables in supplement
agasc_ids += update_supplement.update(args)
# update 'bad' and 'obs' tables in supplement
agasc_ids += update_supplement.update(args)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this new indent on purpose? I would think you'd still want to do the update_supplement.update(args) even if you've been given list of agasc_ids in file_args, but maybe I'm missing a logical change here.

Copy link
Contributor Author

@javierggt javierggt Apr 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha! good eye. Yes, I changed this on purpose, but I think you might have a point. I twas thinking of the agasc_ids. Also, if one is running with the same arguments, presumably this update to the supplement was already made, although one could be running with a copy of the supplement from the snapshot.

Note that there is another change. This change in indentation goes with the if "agasc_ids" in file_args: statement, which was not there to begin with, so everything after agasc_ids = [] is getting extra indentation.

This adds a feature that I did not document in the PR description. You can make a call_args.yml file by hand which includes the agasc_ids key, with a list of AGASC IDs. Note that there is no --agasc-ids command line argument, this is a feature of the call_args.yml file.

The truth is that we have never used the update_supplement part together with the update magnitudes part.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can change the indentation back. I don't think I have strong opinion on that, and maybe it is best to leave it as it was to make sure the same process is run.


# set start/stop times
if args.whole_history:
Expand All @@ -187,39 +190,73 @@ def main():
)

report_date = None
if args.report:
if "report_date" in file_args:
report_date = CxoTime(file_args["report_date"])
elif args.report:
report_date = CxoTime(args.stop)
# the nominal date for reports is the first Monday after the stop date.
# this is not perfect, because it needs to agree with nav_links in update_mag_supplement.do
report_date += ((7 - report_date.datetime.weekday()) % 7) * u.day
report_date = CxoTime(report_date.date[:8])

args_log_file = args.output_dir / "call_args.yml"
args_log_file = get_next_file_name(args.output_dir / "call_args.yml")
if not args.output_dir.exists():
args.output_dir.mkdir(parents=True)

# there must be a better way to do this...
yaml_args = {
k: str(v) if issubclass(type(v), Path) else v for k, v in vars(args).items()
}
yaml_args["report_date"] = report_date.date
yaml_args["agasc_ids"] = agasc_ids
logger.info(f"Writing input arguments to {args_log_file}")
with open(args_log_file, "w") as fh:
# there must be a better way to do this...
yaml_args = {
k: str(v) if issubclass(type(v), Path) else v for k, v in vars(args).items()
}
yaml.dump(yaml_args, fh)

update_mag_supplement.do(
output_dir=args.output_dir,
reports_dir=args.reports_dir,
report_date=report_date,
agasc_ids=agasc_ids if agasc_ids else None,
multi_process=args.multi_process,
start=args.start,
stop=args.stop,
report=args.report,
include_bad=args.include_bad,
dry_run=args.dry_run,
no_progress=args.no_progress,
)
if args.report and (args.reports_dir / f"{report_date.date[:8]}").exists():
logger.info("Input arguments")
for line in pformat(yaml_args).split("\n"):
logger.info(line.rstrip())

return {
"output_dir": args.output_dir,
"reports_dir": args.reports_dir,
"report_date": report_date,
"agasc_ids": agasc_ids if agasc_ids else None,
"multi_process": args.multi_process,
"start": args.start,
"stop": args.stop,
"report": args.report,
"include_bad": args.include_bad,
"dry_run": args.dry_run,
"no_progress": args.no_progress,
"args_log_file": args_log_file,
}


def get_next_file_name(file_name):
if not file_name.exists():
return file_name
i = 1
while True:
new_file_name = file_name.with_suffix(f".{i}{file_name.suffix}")
if not new_file_name.exists():
return new_file_name
i += 1


def main():

args = get_args()
args_log_file = args.pop("args_log_file")

update_mag_supplement.do(**args)

if (
args["report"]
and (args["reports_dir"] / f"{args['report_date'].date[:8]}").exists()
):
args_log_file.replace(
args.reports_dir / f"{report_date.date[:8]}" / args_log_file.name
args["reports_dir"] / f"{args['report_date'].date[:8]}" / args_log_file.name
)


Expand Down
14 changes: 14 additions & 0 deletions agasc/supplement/magnitudes/update_mag_supplement.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,20 @@ def do(
# do the processing
logger.info(f"Will process {len(agasc_ids)} stars on {len(stars_obs)} observations")
logger.info(f"from {start} to {stop}")

obs_times = CxoTime(star_obs_catalogs.STARS_OBS["mp_starcat_time"])
latest = np.sort(
np.unique(
star_obs_catalogs.STARS_OBS[["mp_starcat_time", "obsid"]][
obs_times > obs_times.max() - 1 * u.day
]
)
)[-10:]
logger.info("latest observations:")
for row in latest:
logger.info(
f" mp_starcat_time: {row['mp_starcat_time']}, OBSID {row['obsid']}"
)
if dry_run:
return

Expand Down
Loading