Skip to content

Commit

Permalink
Updating behaviour to abort on config more often to save precious time
Browse files Browse the repository at this point in the history
  • Loading branch information
Samreay committed Feb 13, 2020
1 parent af08ff9 commit 16fc734
Show file tree
Hide file tree
Showing 19 changed files with 209 additions and 34 deletions.
2 changes: 1 addition & 1 deletion pippin/analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def get_slurm_raw(self):
return base

def add_plot_script_to_run(self, script_name):
script_path = get_data_loc(self.plot_code_dir, script_name)
script_path = get_data_loc(script_name, extra=self.plot_code_dir)
self.path_to_codes.append(script_path)
self.done_files.append(os.path.join(self.output_dir, os.path.basename(script_name).split(".")[0] + ".done"))

Expand Down
5 changes: 1 addition & 4 deletions pippin/biascor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import copy
import inspect
import shutil
import subprocess
import os
Expand All @@ -16,9 +15,7 @@

class BiasCor(ConfigBasedExecutable):
def __init__(self, name, output_dir, dependencies, options, config, global_config):
self.global_config = global_config
self.data_dirs = global_config["DATA_DIRS"]
base = get_data_loc(self.data_dirs, config.get("BASE", "surveys/des/bbc/bbc_5yr.input"))
base = get_data_loc(config.get("BASE", "surveys/des/bbc/bbc_5yr.input"))

super().__init__(name, output_dir, base, "=", dependencies=dependencies)

Expand Down
22 changes: 18 additions & 4 deletions pippin/classifiers/classifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from abc import abstractmethod

from pippin.config import get_output_loc
from pippin.config import get_output_loc, get_data_loc
from pippin.dataprep import DataPrep
from pippin.task import Task
from pippin.snana_sim import SNANASimulation
Expand Down Expand Up @@ -85,6 +85,19 @@ def get_simulation_dependency(self):
return t
return None

def validate_model(self):
    """Check at configuration time that a predict-mode classifier has a usable model.

    Nothing is checked unless ``self.mode`` is ``Classifier.PREDICT``. In
    predict mode the ``MODEL`` option must be present, and it must either
    equal the name of the classifier returned by
    ``self.get_model_classifier()`` or resolve, via ``get_data_loc``, to a
    path that exists on disk. Any violation is reported through
    ``Task.fail_config``.

    Returns:
        True when validation passes.
    """
    if self.mode == Classifier.PREDICT:
        model = self.options.get("MODEL")
        if model is None:
            Task.fail_config(f"Classifier {self.name} is in predict mode but does not have a model specified")

        # A model named after a classifier dependency will be produced by that task.
        model_classifier = self.get_model_classifier()
        if model_classifier is not None and model_classifier.name == model:
            return True

        # Otherwise the model has to be a serialised file that already exists.
        # get_data_loc may hand back None for an unresolvable path (other call
        # sites check for it); guard so we report a config failure instead of
        # letting os.path.exists(None) raise a TypeError.
        path = get_data_loc(model)
        if path is None or not os.path.exists(path):
            Task.fail_config(f"Classifier {self.name} does not have a classifier dependency and model is not a serialised file path")
    return True

def get_model_classifier(self):
for t in self.dependencies:
if isinstance(t, Classifier):
Expand Down Expand Up @@ -142,6 +155,8 @@ def get_num_ranseed(sim_task, lcfit_task):
name = config["CLASSIFIER"]
cls = ClassifierFactory.get(name)
options = config.get("OPTS", {})
if "MODE" not in config:
Task.fail_config(f"Classifier task {clas_name} needs to specify MODE as train or predict")
mode = config["MODE"].lower()
assert mode in ["train", "predict"], "MODE should be either train or predict"
if mode == "train":
Expand All @@ -166,6 +181,7 @@ def get_num_ranseed(sim_task, lcfit_task):
mask_sim = config.get("MASK_SIM", "")
mask_fit = config.get("MASK_FIT", "")
for s, l in runs:

sim_name = s.name if s is not None else None
fit_name = l.name if l is not None else None
matched_sim = True
Expand Down Expand Up @@ -200,7 +216,6 @@ def get_num_ranseed(sim_task, lcfit_task):
assert (
len(folders) == 1
), f"Training requires one version of the lcfits, you have {len(folders)} for lcfit task {l}. Make sure your training sim doesn't set RANSEED_CHANGE"

if model is not None:
if "/" in model or "." in model:
potential_path = get_output_loc(model)
Expand Down Expand Up @@ -253,6 +268,5 @@ def get_num_ranseed(sim_task, lcfit_task):
tasks.append(cc)

if num_gen == 0:
Task.logger.error(f"Classifier {clas_name} with masks |{mask}|{mask_sim}|{mask_fit}| matched no combination of sims and fits")
return None # This should cause pippin to crash, which is probably what we want
Task.fail_config(f"Classifier {clas_name} with masks |{mask}|{mask_sim}|{mask_fit}| matched no combination of sims and fits")
return tasks
1 change: 1 addition & 0 deletions pippin/classifiers/nearest_neighbor_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(self, name, output_dir, dependencies, mode, options, index=0, model

self.output["predictions_filename"] = self.predictions_filename
self.output["model_filename"] = self.output_pk_file
self.validate_model()

self.slurm = """#!/bin/bash
#SBATCH --job-name={job_name}
Expand Down
3 changes: 2 additions & 1 deletion pippin/classifiers/snirf.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def __init__(self, name, output_dir, dependencies, mode, options, index=0, model
self.path_to_classifier = get_output_loc(self.global_config["SNIRF"]["location"])
self.job_base_name = os.path.basename(Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
# self.model_pk_file = self.get_unique_name() + ".pkl"
self.validate_model()

self.model_pk_file = "model.pkl"
self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
self.fitopt = options.get("FITOPT", "DEFAULT")
Expand Down
2 changes: 2 additions & 0 deletions pippin/classifiers/supernnova.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def __init__(self, name, output_dir, dependencies, mode, options, index=0, model
self.variant = options.get("VARIANT", "vanilla").lower()
self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
self.norm = options.get("NORM", "global")
self.validate_model()

assert self.norm in ["global", "cosmo", "perfilter"], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter'"
assert self.variant in ["vanilla", "variational", "bayesian"], f"Variant {self.variant} is not vanilla, variational or bayesian"
self.slurm = """#!/bin/bash
Expand Down
12 changes: 8 additions & 4 deletions pippin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def get_config(initial_path=None, overwrites=None):
config = merge_dict(config, overwrites)

for i, path in enumerate(config["DATA_DIRS"]):
updated = get_data_loc([this_dir], path)
updated = get_data_loc(path, extra=this_dir)
if updated is None:
logging.error(f"Data dir {path} cannot be resolved!")
assert updated is not None
Expand All @@ -67,9 +67,13 @@ def get_output_dir():
return output_dir


def get_data_loc(data_dirs, path):
if not isinstance(data_dirs, list):
data_dirs = [data_dirs]
def get_data_loc(path, extra=None):
if extra is None:
data_dirs = get_config()["DATA_DIRS"]
if not isinstance(data_dirs, list):
data_dirs = [data_dirs]
else:
data_dirs = [extra]
if "$" in path:
path = os.path.expandvars(path)
if "$" in path:
Expand Down
5 changes: 2 additions & 3 deletions pippin/cosmomc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
import numpy as np

from pippin.config import mkdirs, get_config, get_output_loc, get_data_loc
from pippin.config import mkdirs, get_output_loc, get_data_loc
from pippin.create_cov import CreateCov
from pippin.task import Task

Expand Down Expand Up @@ -39,7 +39,6 @@ class CosmoMC(Task): # TODO: Define the location of the output so we can run th
"""

def __init__(self, name, output_dir, options, global_config, dependencies=None):
self.data_dirs = global_config["DATA_DIRS"]
super().__init__(name, output_dir, dependencies=dependencies)
self.options = options
self.global_config = global_config
Expand Down Expand Up @@ -198,7 +197,7 @@ def _run(self, force_refresh):
if self.static:
self.logger.info("CMB only constraints detected, copying static files")

cosmomc_static_loc = get_data_loc(self.data_dirs, self.static_path + self.ini_prefix)
cosmomc_static_loc = get_data_loc(self.static_path + self.ini_prefix)
if cosmomc_static_loc is None:
self.logger.error("Seems like we can't find the static chains...")
return False
Expand Down
7 changes: 3 additions & 4 deletions pippin/create_cov.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ class CreateCov(ConfigBasedExecutable):
"""

def __init__(self, name, output_dir, options, global_config, dependencies=None, index=0):
self.data_dirs = global_config["DATA_DIRS"]

base_file = get_data_loc(self.data_dirs, "create_cov/input_file.txt")
base_file = get_data_loc("create_cov/input_file.txt")
super().__init__(name, output_dir, base_file, default_assignment=": ", dependencies=dependencies)

self.options = options
Expand All @@ -49,7 +48,7 @@ def __init__(self, name, output_dir, options, global_config, dependencies=None,
self.path_to_code = os.path.abspath(os.path.dirname(inspect.stack()[0][1]) + "/external")

self.logfile = os.path.join(self.output_dir, "output.log")
self.sys_file_in = get_data_loc(self.data_dirs, options.get("SYS_SCALE", "create_cov/sys_scale.LIST"))
self.sys_file_in = get_data_loc(options.get("SYS_SCALE", "create_cov/sys_scale.LIST"))
self.sys_file_out = os.path.join(self.output_dir, "sys_scale.LIST")
self.chain_dir = os.path.join(self.output_dir, "chains/")
self.config_dir = os.path.join(self.output_dir, "output")
Expand Down Expand Up @@ -94,7 +93,7 @@ def _check_completion(self, squeue):

def calculate_input(self):
self.logger.debug(f"Calculating input")
self.set_property("COSMOMC_TEMPLATES", get_data_loc(self.data_dirs, "cosmomc_templates"))
self.set_property("COSMOMC_TEMPLATES", get_data_loc("cosmomc_templates"))
self.set_property("BASEOUTPUT", self.name)
self.set_property("SYSFILE", self.sys_file_out)
self.set_property("TOPDIR", self.biascor_dep.output["fit_output_dir"])
Expand Down
3 changes: 1 addition & 2 deletions pippin/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ class DataPrep(Task): # TODO: Define the location of the output so we can run t
"""

def __init__(self, name, output_dir, options, global_config, dependencies=None):
self.data_dirs = global_config["DATA_DIRS"]
super().__init__(name, output_dir, dependencies=dependencies)
self.options = options
self.global_config = get_config()
Expand All @@ -36,7 +35,7 @@ def __init__(self, name, output_dir, options, global_config, dependencies=None):
self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
self.path_to_task = output_dir

self.raw_dir = get_data_loc(self.data_dirs, self.options.get("RAW_DIR"))
self.raw_dir = get_data_loc(self.options.get("RAW_DIR"))
if self.raw_dir is None:
Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

Expand Down
8 changes: 2 additions & 6 deletions pippin/snana_fit.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import inspect
import os
import shutil
import subprocess
Expand Down Expand Up @@ -31,15 +30,14 @@ class SNANALightCurveFit(ConfigBasedExecutable):
"""

def __init__(self, name, output_dir, sim_task, config, global_config):
self.data_dirs = global_config["DATA_DIRS"]

self.config = config
self.global_config = global_config

base = config.get("BASE")
if base is None:
Task.fail_config(f"You have not specified a BASE nml file for task {name}")
self.base_file = get_data_loc(self.data_dirs, base)
self.base_file = get_data_loc(base)
if self.base_file is None:
Task.fail_config(f"Base file {base} cannot be found for task {name}")

Expand Down Expand Up @@ -80,7 +78,7 @@ def __init__(self, name, output_dir, sim_task, config, global_config):
self.logger.debug("Loading fitopts")
self.fitopts = []
for f in fitopts:
potential_path = get_data_loc(self.data_dirs, f)
potential_path = get_data_loc(f)
if os.path.exists(potential_path):
self.logger.debug(f"Loading in fitopts from {potential_path}")
with open(potential_path) as f:
Expand Down Expand Up @@ -205,8 +203,6 @@ def write_nml(self, force_refresh):

# We want to do our hashing check here
string_to_hash = self.fitopts + self.base
# with open(os.path.abspath(inspect.stack()[0][1]), "r") as f:
# string_to_hash += f.read()
new_hash = self.get_hash_from_string("".join(string_to_hash))
old_hash = self.get_old_hash()
regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
Expand Down
10 changes: 5 additions & 5 deletions pippin/snana_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class SNANASimulation(ConfigBasedExecutable):

def __init__(self, name, output_dir, genversion, config, global_config, combine="combine.input"):
self.data_dirs = global_config["DATA_DIRS"]
base_file = get_data_loc(self.data_dirs, combine)
base_file = get_data_loc(combine)
super().__init__(name, output_dir, base_file, ": ")

self.genversion = genversion
Expand Down Expand Up @@ -76,7 +76,7 @@ def __init__(self, name, output_dir, genversion, config, global_config, combine=
if len(self.base_ia + self.base_cc) == 0:
Task.fail_config("Your sim has no components specified! Please add something to simulate!")
for file in self.base_ia + self.base_cc:
if get_data_loc(self.data_dirs, file) is None:
if get_data_loc(file) is None:
Task.fail_config(f"Cannot find file {file} specified in simulation {self.name}")

# Try to determine how many jobs will be put in the queue
Expand Down Expand Up @@ -168,7 +168,7 @@ def write_input(self, force_refresh):
# Copy the base files across
input_paths = []
for f in self.base_ia + self.base_cc:
resolved = get_data_loc(self.data_dirs, f)
resolved = get_data_loc(f)
shutil.copy(resolved, temp_dir)
input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
self.logger.debug(f"Copying input file {resolved} to {temp_dir}")
Expand All @@ -181,14 +181,14 @@ def write_input(self, force_refresh):
for ff in fs:
if ff not in input_copied:
input_copied.append(ff)
path = get_data_loc(self.data_dirs, ff)
path = get_data_loc(ff)
copied_path = os.path.join(temp_dir, os.path.basename(path))
with open(path, "r") as f:
for line in f.readlines():
line = line.strip()
if line.startswith("INPUT_FILE_INCLUDE"):
include_file = line.split(":")[-1].strip()
include_file_path = get_data_loc(self.data_dirs, include_file)
include_file_path = get_data_loc(include_file)
self.logger.debug(f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}")

include_file_basename = os.path.basename(include_file_path)
Expand Down
21 changes: 21 additions & 0 deletions tests/config_files/fail_classify1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
SIM:
ASIM:
IA_G10_DES3YR:
BASE: surveys/sdss/sims_ia/sn_ia_g10_sdss_3yr.input
II:
BASE: surveys/sdss/sims_cc/sn_ii_templates.input
Ibc:
BASE: surveys/sdss/sims_cc/sn_ibc_templates.input
GLOBAL:
NGEN_UNIT: 1
RANSEED_REPEAT: 10 12345
SOLID_ANGLE: 10

LCFIT:
D:
BASE: surveys/des/lcfit_nml/des_5yr.nml

CLASSIFICATION:
FITPROBTEST:
CLASSIFIER: FitProbClassifier # No mode, no work

22 changes: 22 additions & 0 deletions tests/config_files/fail_classify2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
SIM:
ASIM:
IA_G10_DES3YR:
BASE: surveys/sdss/sims_ia/sn_ia_g10_sdss_3yr.input
II:
BASE: surveys/sdss/sims_cc/sn_ii_templates.input
Ibc:
BASE: surveys/sdss/sims_cc/sn_ibc_templates.input
GLOBAL:
NGEN_UNIT: 1
RANSEED_REPEAT: 10 12345
SOLID_ANGLE: 10

LCFIT:
D:
BASE: surveys/des/lcfit_nml/des_5yr.nml

CLASSIFICATION:
FITPROBTEST:
CLASSIFIER: FitProbClassifier # MODE is set, but MASK_SIM below matches no sim
MODE: predict
MASK_SIM: NOTHING
21 changes: 21 additions & 0 deletions tests/config_files/fail_classify3.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
SIM:
ASIM:
IA_G10_DES3YR:
BASE: surveys/sdss/sims_ia/sn_ia_g10_sdss_3yr.input
II:
BASE: surveys/sdss/sims_cc/sn_ii_templates.input
Ibc:
BASE: surveys/sdss/sims_cc/sn_ibc_templates.input
GLOBAL:
NGEN_UNIT: 1
RANSEED_REPEAT: 10 12345
SOLID_ANGLE: 10

LCFIT:
D:
BASE: surveys/des/lcfit_nml/des_5yr.nml

CLASSIFICATION:
NNTRAIN:
CLASSIFIER: NearestNeighborPyClassifier
MODE: predict
23 changes: 23 additions & 0 deletions tests/config_files/fail_classify4.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
SIM:
ASIM:
IA_G10_DES3YR:
BASE: surveys/sdss/sims_ia/sn_ia_g10_sdss_3yr.input
II:
BASE: surveys/sdss/sims_cc/sn_ii_templates.input
Ibc:
BASE: surveys/sdss/sims_cc/sn_ibc_templates.input
GLOBAL:
NGEN_UNIT: 1
RANSEED_REPEAT: 10 12345
SOLID_ANGLE: 10

LCFIT:
D:
BASE: surveys/des/lcfit_nml/des_5yr.nml

CLASSIFICATION:
NNTRAIN:
CLASSIFIER: NearestNeighborPyClassifier # Predict mode, but MODEL below is neither a classifier task nor a file
MODE: predict
OPTS:
MODEL: nothing
Loading

0 comments on commit 16fc734

Please sign in to comment.