From d36170d458bcf598909ff918a103a5d70ae445c9 Mon Sep 17 00:00:00 2001 From: helenqu <8826297+helenqu@users.noreply.github.com> Date: Wed, 24 Jan 2024 14:48:41 -0800 Subject: [PATCH] SCONE: make slurm memory a user-facing option --- pippin/classifiers/classifier.py | 4 +++- pippin/classifiers/scone.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pippin/classifiers/classifier.py b/pippin/classifiers/classifier.py index dc96cafa..9dc88a2e 100644 --- a/pippin/classifiers/classifier.py +++ b/pippin/classifiers/classifier.py @@ -159,6 +159,8 @@ def get_num_ranseed(sim_tasks, lcfit_tasks): name = config["CLASSIFIER"] cls = ClassifierFactory.get(name) options = config.get("OPTS", {}) + if options is None: + Task.fail_config(f"Classifier {clas_name} has no OPTS specified -- either remove the OPTS keyword or specify some options under it") if "MODE" not in config: Task.fail_config(f"Classifier task {clas_name} needs to specify MODE as train or predict") mode = config["MODE"].lower() @@ -169,7 +171,7 @@ def get_num_ranseed(sim_tasks, lcfit_tasks): mode = Classifier.PREDICT # Prevent mode = predict and SIM_FRACTION < 1 - if mode == Classifier.PREDICT and "SIM_FRACTION" in options and options["SIM_FRACTION"] < 1: + if mode == Classifier.PREDICT and options.get("SIM_FRACTION", 1) > 1: Task.fail_config("SIM_FRACTION must be 1 (all sims included) for predict mode") # Validate that train is not used on certain classifiers diff --git a/pippin/classifiers/scone.py b/pippin/classifiers/scone.py index 770fdbf6..57267086 100644 --- a/pippin/classifiers/scone.py +++ b/pippin/classifiers/scone.py @@ -100,7 +100,7 @@ def make_heatmaps_sbatch_header(self): header_dict = { "REPLACE_LOGFILE": self.heatmaps_log_path, "REPLACE_WALLTIME": "12:00:00", #TODO: change to scale with # of heatmaps expected - "REPLACE_MEM": "64GB", + "REPLACE_MEM": self.options.get("HEATMAPS_MEM", "32GB"), } heatmaps_sbatch_header = self.make_sbatch_header("HEATMAPS_BATCH_FILE", 
header_dict) @@ -111,7 +111,7 @@ def make_model_sbatch_script(self): header_dict = { "REPLACE_NAME": self.job_base_name, "REPLACE_LOGFILE": str(Path(self.output_dir) / "output.log"), - "REPLACE_MEM": "32GB", + "REPLACE_MEM": self.options.get("MODEL_MEM", "64GB"), "REPLACE_WALLTIME": "4:00:00" if self.gpu else "12:00:00", # 4h is max for gpu } model_sbatch_header = self.make_sbatch_header("MODEL_BATCH_FILE", header_dict, use_gpu=self.gpu) @@ -221,7 +221,7 @@ def _make_config(self, metadata_paths, lcdata_paths, mode, heatmaps_created): config["batch_size"] = self.options.get("BATCH_SIZE", 32) # TODO: replace with percentage of total size? config["Ia_fraction"] = self.options.get("IA_FRACTION", 0.5) config["output_path"] = self.output_dir - config["trained_model"] = self.options.get("MODEL", False) + config["trained_model"] = self.options.get("MODEL", None) config["kcor_file"] = self.options.get("KCOR_FILE", None) config["mode"] = mode config["job_base_name"] = self.job_base_name