From 2c5e258c37eb4cfb7477f0f6ae57654fb8c4ae3b Mon Sep 17 00:00:00 2001 From: thuiop Date: Wed, 19 May 2021 14:09:55 +0200 Subject: [PATCH] Added possibility to save results (#148) * Added possibility to save results * Added functions to load the results * Fix bug and add error messages * Added tests for saving and loading * Update btk/draw_blends.py Co-authored-by: Ismael Mendoza <11745764+ismael-mendoza@users.noreply.github.com> * Update btk/utils.py Co-authored-by: Alexandre Boucaud <3065310+aboucaud@users.noreply.github.com> * Update btk/utils.py Co-authored-by: Alexandre Boucaud <3065310+aboucaud@users.noreply.github.com> * Convert to pathlib and folder structure * Switch test to tempfile * Put list into a constant * Update btk/draw_blends.py Co-authored-by: Ismael Mendoza <11745764+ismael-mendoza@users.noreply.github.com> * Update btk/measure.py Co-authored-by: Ismael Mendoza <11745764+ismael-mendoza@users.noreply.github.com> * Update btk/metrics.py Co-authored-by: Ismael Mendoza <11745764+ismael-mendoza@users.noreply.github.com> * Update btk/metrics.py Co-authored-by: Ismael Mendoza <11745764+ismael-mendoza@users.noreply.github.com> * Fix broken parts after solving merge conflicts * remove extraneous print statement Co-authored-by: Ismael Mendoza <11745764+ismael-mendoza@users.noreply.github.com> Co-authored-by: Alexandre Boucaud <3065310+aboucaud@users.noreply.github.com> Co-authored-by: Ismael Mendoza --- btk/__init__.py | 1 + btk/draw_blends.py | 21 ++++++- btk/measure.py | 19 +++++++ btk/metrics.py | 20 ++++++- btk/utils.py | 133 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_save.py | 51 +++++++++++++++++ 6 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 btk/utils.py create mode 100644 tests/test_save.py diff --git a/btk/__init__.py b/btk/__init__.py index 877444d84..059d2cad8 100644 --- a/btk/__init__.py +++ b/btk/__init__.py @@ -17,3 +17,4 @@ from . import plot_utils from . import sampling_functions from . import survey +from . import utils diff --git a/btk/draw_blends.py b/btk/draw_blends.py index 8fa63d9dd..0668d640e 100644 --- a/btk/draw_blends.py +++ b/btk/draw_blends.py @@ -1,5 +1,6 @@ """Module for generating batches of drawn blended images.""" import copy +import os from abc import ABC from abc import abstractmethod from itertools import chain @@ -148,6 +149,7 @@ def __init__( shifts=None, indexes=None, channels_last=False, + save_path=None, ): """Initializes the DrawBlendsGenerator class. @@ -170,6 +172,8 @@ def __init__( channels_last (bool): Whether to return images as numpy arrays with the channel (band) dimension as the last dimension or before the pixels dimensions (default). + save_path (str): Path to a directory where results will be saved. If left + as None, results will not be saved. """ self.blend_generator = BlendGenerator( catalog, sampling_function, batch_size, shifts, indexes, verbose @@ -191,8 +195,8 @@ def __init__( self.add_noise = add_noise self.verbose = verbose - self.channels_last = channels_last + self.save_path = save_path def __iter__(self): """Returns iterable which is the object itself.""" @@ -274,6 +278,20 @@ def __next__(self): blend_images[s.name][i] = batch_results[i][0] isolated_images[s.name][i] = batch_results[i][1] batch_blend_cat[s.name].append(batch_results[i][2]) + + if self.save_path is not None: + if not os.path.exists(os.path.join(self.save_path, s.name)): + os.mkdir(os.path.join(self.save_path, s.name)) + + np.save(os.path.join(self.save_path, s.name, "blended"), blend_images[s.name]) + np.save(os.path.join(self.save_path, s.name, "isolated"), isolated_images[s.name]) + for i in range(len(batch_results)): + batch_blend_cat[s.name][i].write( + os.path.join(self.save_path, s.name, f"blend_info_{i}"), + format="ascii", + overwrite=True, + ) + if len(self.surveys) > 1: output = { "blend_images": blend_images, @@ -291,6 +309,7 @@ def __next__(self): "psf": psfs[survey_name], "wcs": wcss[survey_name], } + return output def render_mini_batch(self, blend_list, psf, wcs, survey, extra_data=None): diff --git a/btk/measure.py b/btk/measure.py index 3dbd8f168..832c89aae 100644 --- a/btk/measure.py +++ b/btk/measure.py @@ -48,6 +48,7 @@ def measure_function(batch, idx, **kwargs): Omitted keys in the returned dictionary are automatically assigned a `None` value (except for `catalog` which is a mandatory entry). """ +import os from itertools import repeat import astropy.table @@ -154,6 +155,7 @@ def __init__( cpus=1, verbose=False, measure_kwargs: list = None, + save_path=None, ): """Initialize measurement generator. @@ -168,6 +170,8 @@ def __init__( to be passed in to each of the `measure_functions`. Each dictionnary is passed one time to each function, meaning that each function which be ran as many times as there are different dictionnaries. + save_path (str): Path to a directory where results will be saved. If left + as None, results will not be saved. """ # setup and verify measure_functions. if callable(measure_functions): @@ -188,6 +192,7 @@ def __init__( self.batch_size = self.draw_blend_generator.batch_size self.channels_last = self.draw_blend_generator.channels_last self.verbose = verbose + self.save_path = save_path # initialize measure_kwargs dictionary. self.measure_kwargs = [{}] if measure_kwargs is None else measure_kwargs @@ -281,5 +286,19 @@ def __next__(self): measure_output[j][i][key] for j in range(len(measure_output)) ] measure_results[f.__name__ + str(m)] = measure_dict + if self.save_path is not None: + dir_name = f.__name__ + str(m) + if not os.path.exists(os.path.join(self.save_path, dir_name)): + os.mkdir(os.path.join(self.save_path, dir_name)) + + for key in ["segmentation", "deblended_images"]: + if key in measure_dict.keys(): + np.save(os.path.join(self.save_path, dir_name, key), measure_dict[key]) + for j, cat in enumerate(measure_dict["catalog"]): + cat.write( + os.path.join(self.save_path, dir_name, f"detection_catalog_{j}"), + format="ascii", + overwrite=True, + ) return blend_output, measure_results diff --git a/btk/metrics.py b/btk/metrics.py index 1e9f6ddea..03ac527ff 100644 --- a/btk/metrics.py +++ b/btk/metrics.py @@ -53,6 +53,8 @@ is the standard scalar product on vectors. """ +import os + import astropy.table import galsim import matplotlib.pyplot as plt @@ -163,7 +165,6 @@ def get_detection_match(true_table, detected_table, f_distance=distance_center): raise KeyError("Detection table has no column y_peak") match_table = astropy.table.Table() - print(f_distance) # dist[i][j] = distance between true object i and detected object j. dist = np.zeros((len(true_table), len(detected_table))) for i, true_gal in enumerate(true_table): @@ -531,6 +532,7 @@ def compute_metrics( # noqa: C901 meas_band_num=0, target_meas={}, channels_last=False, + save_path=None, f_distance=distance_center, ): """Computes all requested metrics given information in a single batch from measure_generator. @@ -565,6 +567,8 @@ def compute_metrics( # noqa: C901 be returned for both isolated and deblended images to compare. channels_last (bool) : Indicates whether the images should be channels first (NCHW) or channels last (NHWC). + save_path (str): Path to directory where results will be saved. If left + as None, results will not be saved. f_distance (func): Function used to compute the distance between true and detected galaxies. Takes as arguments the entries corresponding to the two galaxies. By default the distance is the euclidean distance from center to center. @@ -651,6 +655,13 @@ def compute_metrics( # noqa: C901 for k in reconstruction_keys: row[k] = results["reconstruction"][k][i][j] results["galaxy_summary"].add_row(row[0]) + if save_path is not None: + if not os.path.exists(save_path): + os.mkdir(save_path) + + for key in use_metrics: + np.save(os.path.join(save_path, f"{key}_metric"), results[key]) + results["galaxy_summary"].write(os.path.join(save_path, "galaxy_summary"), format="ascii") return results @@ -665,6 +676,7 @@ def __init__( meas_band_num=0, target_meas={}, noise_threshold_factor=3, + save_path=None, f_distance=distance_center, ): """Initialize metrics generator. @@ -684,6 +696,8 @@ def __init__( applied when getting segmentations from true images. A value of 3 would correspond to a threshold of 3 sigmas (with sigma the standard deviation of the noise) + save_path (str): Path to directory where results will be saved. If left + as None, results will not be saved. f_distance (func): Function used to compute the distance between true and detected galaxies. Takes as arguments the entries corresponding to the two galaxies. By default the distance is the euclidean distance from center to center. @@ -693,6 +707,7 @@ def __init__( self.meas_band_num = meas_band_num self.target_meas = target_meas self.noise_threshold_factor = noise_threshold_factor + self.save_path = save_path self.f_distance = f_distance def __next__(self): @@ -725,6 +740,9 @@ def __next__(self): self.meas_band_num, target_meas, channels_last=self.measure_generator.channels_last, + save_path=os.path.join(self.save_path, meas_func) + if self.save_path is not None + else None, f_distance=self.f_distance, ) metrics_results[meas_func] = metrics_results_f diff --git a/btk/utils.py b/btk/utils.py new file mode 100644 index 000000000..f20f2cf99 --- /dev/null +++ b/btk/utils.py @@ -0,0 +1,133 @@ +"""Contains utility functions, including functions for loading saved results.""" +import os + +import numpy as np +from astropy.table import Table + +BLEND_RESULT_KEYS = ("blend_images", "isolated_images", "blend_list") + + +def load_blend_results(path, survey): + """Load results exported from a DrawBlendsGenerator. + + Args; + path (str): Path to the files. Should be the same as the save_path + which was provided to the DrawBlendsGenerator to save + the files. + survey (str): Name of the survey for which you want to load the files. + + Returns: + Dictionnary containing the blend images, the isolated images and the + informations about the blends. + """ + blend_images = np.load(os.path.join(path, survey, "blended.npy"), allow_pickle=True) + isolated_images = np.load(os.path.join(path, survey, "isolated.npy"), allow_pickle=True) + blend_list = [ + Table.read(os.path.join(path, survey, f"blend_info_{i}"), format="ascii") + for i in range(blend_images.shape[0]) + ] + + return { + "blend_images": blend_images, + "isolated_images": isolated_images, + "blend_list": blend_list, + } + + +def load_measure_results(path, measure_name, n_batch): + """Load results exported from a MeasureGenerator. + + Args: + path (str): Path to the files. Should be the same as the save_path + which was provided to the MeasureGenerator to save + the files. + measure_name (str): Name of the measure function for which you + want to load the files + n_batch (int): Number of blends in the batch you want to load + + Returns: + Dictionnary containing the detection catalogs, the segmentations + and the deblended images. + """ + measure_results = {} + for key in ["segmentation", "deblended_images"]: + try: + measure_results[key] = np.load( + os.path.join(path, measure_name, f"{key}.npy"), allow_pickle=True + ) + except FileNotFoundError: + print(f"No {key} found.") + + catalog = [ + Table.read( + os.path.join(path, measure_name, f"detection_catalog_{j}"), + format="ascii", + ) + for j in range(n_batch) + ] + measure_results["catalog"] = catalog + return measure_results + + +def load_metrics_results(path, measure_name): + """Load results exported from a MetricsGenerator. + + Args: + path (str): Path to the files. Should be the same as the save_path + which was provided to the MetricsGenerator to save + the files. + measure_name (str): Name of the measure function for which you + want to load the files + + Returns: + Dictionnary containing the detection catalogs, the segmentations + and the deblended images. + """ + metrics_results = {} + for key in ["detection", "segmentation", "reconstruction"]: + try: + metrics_results[key] = np.load( + os.path.join(path, measure_name, f"{key}_metric.npy"), allow_pickle=True + ) + except FileNotFoundError: + print(f"No {key} metrics found.") + + metrics_results["galaxy_summary"] = Table.read( + os.path.join(path, measure_name, "galaxy_summary"), + format="ascii", + ) + return metrics_results + + +def load_all_results(path, surveys, measure_names, n_batch, n_meas_kwargs=1): + """Load results exported from a MetricsGenerator. + + Args: + path (str): Path to the files. Should be the same as the save_path + which was provided to the MetricsGenerator to save + the files. + surveys (list): Names of the surveys for which you want to load + the files + measure_names (list): Names of the measure functions for which you + want to load the files + n_batch (int): Number of blends in the batch you want to load + + Returns: + The three dictionnaries corresponding to the results. + """ + blend_results = {} + for key in BLEND_RESULT_KEYS: + blend_results[key] = {} + measure_results = {} + metrics_results = {} + for s in surveys: + blend_results_temp = load_blend_results(path, s) + for key in BLEND_RESULT_KEYS: + blend_results[key][s] = blend_results_temp[key] + + for meas in measure_names: + for n in range(n_meas_kwargs): + measure_results[meas + str(n)] = load_measure_results(path, meas + str(n), n_batch) + metrics_results[meas + str(n)] = load_metrics_results(path, meas + str(n)) + + return blend_results, measure_results, metrics_results diff --git a/tests/test_save.py b/tests/test_save.py new file mode 100644 index 000000000..3992066c4 --- /dev/null +++ b/tests/test_save.py @@ -0,0 +1,51 @@ +import tempfile + +import numpy as np + +import btk +from btk.survey import Rubin + + +def test_save(): + output_dir = tempfile.mkdtemp() + catalog_name = "data/sample_input_catalog.fits" + stamp_size = 24.0 + batch_size = 8 + catalog = btk.catalog.CatsimCatalog.from_file(catalog_name) + sampling_function = btk.sampling_functions.DefaultSampling(stamp_size=stamp_size) + draw_blend_generator = btk.draw_blends.CatsimGenerator( + catalog, + sampling_function, + [Rubin], + batch_size=batch_size, + stamp_size=stamp_size, + save_path=output_dir, + ) + meas_generator = btk.measure.MeasureGenerator( + btk.measure.sep_measure, draw_blend_generator, save_path=output_dir + ) + metrics_generator = btk.metrics.MetricsGenerator( + meas_generator, + use_metrics=("detection", "segmentation", "reconstruction"), + target_meas={"ellipticity": btk.metrics.meas_ksb_ellipticity}, + save_path=output_dir, + ) + blend_results, measure_results, metrics_results = next(metrics_generator) + blend_results2, measure_results2, metrics_results2 = btk.utils.load_all_results( + output_dir, ["LSST"], ["sep_measure"], batch_size + ) + np.testing.assert_array_equal( + blend_results["blend_images"], blend_results2["blend_images"]["LSST"] + ) + np.testing.assert_array_equal( + measure_results["sep_measure0"]["segmentation"][0], + measure_results2["sep_measure0"]["segmentation"][0], + ) + np.testing.assert_array_equal( + measure_results["sep_measure0"]["deblended_images"][0], + measure_results2["sep_measure0"]["deblended_images"][0], + ) + np.testing.assert_array_equal( + metrics_results["sep_measure0"]["galaxy_summary"]["distance_closest_galaxy"], + metrics_results2["sep_measure0"]["galaxy_summary"]["distance_closest_galaxy"], + )