From bbe79343213083a1cc5904731cfd0081a718fad1 Mon Sep 17 00:00:00 2001 From: Harry Bevins <40355093+htjb@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:23:32 +0000 Subject: [PATCH] Test preprocessing (#26) * fixing #24 * fixing afb subtraction for test data * bumping bug fix version number * division of test data by training standard deviation --- README.rst | 2 +- globalemu/preprocess.py | 18 ++++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index d352f96..9eb7a44 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ Introduction :globalemu: Robust Global 21-cm Signal Emulation :Author: Harry Thomas Jones Bevins -:Version: 1.8.0 +:Version: 1.8.1 :Homepage: https://github.com/htjb/globalemu :Documentation: https://globalemu.readthedocs.io/ diff --git a/globalemu/preprocess.py b/globalemu/preprocess.py index 31d3168..eb5b046 100644 --- a/globalemu/preprocess.py +++ b/globalemu/preprocess.py @@ -73,13 +73,9 @@ class process(): data set or not. Set to True by default as this is advised for training both neutral fraction and global signal emulators. - logs: **list / default: [0, 1, 2]** + logs: **list / default: []** | The indices corresponding to the astrophysical parameters in - "train_data.txt" that need to be logged. The default assumes - that the first three columns in "train_data.txt" are - :math:`{f_*}` (star formation efficiency), - :math:`{V_c}` (minimum virial circular velocity) and - :math:`{f_x}` (X-ray efficieny). + "train_data.txt" that need to be logged. """ def __init__(self, num, z, **kwargs): @@ -137,7 +133,7 @@ def __init__(self, num, z, **kwargs): if type(bool_kwargs[i]) is not bool: raise TypeError(bool_strings[i] + " must be a bool.") - self.logs = kwargs.pop('logs', [0, 1, 2]) + self.logs = kwargs.pop('logs', []) if type(self.logs) is not list: raise TypeError("'logs' must be a list.") @@ -170,7 +166,6 @@ def load_data(file): train_data = full_train_data.copy() if self.preprocess_settings['AFB'] is True: train_labels = full_train_labels.copy() - res.deltaT - test_labels -= res.deltaT else: train_labels = full_train_labels.copy() else: @@ -189,10 +184,14 @@ def load_data(file): train_data.append(full_train_data[i, :]) if self.preprocess_settings['AFB'] is True: train_labels.append(full_train_labels[i] - res.deltaT) + else: train_labels.append(full_train_labels[i]) train_data, train_labels = np.array(train_data), \ np.array(train_labels) + + if self.preprocess_settings['AFB'] is True: + test_labels = test_labels.copy() - res.deltaT log_train_data = [] for i in range(train_data.shape[1]): @@ -268,9 +267,8 @@ def load_data(file): norm_train_labels = norm_train_labels.flatten() np.save(self.base_dir + 'labels_stds.npy', labels_stds) - test_labels_stds = test_labels.std() norm_test_labels = [ - test_labels[i, :]/test_labels_stds + test_labels[i, :]/labels_stds for i in range(test_labels.shape[0])] norm_test_labels = np.array(norm_test_labels)