From d1e175ed610b24c8d930d13cb4302f44921cdab2 Mon Sep 17 00:00:00 2001 From: Aaron David Schneider Date: Thu, 12 Oct 2023 12:56:22 +0200 Subject: [PATCH] fix pylint and tests --- .flake8 | 13 +++ .github/workflows/pylint.yml | 2 +- opac_mixer/emulator.py | 202 ++++++++++++++++++++++++----------- opac_mixer/mix.py | 88 ++++++++++----- opac_mixer/read.py | 87 ++++++++++----- opac_mixer/utils/__init__.py | 2 +- opac_mixer/utils/models.py | 60 +++++++---- opac_mixer/utils/scalings.py | 16 +-- pyproject.toml | 29 +++++ tests/test_config.py | 12 ++- tests/test_mix.py | 46 +++++--- 11 files changed, 390 insertions(+), 167 deletions(-) create mode 100644 .flake8 create mode 100644 pyproject.toml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..10e2c0c --- /dev/null +++ b/.flake8 @@ -0,0 +1,13 @@ +[flake8] +ignore = E203, E266, E501, W503, F403, F401 +max-line-length = 79 +max-complexity = 18 +select = B,C,E,F,W,T4,B9 +exclude = + .git, + __pycache__, + doc/source/conf.py, + old, + build, + dist, + tests/*.py \ No newline at end of file diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 8383f45..3d4acaf 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -20,4 +20,4 @@ jobs: pip install pylint - name: Analysing the code with pylint run: | - pylint $(git ls-files '*.py') --fail-under=9.5 --ignore-paths=doc,tests,oapc_mixer/patches --disable=R0913,R0914,C0415,E0401,E0402,C0103,C0301,R0902,C0209 + pylint opac_mixer/utils/*.py opac_mixer/*.py --fail-under=9.5 --disable=R0913,R0914,C0415,E0401,E0402,C0103,C0301,R0902,C0209 diff --git a/opac_mixer/emulator.py b/opac_mixer/emulator.py index 1d4e115..ae6f639 100644 --- a/opac_mixer/emulator.py +++ b/opac_mixer/emulator.py @@ -6,7 +6,11 @@ import matplotlib.pyplot as plt import numpy as np from MITgcmutils import wrmds -from sklearn.metrics import mean_squared_error, mean_squared_log_error, r2_score +from sklearn.metrics import ( + mean_squared_error, + mean_squared_log_error, + r2_score, +) from sklearn.model_selection import train_test_split from tensorflow import keras @@ -67,7 +71,9 @@ def load(self): def write_out(self, mix, input_data, split_seed, test_size): """write data""" with h5py.File(self.filename, "w") as f: - inp_ds = f.create_dataset("input", input_data.shape, dtype=input_data.dtype) + inp_ds = f.create_dataset( + "input", input_data.shape, dtype=input_data.dtype + ) mix_ds = f.create_dataset("mix", mix.shape, dtype=input_data.dtype) mix_ds.attrs["split_seed"] = split_seed mix_ds.attrs["test_size"] = test_size @@ -81,11 +87,11 @@ class Emulator: """ def __init__( - self, - opac, - prange_opacset=DEFAULT_PRANGE, - trange_opacset=DEFAULT_TRANGE, - filename_data=None, + self, + opac, + prange_opacset=DEFAULT_PRANGE, + trange_opacset=DEFAULT_TRANGE, + filename_data=None, ): """ Construct the emulator class. 
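For reference, the training data written by write_out above can be read back directly with h5py. A minimal sketch; the filename is a placeholder, but the dataset names ("input", "mix") and the attributes ("split_seed", "test_size") are the ones created by write_out:

    import h5py

    # Inspect a training-data file produced by write_out above.
    # "emulator_data.h5" is a hypothetical filename.
    with h5py.File("emulator_data.h5", "r") as f:
        input_data = f["input"][...]  # sampled inputs, one row per batch entry
        mix = f["mix"][...]           # RORR-mixed k-tables (the targets)
        print(f["mix"].attrs["split_seed"], f["mix"].attrs["test_size"])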
@@ -108,7 +114,8 @@ def __init__(
             self.opac = [opac]
         else:
             raise ValueError(
-                "opac needs to be either a list of ReadOpac or a ReadOpac instance"
+                "opac needs to be either a list of ReadOpac or a ReadOpac"
+                " instance"
             )
 
         for opac_i in self.opac:
@@ -135,10 +142,10 @@ def __init__(
 
         if len(ls) > 1:
             assert (
-                    np.diff(ls) == 0.0
+                np.diff(ls) == 0.0
             ), "we need the same number of species for all ReadOpac instances"
             assert (
-                    np.diff(lg) == 0.0
+                np.diff(lg) == 0.0
             ), "we need the same number of g points for all ReadOpac instances"
 
         self._lg = lg[0]
@@ -170,7 +177,7 @@ def __init__(
         self.input_data = np.empty((0, *self._input_dim))
 
     def setup_scaling(
-            self, input_scaling=None, output_scaling=None, inv_output_scaling=None
+        self, input_scaling=None, output_scaling=None, inv_output_scaling=None
     ):
         """
         (optional) Change the callback functions for the scaling of in and output.
@@ -187,7 +194,8 @@ def setup_scaling(
         if hasattr(self, "X_train"):
             np.testing.assert_allclose(
                 self.inv_output_scaling(
-                    self.X_train, self.output_scaling(self.X_train, self.y_train)
+                    self.X_train,
+                    self.output_scaling(self.X_train, self.y_train),
                 ),
                 self.y_train,
             )
@@ -199,7 +207,9 @@ def setup_scaling(
                 self.y_test,
             )
 
-    def setup_sampling_grid(self, approx_batchsize=8e5, extra_abus=None, bounds=None):
+    def setup_sampling_grid(
+        self, approx_batchsize=8e5, extra_abus=None, bounds=None
+    ):
         """
         Setup the sampling grid. Sampling along MMR and pressure is in logspace.
         Sampling along temperature is in linspace.
@@ -232,20 +242,25 @@ def setup_sampling_grid(self, approx_batchsize=8e5, extra_abus=None, bounds=None
 
         for opac in self.opac:
             if extra_abus is not None:
-                assert (
-                    extra_abus.shape[1] == self._ls
-                ), "wrong shape in extra_abus second dimension (number of species)"
-                assert (
-                    extra_abus.shape[2] == opac.lp[0]
-                ), "wrong shape in extra_abus second dimension (number of pressure points)"
-                assert (
-                    extra_abus.shape[3] == opac.lt[0]
-                ), "wrong shape in extra_abus second dimension (number of temperature points)"
+                assert extra_abus.shape[1] == self._ls, (
+                    "wrong shape in extra_abus second dimension (number of"
+                    " species)"
+                )
+                assert extra_abus.shape[2] == opac.lp[0], (
+                    "wrong shape in extra_abus third dimension (number of"
+                    " pressure points)"
+                )
+                assert extra_abus.shape[3] == opac.lt[0], (
+                    "wrong shape in extra_abus fourth dimension (number of"
+                    " temperature points)"
+                )
                 extra_batchsize = int(extra_abus.shape[0])
             else:
                 extra_batchsize = 0
 
-            batchsize = int(approx_batchsize) // opac.lp[0] // opac.lt[0] // opac.lf[0]
+            batchsize = (
+                int(approx_batchsize) // opac.lp[0] // opac.lt[0] // opac.lf[0]
+            )
             batchsize_resh = batchsize * opac.lp[0] * opac.lt[0] * opac.lf[0]
             self._batchsize_resh.append(batchsize_resh)
             self._batchsize.append(batchsize)
@@ -265,7 +280,12 @@ def setup_sampling_grid(self, approx_batchsize=8e5, extra_abus=None, bounds=None
             sample = np.random.uniform(
                 low=0.0,
                 high=1.0,
-                size=(batchsize - extra_batchsize, self._ls, opac.lp[0], opac.lt[0]),
+                size=(
+                    batchsize - extra_batchsize,
+                    self._ls,
+                    opac.lp[0],
+                    opac.lt[0],
+                ),
             )
 
             # Scale the sampling to the actual bounds
@@ -274,8 +294,8 @@ def setup_sampling_grid(self, approx_batchsize=8e5, extra_abus=None, bounds=None
             abus = np.exp(
                 sample[:, :, :, :]
                 * (
-                        np.log(u_bounds)[np.newaxis, :, np.newaxis, np.newaxis]
-                        - np.log(l_bounds)[np.newaxis, :, np.newaxis, np.newaxis]
+                    np.log(u_bounds)[np.newaxis, :, np.newaxis, np.newaxis]
+                    - np.log(l_bounds)[np.newaxis, :, np.newaxis, np.newaxis]
                 )
                 + np.log(l_bounds)[np.newaxis, :,
np.newaxis, np.newaxis] ) @@ -284,7 +304,8 @@ def setup_sampling_grid(self, approx_batchsize=8e5, extra_abus=None, bounds=None abus = np.concatenate((abus, extra_abus), axis=0) weighted_kappas = ( - abus[:, :, :, :, np.newaxis, np.newaxis] * opac.kcoeff[np.newaxis, ...] + abus[:, :, :, :, np.newaxis, np.newaxis] + * opac.kcoeff[np.newaxis, ...] ) weighted_kappas = weighted_kappas.transpose((0, 2, 3, 4, 1, 5)) self.abus.append(abus) @@ -306,6 +327,14 @@ def setup_sampling_grid(self, approx_batchsize=8e5, extra_abus=None, bounds=None return self.input_data def _check_input_data(self, input_data): + """ + Checks the shape of the input data and raises an error if wrong + + Parameters + ---------- + input_data (array(batchsize, opac.lg, opac.ls)): + The input data + """ shape = input_data.shape if len(shape) != 3 or shape[1:] != self._input_dim: raise ValueError("input data does not match") @@ -330,12 +359,12 @@ def setup_mix(self, test_size=0.2, split_seed=None, do_parallel=True): ) if split_seed is None: - split_seed = np.random.randint(2 ** 32 - 1) + split_seed = np.random.randint(2**32 - 1) # make sure the filename comes without the npy suffix mixes = np.empty((0, self._lg)) for mixer, abus, batchsize_resh in zip( - self.mixer, self.abus, self._batchsize_resh + self.mixer, self.abus, self._batchsize_resh ): if do_parallel: mix = mixer.add_batch_parallel(abus) @@ -358,7 +387,11 @@ def setup_mix(self, test_size=0.2, split_seed=None, do_parallel=True): return self.X_train, self.X_test, self.y_train, self.y_test def load_data( - self, filename=None, test_size=None, split_seed=None, use_split_seed=True + self, + filename=None, + test_size=None, + split_seed=None, + use_split_seed=True, ): """ Load the training and test data from a h5 file. @@ -377,8 +410,8 @@ def load_data( if not hasattr(self, "_io") and filename is None: raise ValueError( - "we have no clue where we could get the data from. Set a filename either in this method or the " - "constructor" + "we have no clue where we could get the data from. Set a" + " filename either in this method or the constructor" ) if filename is not None: @@ -433,14 +466,14 @@ def _do_split(input_data, mix, split_seed, test_size, use_split_seed=True): ) def setup_model( - self, - model=None, - filename=None, - load=False, - learning_rate=1e-3, - hidden_units=None, - verbose=True, - **model_kwargs, + self, + model=None, + filename=None, + load=False, + learning_rate=1e-3, + hidden_units=None, + verbose=True, + **model_kwargs, ): """ Setup the emulator model and train it. @@ -477,19 +510,26 @@ def setup_model( else: if not self._has_mix: raise AttributeError( - "we do not have a mix to work with yet. Run setup_sampling_grid and setup_mix first." + "we do not have a mix to work with yet. Run" + " setup_sampling_grid and setup_mix first." ) if model is None: - self.model = get_deepset(ng=self._lg, hidden_units=hidden_units) + self.model = get_deepset( + ng=self._lg, hidden_units=hidden_units + ) elif model is not None: - print("WARNING: make sure your model is permutation invariant!") + print( + "WARNING: make sure your model is permutation invariant!" 
+ ) # Use provided model (needs to be sklearn compatible) self.model = model if isinstance(self.model, keras.Model): extra_model_kwargs = { - "optimizer": keras.optimizers.Adam(learning_rate=learning_rate), + "optimizer": keras.optimizers.Adam( + learning_rate=learning_rate + ), "loss": "mean_squared_error", } extra_model_kwargs.update(model_kwargs) @@ -526,7 +566,8 @@ def fit(self, *args, **kwargs): if not self._has_model: raise AttributeError( - "we do not have a model yet. Run setup_sampling_grid, setup_mix and setup_model first." + "we do not have a model yet. Run setup_sampling_grid," + " setup_mix and setup_model first." ) # fit the model on the training dataset @@ -534,16 +575,20 @@ def fit(self, *args, **kwargs): y_train = self.output_scaling(self.X_train, self.y_train) if isinstance(self.model, keras.Model): - callbacks = [keras.callbacks.EarlyStopping(monitor="loss", patience=3)] + callbacks = [ + keras.callbacks.EarlyStopping(monitor="loss", patience=3) + ] if self.verbose: callbacks.append(CustomCallback(self)) self.model.fit(X_train, y_train, callbacks=callbacks, **fit_args) else: - self.model.fit(X_train.reshape(len(X_train), -1), y_train, *args, **kwargs) + self.model.fit( + X_train.reshape(len(X_train), -1), y_train, *args, **kwargs + ) if hasattr(self, "_model_filename") and callable( - getattr(self.model, "save", None) + getattr(self.model, "save", None) ): print(f"Saving model to {self._model_filename}") self.model.save(self._model_filename) @@ -620,10 +665,16 @@ def score(self, validation_set=None): y_add_test_masked = y_add_test[test_mask] y_add_train_masked = y_add_train[train_mask] - e_add_test = np.sqrt(mean_squared_error(y_test_masked, y_add_test_masked)) - e_add_train = np.sqrt(mean_squared_error(y_train_masked, y_add_train_masked)) + e_add_test = np.sqrt( + mean_squared_error(y_test_masked, y_add_test_masked) + ) + e_add_train = np.sqrt( + mean_squared_error(y_train_masked, y_add_train_masked) + ) e_p_test = np.sqrt(mean_squared_error(y_test_masked, y_p_test_masked)) - e_p_train = np.sqrt(mean_squared_error(y_train_masked, y_p_train_masked)) + e_p_train = np.sqrt( + mean_squared_error(y_train_masked, y_p_train_masked) + ) if log_err_out: e_log_add_test = np.sqrt( @@ -645,18 +696,28 @@ def score(self, validation_set=None): r2_p_train = r2_score(y_train_masked, y_p_train_masked) if log_err_out: - r2_log_add_test = r2_score(np.log(y_test_masked), np.log(y_add_test_masked)) + r2_log_add_test = r2_score( + np.log(y_test_masked), np.log(y_add_test_masked) + ) r2_log_add_train = r2_score( np.log(y_train_masked), np.log(y_add_train_masked) ) - r2_log_p_test = r2_score(np.log(y_test_masked), np.log(y_p_test_masked)) - r2_log_p_train = r2_score(np.log(y_train_masked), np.log(y_p_train_masked)) + r2_log_p_test = r2_score( + np.log(y_test_masked), np.log(y_p_test_masked) + ) + r2_log_p_train = r2_score( + np.log(y_train_masked), np.log(y_p_train_masked) + ) print( - "test (add, model): {:.2e}, {:.2e}".format(e_add_test, e_p_test) + "test (add, model): {:.2e}, {:.2e}".format( + e_add_test, e_p_test + ) ) print( - "train (add, model): {:.2e}, {:.2e}".format(e_add_train, e_p_train) + "train (add, model): {:.2e}, {:.2e}".format( + e_add_train, e_p_train + ) ) if log_err_out: print( @@ -670,7 +731,9 @@ def score(self, validation_set=None): ) ) print( - "r2 test (add, model): {:.2e}, {:.2e}".format(r2_add_test, r2_p_test) + "r2 test (add, model): {:.2e}, {:.2e}".format( + r2_add_test, r2_p_test + ) ) print( "r2 train (add, model): {:.2e}, {:.2e}".format( @@ -693,7 +756,8 @@ def 
_check_trained(self):
         """Just a random check if the model has been trained or not."""
         if not self._is_trained:
             raise AttributeError(
-                "we do not have a trained model yet. Run setup_sampling_grid, setup_mix and setup_model and fit first."
+                "we do not have a trained model yet. Run setup_sampling_grid,"
+                " setup_mix and setup_model and fit first."
             )
 
     def export(self, path, file_format="exorad"):
@@ -710,7 +774,7 @@ def export(self, path, file_format="exorad"):
         self._check_trained()
         if isinstance(self.model, keras.Model):
             for i, weights in enumerate(self.model.weights):
-                if file_format in ('np', 'numpy'):
+                if file_format in ("np", "numpy"):
                     np.save(f"{path}/ml_coeff_{i}.npy", weights.numpy())
                 elif file_format == "exorad":
                     wrmds(
@@ -721,7 +785,9 @@ def export(self, path, file_format="exorad"):
                 else:
                     raise NotImplementedError("format is not supported yet.")
         else:
-            raise NotImplementedError("not implemented for the type of model used")
+            raise NotImplementedError(
+                "not implemented for the type of model used"
+            )
 
     def plot_predictions(self, validation_set=None):
         """
@@ -748,10 +814,18 @@ def plot_predictions(self, validation_set=None):
         fig, axes = plt.subplots(1, 2, sharex=True, sharey=True)
 
         axes[0].plot(
-            y_add_test[:, index], y_test[:, index], "bo", ms=0.01, linestyle="None"
+            y_add_test[:, index],
+            y_test[:, index],
+            "bo",
+            ms=0.01,
+            linestyle="None",
         )
         axes[1].plot(
-            y_p_test[:, index], y_test[:, index], "ro", ms=0.01, linestyle="None"
+            y_p_test[:, index],
+            y_test[:, index],
+            "ro",
+            ms=0.01,
+            linestyle="None",
         )
         fig.suptitle(f"g index = {index}")
         axes[0].set_title("simple sum")
@@ -790,9 +864,7 @@ def plot_weights(self, do_log=True):
                 vmin = -vmax
                 linthr = abs(weights.numpy()).min()
                 # linthr = 1e-1
-                norm = mcolors.SymLogNorm(
-                    linthr, vmin=vmin, vmax=vmax
-                )
+                norm = mcolors.SymLogNorm(linthr, vmin=vmin, vmax=vmax)
                 cmap = "BrBG"
             else:
                 norm = mcolors.LogNorm()
diff --git a/opac_mixer/mix.py b/opac_mixer/mix.py
index 737d6b6..99b6acc 100644
--- a/opac_mixer/mix.py
+++ b/opac_mixer/mix.py
@@ -23,7 +23,9 @@
 
 
 @numba.njit(nogil=True, fastmath=True, cache=True)
-def resort_rebin_njit(kout_conv, k1, k2, weights_in, weights_conv, Np, Nt, Nf, Ng):
+def resort_rebin_njit(
+    kout_conv, k1, k2, weights_in, weights_conv, Np, Nt, Nf, Ng
+):
     """
     Resort and rebin the convoluted kappas.
     Note that this function works with g values calculated half integer.
Fast, because it uses Numba
 
@@ -73,16 +75,22 @@ def resort_rebin_njit(kout_conv, k1, k2, weights_in, weights_conv, Np, Nt, Nf, N
             for freqi in range(Nf):
                 # Sort and resort:
                 index_sort = np.argsort(kout_conv[pi, ti, freqi])
-                kout_conv_resorted[:len_resort] = kout_conv[pi, ti, freqi][index_sort]
+                kout_conv_resorted[:len_resort] = kout_conv[pi, ti, freqi][
+                    index_sort
+                ]
                 weights_resorted = weights_conv[index_sort]
 
                 # compute new g-grid:
-                g_resorted[:len_resort] = compute_ggrid(weights_resorted, Ng * Ng)
+                g_resorted[:len_resort] = compute_ggrid(
+                    weights_resorted, Ng * Ng
+                )
 
                 # edges:
                 g_resorted[len_resort] = 1.0
                 kout_conv_resorted[len_resort] = (
                     k1[pi, ti, freqi, -1] + k2[pi, ti, freqi, -1]
                 )
-                kout_conv_resorted[0] = k1[pi, ti, freqi, 0] + k2[pi, ti, freqi, 0]
+                kout_conv_resorted[0] = (
+                    k1[pi, ti, freqi, 0] + k2[pi, ti, freqi, 0]
+                )
 
                 # interpolate:
                 kout[pi, ti, freqi, :] = np.interp(
                     ggrid, g_resorted, kout_conv_resorted
                 )
@@ -147,17 +155,17 @@ def compute_weights(g, Ng):
 
 @numba.njit(nogil=True, fastmath=True, cache=True, parallel=False)
 def _rorr_single(
-        ktable,
-        weights,
-        weights_conv,
-        ls,
-        lf,
-        lg,
-        temp_old,
-        press_old,
-        lt_old,
-        lp_old,
-        input_data,
+    ktable,
+    weights,
+    weights_conv,
+    ls,
+    lf,
+    lg,
+    temp_old,
+    press_old,
+    lt_old,
+    lp_old,
+    input_data,
 ):
     """
     A numba accelerated function that performs RORR on one pressure and temperature point
@@ -203,7 +211,18 @@ def _rorr_single(
     mmr = np.asarray(input_data[:-2])
 
     ki = interp_2d(
-        temp_old, press_old, temp, press, ktable, ls, lf, lg, lt_old, lp_old, 1, 1
+        temp_old,
+        press_old,
+        temp,
+        press,
+        ktable,
+        ls,
+        lf,
+        lg,
+        lt_old,
+        lp_old,
+        1,
+        1,
     )
 
     for speci in range(ls):
@@ -216,9 +235,13 @@ def _rorr_single(
         k2 = mixed_ktables[speci, :, :, :, :]
         for gi in range(lg):
             for gj in range(lg):
-                kout_conv[0, 0, :, gi + lg * gj] = k1[0, 0, :, gj] + k2[0, 0, :, gi]
+                kout_conv[0, 0, :, gi + lg * gj] = (
+                    k1[0, 0, :, gj] + k2[0, 0, :, gi]
+                )
 
-        kout = resort_rebin_njit(kout_conv, k1, k2, weights, weights_conv, 1, 1, lf, lg)
+        kout = resort_rebin_njit(
+            kout_conv, k1, k2, weights, weights_conv, 1, 1, lf, lg
+        )
 
     return kout[0, 0, :, :]
 
@@ -250,7 +273,8 @@ def add_batch_parallel(self, input_data, method=DEFAULT_METHOD):
 
 
 class CombineOpacIndividual(CombineOpac):
-    """A class for mixing arbitrary abundances and temperatures, pressures for each species"""
+    """A class for mixing arbitrary abundances, temperatures, and pressures
+    for each species"""
 
     def add_batch(self, input_data, method=DEFAULT_METHOD):
         """mix the kgrid with a batch of different pressure temperature and abundances values.
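The loop above (kout_conv[0, 0, :, gi + lg * gj] = k1[0, 0, :, gj] + k2[0, 0, :, gi]) together with resort_rebin_njit is the core of the RORR method: convolve two k-tables, resort the result by opacity, and rebin onto the original g grid. A minimal single-bin numpy sketch of that step, without the Numba acceleration and the g=0/g=1 edge handling used above:

    import numpy as np

    def rorr_pair_single_bin(k1, k2, weights, ggrid):
        # Convolve: every g point of table 1 overlaps every g point of table 2.
        k_conv = (k1[:, None] + k2[None, :]).ravel()
        w_conv = np.outer(weights, weights).ravel()

        # Resort by opacity, carrying the convolved weights along.
        order = np.argsort(k_conv)
        k_sorted = k_conv[order]
        w_sorted = w_conv[order]

        # Rebuild a half-integer g grid from the resorted weights
        # (the same convention as compute_ggrid above).
        g_sorted = w_sorted.cumsum() - 0.5 * w_sorted

        # Rebin back onto the original Ng-point g grid.
        return np.interp(ggrid, g_sorted, k_sorted)

Chaining this pairwise step over all species, each k-table premultiplied by its mass-mixing ratio, gives exactly the loop structure of _rorr_single here and _add_rorr below.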
@@ -404,14 +428,16 @@ def _add_rorr(ktable, weights, temp_old, press_old, use_mult, input_data):
             return np.asarray(
                 list(
                     tqdm.tqdm(
-                        pool.imap(func, input_data, chunksize=100), total=Nsamples
+                        pool.imap(func, input_data, chunksize=100),
+                        total=Nsamples,
                     )
                 ),
                 dtype=np.float64,
             )
         else:
             return np.asarray(
-                list(tqdm.tqdm(map(func, input_data), total=Nsamples)), dtype=np.float64
+                list(tqdm.tqdm(map(func, input_data), total=Nsamples)),
+                dtype=np.float64,
             )
 
 
@@ -441,8 +467,8 @@ def _get_mix_func(self, method):
         if method == "RORR":
             # The RORR method
             return partial(self._add_rorr, self.opac.kcoeff, self.opac.weights)
-        else:
-            raise NotImplementedError("Method not implemented.")
+
+        raise NotImplementedError("Method not implemented.")
 
     def _check_mmr_shape(self, mmr):
         """
@@ -513,7 +539,9 @@ def add_batch(self, input_data, method=DEFAULT_METHOD):
         mix_func = self._get_mix_func(method)
         return np.asarray([mix_func(mmr_i) for mmr_i in tqdm.tqdm(mmr)])
 
-    def add_batch_parallel(self, input_data, method=DEFAULT_METHOD, **pool_kwargs):
+    def add_batch_parallel(
+        self, input_data, method=DEFAULT_METHOD, **pool_kwargs
+    ):
         """Parallel version of add_batch
 
         Parameters
@@ -580,16 +608,18 @@ def _add_rorr(ktable, weights, mmr):
             The mixed k tables
         """
-        mixed_ktables = mmr[:, :, :, np.newaxis, np.newaxis] * ktable[:, :, :, :, :]
+        mixed_ktables = (
+            mmr[:, :, :, np.newaxis, np.newaxis] * ktable[:, :, :, :, :]
+        )
         kout = mixed_ktables[0, :, :, :, :]
 
         weights_conv = np.outer(weights, weights).flatten()
         for speci in range(1, ktable.shape[0]):
             k1 = kout
             k2 = mixed_ktables[speci]
-            kout_conv = (k1[..., :, np.newaxis] + k2[..., np.newaxis, :]).reshape(
-                *kout.shape[:-1], weights_conv.shape[0]
-            )
+            kout_conv = (
+                k1[..., :, np.newaxis] + k2[..., np.newaxis, :]
+            ).reshape(*kout.shape[:-1], weights_conv.shape[0])
             kout = resort_rebin_njit(
                 kout_conv, k1, k2, weights, weights_conv, *kout.shape
             )
diff --git a/opac_mixer/read.py b/opac_mixer/read.py
index 7bd9ebd..27fb33d 100644
--- a/opac_mixer/read.py
+++ b/opac_mixer/read.py
@@ -8,7 +8,7 @@
 
 
 class ReadOpac:
-    """"
+    """
     The opacity reader base class
 
     The reader class only needs to define a read in function and pass important metadata
     to the constructor of the parent class. That's it.
@@ -62,6 +62,7 @@ def compute_weights(g, Ng):
     # Verify that both functions are compatible
     np.testing.assert_allclose(weights, compute_weights(compute_ggrid(weights,len(weights)),len(weights)))
     """
+
     def __init__(self, ls, lp, lt, lf, lg):
         """
         Construct the reader. Initialize all arrays.
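The reader interface that both mixers build on is small; a minimal usage sketch of the entry points defined in read.py (the file names below are placeholders for ExomolOP-pRT k-table h5 files):

    from opac_mixer.read import ReadOpacChubb

    files = ["H2O.h5", "CO2.h5"]  # hypothetical k-table files
    opac = ReadOpacChubb(files)   # constructor only collects shape metadata
    opac.read_opac()              # load the k-tables, converted to cm2/g
    opac.setup_temp_and_pres()    # interpolate all species onto a common P-T grid
    opac.plot_opac(1e-1, 2000, opac.spec[0])  # one P-T point, first species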
@@ -91,18 +92,32 @@ def __init__(self, ls, lp, lt, lf, lg):
             (self.ls, self.lp.max(), self.lt.max(), self.lf[0], self.lg[0]),
             dtype=np.float64,
         )
-        self.bin_edges = np.zeros(self.lf[0] + 1, dtype=np.float64)  # wavenumbers at edges (1/lambda) in 1/cm
-        self.bin_center = np.zeros(self.lf[0], dtype=np.float64)  # wavenumbers at center(1/lambda) in 1/cm
-        self.weights = np.zeros(self.lg[0], dtype=np.float64)  # ktable weights of distribution function
-        self.T = np.zeros((self.ls, self.lt.max()), dtype=np.float64)  # temperature of k-table grid for each species in K
-        self.p = np.zeros((self.ls, self.lp.max()), dtype=np.float64)  # pressure of k-table grid for each species in bar
-        self.spec = self.ls * [""]  # names of opacity species
+        self.bin_edges = np.zeros(
+            self.lf[0] + 1, dtype=np.float64
+        )  # wavenumbers at edges (1/lambda) in 1/cm
+        self.bin_center = np.zeros(
+            self.lf[0], dtype=np.float64
+        )  # wavenumbers at center (1/lambda) in 1/cm
+        self.weights = np.zeros(
+            self.lg[0], dtype=np.float64
+        )  # ktable weights of distribution function
+        self.T = np.zeros(
+            (self.ls, self.lt.max()), dtype=np.float64
+        )  # temperature of k-table grid for each species in K
+        self.p = np.zeros(
+            (self.ls, self.lp.max()), dtype=np.float64
+        )  # pressure of k-table grid for each species in bar
+        self.spec = self.ls * [""]  # names of opacity species
 
         # Initialize reduced arrays (will only be set during interpolation)
-        self.pr = np.empty(self.lp.max(), dtype=np.float64)  # pressure in interpolated k table grid
-        self.Tr = np.empty(self.lt.max(), dtype=np.float64)  # temperature in interpolated k table grid
-        self.interp_done = False  # flag to indicate sucessful interpolation
-        self.read_done = False  # flag to indicate read in
+        self.pr = np.empty(
+            self.lp.max(), dtype=np.float64
+        )  # pressure in interpolated k table grid
+        self.Tr = np.empty(
+            self.lt.max(), dtype=np.float64
+        )  # temperature in interpolated k table grid
+        self.interp_done = False  # flag to indicate successful interpolation
+        self.read_done = False  # flag to indicate read in
 
     def read_opac(self):
         """read in the opacity, dependent on the opac IO model."""
@@ -127,14 +142,18 @@ def setup_temp_and_pres(self, temp=None, pres=None):
         assert self.read_done, "run read_opac first"
 
         if pres is None:
-            pmin = min([min(self.p[i, : self.lp[i]]) for i in range(self.ls)])
-            pres = np.logspace(np.log10(pmin), np.log10(self.p.max()), len(self.p[0]))
+            pmin = min(min(self.p[i, : self.lp[i]]) for i in range(self.ls))
+            pres = np.logspace(
+                np.log10(pmin), np.log10(self.p.max()), len(self.p[0])
+            )
         else:
             pres = np.array(pres)
 
         if temp is None:
-            tmin = min([min(self.T[i, : self.lt[i]]) for i in range(self.ls)])
-            temp = np.logspace(np.log10(tmin), np.log10(self.T.max()), len(self.T[0]))
+            tmin = min(min(self.T[i, : self.lt[i]]) for i in range(self.ls))
+            temp = np.logspace(
+                np.log10(tmin), np.log10(self.T.max()), len(self.T[0])
+            )
         else:
             temp = np.array(temp)
 
@@ -171,7 +190,8 @@ def remove_sparse_frequencies(self):
         nonzero_index = np.empty((self.ls, self.lf[0]))
         for i in range(self.ls):
             nonzero_index[i] = np.all(
-                self.kcoeff[i, : self.lp[i], : self.lt[i], :, :], axis=(0, 1, 3)
+                self.kcoeff[i, : self.lp[i], : self.lt[i], :, :],
+                axis=(0, 1, 3),
             )
 
         # Search for common zeros in every species
@@ -194,7 +214,9 @@ def remove_sparse_frequencies(self):
         self.lf = np.repeat(np.count_nonzero(nonzero_index), self.ls)
         self.bin_edges = self.bin_edges[np.asarray(edges_nonzero, dtype=bool)]
         self.bin_center = 0.5 * (self.bin_edges[1:] + self.bin_edges[:-1])
-        self.kcoeff = 
self.kcoeff[:, :, :, np.asarray(nonzero_index, dtype=bool), :] + self.kcoeff = self.kcoeff[ + :, :, :, np.asarray(nonzero_index, dtype=bool), : + ] def plot_opac(self, pres, temp, spec, ax=None, **plot_kwargs): """ @@ -215,7 +237,7 @@ def plot_opac(self, pres, temp, spec, ax=None, **plot_kwargs): Returns ------- - l (list): + lines (list): list of line plots """ if ax is None: @@ -227,18 +249,21 @@ def plot_opac(self, pres, temp, spec, ax=None, **plot_kwargs): print("p:", self.p[speci, pi]) print("T:", self.T[speci, ti]) - l = [] + lines = [] for fi in range(self.lf[0]): x = self.bin_edges[fi] + self.weights.cumsum() * ( self.bin_edges[fi + 1] - self.bin_edges[fi] ) - l.append(ax.loglog(x, self.kcoeff[speci, pi, ti, fi, :], **plot_kwargs)) + lines.append( + ax.loglog(x, self.kcoeff[speci, pi, ti, fi, :], **plot_kwargs) + ) - return l + return lines class ReadOpacChubb(ReadOpac): """A ktable grid reader for the ExomolOP-pRT k-table format""" + def __init__(self, files) -> None: """ Construct the chubb reader for the ExomolOP-pRT k-table format. @@ -248,7 +273,9 @@ def __init__(self, files) -> None: files (list): A list of filenames of the h5 files in which the k-tables are stored. """ - ls = len(files) # Number of opacity species is the number of k-table grid files + ls = len( + files + ) # Number of opacity species is the number of k-table grid files self._files = files # This is custom to this reader, since we do the readin later # initialize the arrays that hold the dimensions @@ -263,7 +290,7 @@ def __init__(self, files) -> None: # read in this metadata for all species for i, file in enumerate(files): with h5py.File(file) as f: - lp[i], lt[i], lf[i], lg[i] = f["kcoeff"].shape + lp[i], lt[i], lf[i], lg[i] = np.array(f["kcoeff"]).shape # call the parent constructor with the metadata super().__init__(ls, lp, lt, lf, lg) @@ -289,16 +316,22 @@ def read_opac(self): # convert k-table grid from cm2/mol to cm2/g: conversion_factor = 1 / ( - np.float64(f["mol_mass"][0]) * const.atomic_mass * 1000 + np.float64(f["mol_mass"][0]) * const.atomic_mass * 1000 + ) + kcoeff = ( + np.array(f["kcoeff"], dtype=np.float64) * conversion_factor ) - kcoeff = np.array(f["kcoeff"], dtype=np.float64) * conversion_factor # store ktable grid self.kcoeff[i, : self.lp[i], : self.lt[i], :, :] = kcoeff # Do the check if the frequencies and g values are the same for all species - assert np.all(bin_edges[1:, :] == bin_edges[:-1, :]), "frequency needs to match" - assert np.all(weights[1:, :] == weights[:-1, :]), "g grid needs to match" + assert np.all( + bin_edges[1:, :] == bin_edges[:-1, :] + ), "frequency needs to match" + assert np.all( + weights[1:, :] == weights[:-1, :] + ), "g grid needs to match" # store the weights and frequency edges self.weights = weights[0, :] diff --git a/opac_mixer/utils/__init__.py b/opac_mixer/utils/__init__.py index 31d6cab..c1451a2 100644 --- a/opac_mixer/utils/__init__.py +++ b/opac_mixer/utils/__init__.py @@ -1 +1 @@ -"""Utils global""" \ No newline at end of file +"""Utils global""" diff --git a/opac_mixer/utils/models.py b/opac_mixer/utils/models.py index 17ed319..47d891a 100644 --- a/opac_mixer/utils/models.py +++ b/opac_mixer/utils/models.py @@ -23,15 +23,21 @@ def get_simple_1d_conv(ng, num_species): """ model = keras.Sequential() model.add(layers.Input(shape=(ng, num_species))) - layers.Permute((2, 1), input_shape=(ng, num_species)), + model.add(layers.Permute((2, 1), input_shape=(ng, num_species))) model.add( - layers.Conv1D(filters=4, kernel_size=3, activation="linear", 
padding="same") + layers.Conv1D( + filters=4, kernel_size=3, activation="linear", padding="same" + ) ) model.add( - layers.Conv1D(filters=4, kernel_size=3, activation="linear", padding="same") + layers.Conv1D( + filters=4, kernel_size=3, activation="linear", padding="same" + ) ) model.add( - layers.Conv1D(filters=1, kernel_size=3, activation="linear", padding="same") + layers.Conv1D( + filters=1, kernel_size=3, activation="linear", padding="same" + ) ) model.add(layers.Flatten()) model.add(layers.Dense(units=ng, activation="linear")) @@ -92,7 +98,9 @@ def get_deepset(ng, hidden_units=None): [ layers.Input(shape=(ng, None)), layers.Permute((2, 1), input_shape=(ng, None)), - layers.Dense(units=hidden_units, activation="relu", use_bias=False), + layers.Dense( + units=hidden_units, activation="relu", use_bias=False + ), layers.Lambda(lambda x: tf.reduce_sum(x, axis=-2)), layers.Dense(units=ng, activation="linear", use_bias=False), ] @@ -127,17 +135,29 @@ def get_unet_1d(ng, species, min_filters=8): conv1 = layers.Conv1D(min_filters, 3, activation="relu", padding="same")( input_layer ) - conv1 = layers.Conv1D(min_filters, 3, activation="relu", padding="same")(conv1) + conv1 = layers.Conv1D(min_filters, 3, activation="relu", padding="same")( + conv1 + ) pool1 = layers.MaxPooling1D(2)(conv1) - conv2 = layers.Conv1D(2 * min_filters, 3, activation="relu", padding="same")(pool1) - conv2 = layers.Conv1D(2 * min_filters, 3, activation="relu", padding="same")(conv2) + conv2 = layers.Conv1D( + 2 * min_filters, 3, activation="relu", padding="same" + )(pool1) + conv2 = layers.Conv1D( + 2 * min_filters, 3, activation="relu", padding="same" + )(conv2) pool2 = layers.MaxPooling1D(2)(conv2) - conv3 = layers.Conv1D(4 * min_filters, 3, activation="relu", padding="same")(pool2) - conv3 = layers.Conv1D(4 * min_filters, 3, activation="relu", padding="same")(conv3) + conv3 = layers.Conv1D( + 4 * min_filters, 3, activation="relu", padding="same" + )(pool2) + conv3 = layers.Conv1D( + 4 * min_filters, 3, activation="relu", padding="same" + )(conv3) - up4 = layers.Conv1DTranspose(2 * min_filters, 2, strides=2, padding="same")(conv3) + up4 = layers.Conv1DTranspose( + 2 * min_filters, 2, strides=2, padding="same" + )(conv3) concat4 = layers.concatenate([conv2, up4], axis=-1) conv4 = layers.Conv1DTranspose( 2 * min_filters, 3, activation="relu", padding="same" @@ -146,16 +166,20 @@ def get_unet_1d(ng, species, min_filters=8): 2 * min_filters, 3, activation="relu", padding="same" )(conv4) - up5 = layers.Conv1DTranspose(min_filters, 2, strides=2, padding="same")(conv4) - concat5 = layers.concatenate([conv1, up5], axis=-1) - conv5 = layers.Conv1DTranspose(min_filters, 3, activation="relu", padding="same")( - concat5 + up5 = layers.Conv1DTranspose(min_filters, 2, strides=2, padding="same")( + conv4 ) - conv5 = layers.Conv1DTranspose(min_filters, 3, activation="relu", padding="same")( + concat5 = layers.concatenate([conv1, up5], axis=-1) + conv5 = layers.Conv1DTranspose( + min_filters, 3, activation="relu", padding="same" + )(concat5) + conv5 = layers.Conv1DTranspose( + min_filters, 3, activation="relu", padding="same" + )(conv5) + + output_layer = layers.Conv1D(1, 3, activation="linear", padding="same")( conv5 ) - - output_layer = layers.Conv1D(1, 3, activation="linear", padding="same")(conv5) output_layer = layers.Flatten()(output_layer) model = keras.Model(inputs=input_layer, outputs=output_layer) diff --git a/opac_mixer/utils/scalings.py b/opac_mixer/utils/scalings.py index 704d6a6..8330eeb 100644 --- 
a/opac_mixer/utils/scalings.py
+++ b/opac_mixer/utils/scalings.py
@@ -27,8 +27,8 @@ def diff(vals, do_log=False):
 
     if len(vals.shape) == 3:
         return np.concatenate((zero_val[:, None, :], diffvals), axis=1)
-    else:
-        return np.concatenate((zero_val[:, None], diffvals), axis=-1)
+
+    return np.concatenate((zero_val[:, None], diffvals), axis=-1)
 
 
 def integrate_diff(diff_vals, do_log=False):
@@ -187,8 +187,8 @@ def inverse_transform_y_sum(X, y, do_log=True):
     xsum = X.sum(axis=-1)
     if do_log:
         return np.exp(y) * xsum
-    else:
-        return y + xsum
+
+    return y + xsum
 
 
 def transform_x_diff(X, do_log=True):
@@ -231,8 +231,8 @@ def transform_y_diff_sum(X, y, do_log=True):
     xsum = X.sum(axis=-1)
     if do_log:
         return diff(y, do_log=True) - diff(xsum, do_log=True)
-    else:
-        return diff((y - xsum) / (xsum[:, -1][:, None] + 1), do_log=False)
+
+    return diff((y - xsum) / (xsum[:, -1][:, None] + 1), do_log=False)
 
 
 def inverse_transform_y_diff_sum(X, y, do_log=True):
@@ -256,8 +256,8 @@ def inverse_transform_y_diff_sum(X, y, do_log=True):
     xsum = X.sum(axis=-1)
    if do_log:
         return integrate_diff(y + diff(xsum, do_log=True), do_log=True)
-    else:
-        return (xsum[:, -1][:, None] + 1) * integrate_diff(y, do_log=False) + xsum
+
+    return (xsum[:, -1][:, None] + 1) * integrate_diff(y, do_log=False) + xsum
 
 
 def default_input_scaling(X):
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..5a55c14
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,29 @@
+# Example configuration for Black.
+
+# NOTE: you have to use single-quoted strings in TOML for regular expressions.
+# It's the equivalent of r-strings in Python. Multiline strings are treated as
+# verbose regular expressions by Black. Use [ ] to denote a significant space
+# character.
+
+[tool.black]
+line-length = 79
+include = '\.pyi?$'
+target-version = ['py38', 'py39', 'py310']
+# We use preview style for formatting this project. If you
+# want stable formatting across releases, you should keep
+# this off.
+preview = true +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + | tests +)/ +''' \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py index ef941e5..d429be1 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -62,7 +62,7 @@ def setup_test_mix_grid(setup_interp_reader): opac, expected = setup_interp_reader mmr = 1e-4 * np.ones((opac.ls, opac.lp[0], opac.lt[0])) mixer = CombineOpacGrid(opac) - mix = mixer.add_single(mmr=mmr, method="RORR") + mix = mixer.add_single(mmr, "RORR") return opac, expected, mmr, mix, mixer @@ -71,8 +71,14 @@ def setup_test_mix_ind(setup_interp_reader): opac, expected = setup_interp_reader mmr_batch = 1e-4 * np.ones((opac.ls, opac.lp[0], opac.lt[0])) - pres = np.ones((1, opac.lp[0], opac.lt[0])) * opac.pr[np.newaxis, :, np.newaxis] - temp = np.ones((1, opac.lp[0], opac.lt[0])) * opac.Tr[np.newaxis, np.newaxis, :] + pres = ( + np.ones((1, opac.lp[0], opac.lt[0])) + * opac.pr[np.newaxis, :, np.newaxis] + ) + temp = ( + np.ones((1, opac.lp[0], opac.lt[0])) + * opac.Tr[np.newaxis, np.newaxis, :] + ) input_data = np.concatenate((mmr_batch, pres, temp), axis=0).T.reshape( (opac.lt[0] * opac.lp[0], opac.ls + 2) ) diff --git a/tests/test_mix.py b/tests/test_mix.py index 9b4add6..774c1f6 100644 --- a/tests/test_mix.py +++ b/tests/test_mix.py @@ -12,7 +12,8 @@ def test_mix_single(setup_interp_reader): - """Test that single molecule is 'mixed' identically with all mixing functions""" + """Test that single molecule is 'mixed' identically with all mixing functions + """ opac, expected = setup_interp_reader mol_single_i = 0 @@ -20,8 +21,8 @@ def test_mix_single(setup_interp_reader): mmr[mol_single_i, :, :] = 1.0 mix = CombineOpacGrid(opac) - lm = mix.add_single(mmr=mmr, method="linear") - rm = mix.add_single(mmr=mmr, method="RORR") + lm = mix.add_single(mmr, "linear") + rm = mix.add_single(mmr, "RORR") assert np.all( np.isclose(lm, rm) @@ -56,7 +57,9 @@ def kdata_conv_loop_profile(kdata1, kdata2, kdataconv, Nlay, Nw, Ng): for j in range(Nw): for l in range(Ng): for m in range(Ng): - kdataconv[i, j, l * Ng + m] = kdata1[i, j, m] + kdata2[i, j, l] + kdataconv[i, j, l * Ng + m] = ( + kdata1[i, j, m] + kdata2[i, j, l] + ) @numba.njit(nogil=True, fastmath=True, cache=True) @@ -114,7 +117,8 @@ def RandOverlap_2_kdata_prof(Nlay, Nw, Ng, kdata1, kdata2, weights, ggrid): def test_individual_rorr_vs_grid_rorr(setup_test_mix_grid, setup_test_mix_ind): - """Test that there is no difference between the individual mixing and the grid mixing""" + """Test that there is no difference between the individual mixing and the grid mixing + """ opac_ind, _, input_data, mix_ind, _ = setup_test_mix_ind opac_grid, _, _, mix_grid, _ = setup_test_mix_grid @@ -131,8 +135,8 @@ def _test_rorr_vs_aee_vs_linear(setup_test_mix_grid): opac, expected, mmr, mix, mixer = setup_test_mix_grid - mix_l = mixer.add_single(mmr=mmr, method="linear") - mix_a = mixer.add_single(mmr=mmr, method="AEE") + mix_l = mixer.add_single(mmr, "linear") + mix_a = mixer.add_single(mmr, "AEE") for p in [1e-1]: for t in [2000]: @@ -184,7 +188,10 @@ def test_mix_vs_prt(setup_test_mix_grid): atmosphere = Radtrans( line_species=linespecies, pressures=opac.pr, - wlen_bords_micron=[(1e4 / opac.bin_edges).min(), (1e4 / opac.bin_edges).max()], + wlen_bords_micron=[ + (1e4 / opac.bin_edges).min(), + (1e4 / opac.bin_edges).max(), + ], test_ck_shuffle_comp=True, ) @@ -203,7 +210,9 @@ def test_mix_vs_prt(setup_test_mix_grid): kcoeff_prt = 
np.empty_like(mix)
 
     for i, temp in enumerate(opac.Tr):
-        abunds = {spec: mmr[speci, :, i] for speci, spec in enumerate(linespecies)}
+        abunds = {
+            spec: mmr[speci, :, i] for speci, spec in enumerate(linespecies)
+        }
 
         atmosphere.interpolate_species_opa(np.ones_like(opac.pr) * temp)
 
@@ -229,9 +238,9 @@ def test_mix_vs_prt(setup_test_mix_grid):
             give_scattering_opacity=give_scattering_opacity,
         )
 
-        kcoeff_prt[:, i, :, :] = atmosphere.line_struc_kappas[:, ::-1, 0, :].transpose(
-            2, 1, 0
-        )
+        kcoeff_prt[:, i, :, :] = atmosphere.line_struc_kappas[
+            :, ::-1, 0, :
+        ].transpose(2, 1, 0)
 
     np.testing.assert_allclose(kcoeff_prt, mix, rtol=2.0)
 
@@ -251,7 +260,8 @@ def test_mix_vs_prt(setup_test_mix_grid):
 
 
 def _test_mix_vs_exok(setup_test_mix_grid):
-    """Compare against exok ck RORR implementation. Note, that this implementation is different."""
+    """Compare against exok ck RORR implementation. Note that this
+    implementation is different."""
     import matplotlib.pyplot as plt
 
     opac, expected, mmr, mix, mixer = setup_test_mix_grid
@@ -290,9 +300,15 @@ def _test_mix_vs_exok(setup_test_mix_grid):
                     opac.bin_edges[fi + 1] - opac.bin_edges[fi]
                 )
                 plt.title(f"exok: {p}, {t}")
-                plt.loglog(x, mix[pi, ti, fi, :], color="black", alpha=0.4, ls="-")
                 plt.loglog(
-                    x, kcoeff_exok[pi, ti, fi, :], color="orange", alpha=0.4, ls="-"
+                    x, mix[pi, ti, fi, :], color="black", alpha=0.4, ls="-"
+                )
+                plt.loglog(
+                    x,
+                    kcoeff_exok[pi, ti, fi, :],
+                    color="orange",
+                    alpha=0.4,
+                    ls="-",
                 )
                 plt.show()
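Taken together, the pieces touched by this patch compose into one pipeline; a minimal end-to-end sketch under the API shown above (file names hypothetical, default model, keras callbacks added internally by fit):

    from opac_mixer.read import ReadOpacChubb
    from opac_mixer.emulator import Emulator

    opac = ReadOpacChubb(["H2O.h5", "CO2.h5"])  # hypothetical k-table files
    opac.read_opac()
    opac.setup_temp_and_pres()

    em = Emulator(opac)
    em.setup_sampling_grid(approx_batchsize=8e5)  # random abundance samples
    em.setup_mix(test_size=0.2)  # RORR ground truth (the slow part)
    em.setup_model()             # default permutation-invariant DeepSet
    em.fit()                     # early stopping on "loss", patience 3
    em.score()                   # RMSE/R2 against the plain-sum baseline
    em.export("weights", file_format="exorad")  # directory assumed to exist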