Treat 1-point datasets equally in sequential and parallel fits #2276

Draft · wants to merge 5 commits into base: master
37 changes: 24 additions & 13 deletions n3fit/src/n3fit/model_gen.py
@@ -1,11 +1,11 @@
"""
Library of functions which generate the NN objects
Library of functions which generate the NN objects

Contains:
# observable_generator:
Generates the output layers as functions
# pdfNN_layer_generator:
Generates the PDF NN layer to be fitted
Contains:
# observable_generator:
Generates the output layers as functions
# pdfNN_layer_generator:
Generates the PDF NN layer to be fitted


"""
Expand All @@ -26,7 +26,7 @@
base_layer_selector,
)
from n3fit.backends import operations as op
from n3fit.backends import regularizer_selector
from n3fit.backends import regularizer_selector as reg_sec
from n3fit.layers import (
DIS,
DY,
Expand Down Expand Up @@ -128,6 +128,7 @@ def observable_generator(
spec_dict,
boundary_condition=None,
mask_array=None,
validation_mask_array=None,
training_data=None,
validation_data=None,
invcovmat_tr=None,
Expand Down Expand Up @@ -170,6 +171,10 @@ def observable_generator(
boundary_condition: dict
dictionary containing the instance of a PDF set to be used as a
Boundary Condition.
mask_array: np.ndarray
training mask per replica
validation_mask_array: np.ndarray
validation mask per replica; when not given, ~mask_array will be used
n_replicas: int
number of replicas fitted simultaneously
positivity_initial: float
Expand Down Expand Up @@ -245,12 +250,18 @@ def observable_generator(
model_inputs = np.concatenate(model_inputs).reshape(1, -1)

# Make the mask layers...
if mask_array is not None:
tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}")
vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}")
else:
if mask_array is None:
tr_mask_layer = None
vl_mask_layer = None
if validation_mask_array is None:
vl_mask_layer = None
else:
vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}")
else:
tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}")
if validation_mask_array is None:
vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}")
else:
vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}")

# Make rotations of the final data (if any)
if spec_dict.get("data_transformation") is not None:
Expand Down Expand Up @@ -724,7 +735,7 @@ def generate_nn(
"""
nodes_list = list(nodes) # so we can modify it
x_input = Input(shape=(None, nodes_in), batch_size=1, name="NN_input")
reg = regularizer_selector(regularizer, **regularizer_args)
reg = reg_sec(regularizer, **regularizer_args)

if layer_type == "dense_per_flavour":
# set the arguments that will define the layer
Expand Down
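As a reading aid for the mask hunk above: when a per-replica training mask is given but no validation mask, the complement of the training mask is used for validation; when neither is given, no masking is applied. Below is a minimal numpy sketch of that branching; the helper name `build_masks` is hypothetical, and the real code wraps the resulting arrays in n3fit's `Mask` layer rather than returning them directly.

```python
import numpy as np

def build_masks(mask_array=None, validation_mask_array=None):
    """Illustrative only: mirrors the training/validation mask branching above."""
    if mask_array is None:
        # No training mask: a validation mask is used only if explicitly provided
        return None, validation_mask_array
    if validation_mask_array is None:
        # Default: validation points are the complement of the training points
        return mask_array, ~mask_array
    return mask_array, validation_mask_array

# One replica, four data points: three go to training, one to validation by complement
tr, vl = build_masks(mask_array=np.array([[True, True, False, True]]))
print(vl)  # [[False False  True False]]
```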
68 changes: 49 additions & 19 deletions n3fit/src/n3fit/model_trainer.py
@@ -1,12 +1,12 @@
"""
The ModelTrainer class is the true driver around the n3fit code
The ModelTrainer class is the true driver around the n3fit code

This class is initialized with all information about the NN, inputs and outputs.
The construction of the NN and the fitting is performed at the same time when the
hyperparametrizable method of the function is called.
This class is initialized with all information about the NN, inputs and outputs.
The construction of the NN and the fitting is performed at the same time when the
hyperparametrizable method of the function is called.

This allows one to use hyperscanning libraries, which need to change the parameters of the network
between iterations while at the same time keeping the amount of redundant calls to a minimum
This allows one to use hyperscanning libraries, which need to change the parameters of the network
between iterations while at the same time keeping the amount of redundant calls to a minimum
"""

from collections import namedtuple
Expand Down Expand Up @@ -151,7 +151,6 @@ def __init__(
self.exp_info = list(exp_info)
self.pos_info = [] if pos_info is None else pos_info
self.integ_info = [] if integ_info is None else integ_info
self.all_info = self.exp_info[0] + self.pos_info + self.integ_info
self.boundary_condition = boundary_condition
self.flavinfo = flavinfo
self.fitbasis = fitbasis
Expand Down Expand Up @@ -528,9 +527,12 @@ def _generate_observables(
self._reset_observables()
log.info("Generating layers")

# We need to transpose Experimental data, stacking over replicas
# validphys has generated the self.exp_info information replica-by-replica
# Here we transpose all information for convenience so that the loop over observables
# and the vectorization over replicas is made explicit
experiment_data = {
"trmask": [],
"vlmask": [],
"expdata": [],
"expdata_vl": [],
"invcovmat": [],
Expand Down Expand Up @@ -562,6 +564,7 @@ def _generate_observables(
exp_dict,
self.boundary_condition,
mask_array=experiment_data["trmask"][i],
validation_mask_array=experiment_data["vlmask"][i],
training_data=experiment_data["expdata"][i],
validation_data=experiment_data["expdata_vl"][i],
invcovmat_tr=experiment_data["invcovmat"][i],
Expand Down Expand Up @@ -712,20 +715,47 @@ def _prepare_reporting(self, partition):
to select the bits necessary for reporting the chi2.
Receives the chi2 partition data to see whether any dataset is to be left out
"""
reported_keys = ["name", "count_chi2", "positivity", "integrability", "ndata", "ndata_vl"]
reported_keys = ["name", "count_chi2", "positivity", "integrability"]
reporting_list = []
for exp_dict in self.all_info:

# Most of the information is shared among replicas, only ndata/ndata_vl
# might change replica to replica and they need to be filled with care
for idx, exp_dict in enumerate(self.exp_info[0]):
# Fill in the keys that are equal across replicas
reporting_dict = {k: exp_dict.get(k) for k in reported_keys}

# Now loop over replicas to fill in all data points as a list
list_ndata = []
list_ndata_vl = []
for replica in self.exp_info:
replica_exp_dict = replica[idx]

ndata = replica_exp_dict.get("ndata")
ndata_vl = replica_exp_dict.get("ndata_vl")

if partition:
# If we are in a k-fold partition, we need to remove the folded data
# from both the training and validation to avoid calculating the chi2 wrong
for dataset in replica_exp_dict["datasets"]:
if dataset in partition["datasets"]:
dataset_ndata = dataset["ndata"]
frac = dataset["frac"]
ndata -= int(dataset_ndata * frac)
ndata_vl -= int(dataset_ndata * (1 - frac))

list_ndata.append(ndata)
list_ndata_vl.append(ndata_vl)

reporting_dict["ndata"] = list_ndata
reporting_dict["ndata_vl"] = list_ndata_vl
reporting_list.append(reporting_dict)

for exp_dict in self.pos_info + self.integ_info:
reporting_dict = {k: exp_dict.get(k) for k in reported_keys}
if partition:
# If we are in a partition we need to remove the number of datapoints
# in order to avoid calculating the chi2 wrong
for dataset in exp_dict["datasets"]:
if dataset in partition["datasets"]:
ndata = dataset["ndata"]
frac = dataset["frac"]
reporting_dict["ndata"] -= int(ndata * frac)
reporting_dict["ndata_vl"] = int(ndata * (1 - frac))
reporting_dict["ndata"] = [exp_dict.get("ndata")]
reporting_dict["ndata_vl"] = [exp_dict.get("ndata_vl")]
reporting_list.append(reporting_dict)

return reporting_list

def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool:
Expand Down
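For orientation, the net effect of the `_prepare_reporting` rewrite is that `ndata` and `ndata_vl` become per-replica lists instead of single integers: with per-replica masks, a small (e.g. 1-point) dataset may contribute a different number of training and validation points in each replica, and the folded datasets are still subtracted when a k-fold partition is active. A hypothetical two-replica entry of `reporting_list` (experiment name and numbers invented for illustration):

```python
# Hypothetical example of one entry of reporting_list after this change
reporting_dict = {
    "name": "NMC",            # experiment name (illustrative)
    "count_chi2": True,
    "positivity": False,
    "integrability": False,
    "ndata": [245, 247],      # training points for replica 1 and replica 2
    "ndata_vl": [79, 77],     # validation points for replica 1 and replica 2
}
```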
18 changes: 9 additions & 9 deletions n3fit/src/n3fit/scripts/n3fit_exec.py
Expand Up @@ -127,15 +127,15 @@ def from_yaml(cls, o, *args, **kwargs):
if fps := file_content["fitting"].get("savepseudodata", True):
if fps != True:
raise TypeError(f"fitting::savepseudodata is neither True nor False ({fps})")
if len(kwargs["environment"].replicas) != 1:
raise ConfigError(
"Cannot request that multiple replicas are fitted and that "
"pseudodata is saved. Either set `fitting::savepseudodata` "
"to `false` or fit replicas one at a time."
)
# take same namespace configuration on the pseudodata_table action.
training_action = namespace + "training_pseudodata"
validation_action = namespace + "validation_pseudodata"
#if len(kwargs["environment"].replicas) != 1:
#raise ConfigError(
# "Cannot request that multiple replicas are fitted and that "
# "pseudodata is saved. Either set `fitting::savepseudodata` "
# "to `false` or fit replicas one at a time."
#)
# take same namespace configuration on the pseudodata_table action.
training_action = namespace + "replicas_training_pseudodata"
validation_action = namespace + "replicas_validation_pseudodata"

N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action))

Expand Down
78 changes: 36 additions & 42 deletions n3fit/src/n3fit/stopping.py
@@ -1,31 +1,31 @@
"""
Module containing the classes related to the stopping algorithm

In this module there are four Classes:

- FitState: this class contains the information of the fit
for a given point in history
- FitHistory: this class contains the information necessary
in order to reset the state of the fit to the point
in which the history was saved.
i.e., a list of FitStates
- Stopping: this class monitors the chi2 of the validation
and training sets and decides when to stop
- Positivity: Decides whether a given point fulfills the positivity conditions
- Validation: Controls the NNPDF cross-validation algorithm

Note:
There are situations in which the validation set is empty, in those cases
the training set is used as validation set.
This implies several changes in the behaviour of this class as the training chi2 will
now be monitored for stability.
In order to parse the set of loss functions coming from the backend::MetaModel,
the function `parse_losses` relies on the fact that they are all suffixed with `_loss`
the validation case, instead, is suffixed with `val_loss`. In the particular case in
which both training and validation model correspond to the same backend::MetaModel only
the `_loss` suffix can be found. This is taken into account by the class `Stopping`
which will tell `Validation` that no validation set was found and that the training is to
be used instead.
Module containing the classes related to the stopping algorithm

In this module there are four Classes:

- FitState: this class contains the information of the fit
for a given point in history
- FitHistory: this class contains the information necessary
in order to reset the state of the fit to the point
in which the history was saved.
i.e., a list of FitStates
- Stopping: this class monitors the chi2 of the validation
and training sets and decides when to stop
- Positivity: Decides whether a given point fulfills the positivity conditions
- Validation: Controls the NNPDF cross-validation algorithm

Note:
There are situations in which the validation set is empty, in those cases
the training set is used as validation set.
This implies several changes in the behaviour of this class as the training chi2 will
now be monitored for stability.
In order to parse the set of loss functions coming from the backend::MetaModel,
the function `parse_losses` relies on the fact that they are all suffixed with `_loss`
the validation case, instead, is suffixed with `val_loss`. In the particular case in
which both training and validation model correspond to the same backend::MetaModel only
the `_loss` suffix can be found. This is taken into account by the class `Stopping`
which will tell `Validation` that no validation set was found and that the training is to
be used instead.
"""

import logging
Expand All @@ -47,16 +47,15 @@

def parse_ndata(all_data):
"""
Parses the list of dictionaries received from ModelTrainer
into a dictionary containing only the name of the experiments
together with the number of points.
Parses the list of dictionaries received from ModelTrainer into dictionaries
containing only the name of the experiments and the number of points per replica

Returns
-------
`tr_ndata`
dictionary of {'exp' : ndata}
dictionary of {'exp' : np.ndarray}
`vl_ndata`
dictionary of {'exp' : ndata}
dictionary of {'exp' : np.ndarray}
`pos_set`: list of the names of the positivity sets

Note: if there is no validation (total number of val points == 0)
Expand All @@ -71,9 +70,9 @@ def parse_ndata(all_data):
tr_ndata = dictionary["ndata"]
vl_ndata = dictionary["ndata_vl"]
if tr_ndata:
tr_ndata_dict[exp_name] = tr_ndata
tr_ndata_dict[exp_name] = np.array(tr_ndata)
if vl_ndata:
vl_ndata_dict[exp_name] = vl_ndata
vl_ndata_dict[exp_name] = np.array(vl_ndata)
if dictionary.get("positivity") and not dictionary.get("integrability"):
pos_set.append(exp_name)
if not vl_ndata_dict:
Expand Down Expand Up @@ -111,24 +110,19 @@ def parse_losses(history_object, data, suffix="loss"):
except AttributeError: # So it works whether we pass the out or the out.history
hobj = history_object

# In the general case epochs = 1.
# In case that we are doing more than 1 epoch, take the last result as it is the result
# the model is in at the moment
# This value is only used for printing output purposes so should not have any significance
dict_chi2 = {}
total_points = 0
total_loss = 0
total_loss = np.zeros_like(hobj["loss"])
for exp_name, npoints in data.items():
loss = np.array(hobj[exp_name + f"_{suffix}"])
dict_chi2[exp_name] = loss / npoints
dict_chi2[exp_name] = loss / np.maximum(npoints, 1)
total_points += npoints
total_loss += loss

# By taking the loss from the history object we would be saving the total loss
including positivity sets and (if added/enabled) regularizers
# instead we want to restrict ourselves to the loss coming from experiments
# total_loss = np.mean(hobj["loss"]) / total_points
total_loss /= total_points
total_loss /= np.maximum(total_points, 1)
dict_chi2["total"] = total_loss
return total_loss, dict_chi2

Expand Down
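To illustrate why the `np.maximum(npoints, 1)` guard matters: with per-replica masks, a 1-point dataset can end up with zero validation (or training) points in some replicas, and the per-experiment point counts now arrive from `parse_ndata` as arrays with one entry per replica. A self-contained sketch of the normalisation loop above, with invented experiment names and values:

```python
import numpy as np

# Per-replica point counts, as parse_ndata would now provide them (invented values)
data = {"EXP1": np.array([2, 2]), "ONEPOINT": np.array([0, 1])}
# Per-experiment losses from the history object, one value per replica (invented)
hobj = {"EXP1_loss": np.array([2.0, 3.0]), "ONEPOINT_loss": np.array([0.0, 1.0])}

dict_chi2 = {}
total_points = 0
total_loss = np.zeros(2)
for exp_name, npoints in data.items():
    loss = hobj[f"{exp_name}_loss"]
    # Guard against replicas with zero points so the division never produces NaN/inf
    dict_chi2[exp_name] = loss / np.maximum(npoints, 1)
    total_points += npoints
    total_loss += loss

total_loss /= np.maximum(total_points, 1)
print(dict_chi2["ONEPOINT"])  # [0. 1.] -> the empty replica contributes 0, not NaN
print(total_loss)             # [1.   1.33...] total chi2 per replica
```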