Treat 1-point datasets equally in sequential and parallel fits #2276

Draft · wants to merge 5 commits into base: master
37 changes: 24 additions & 13 deletions n3fit/src/n3fit/model_gen.py
@@ -1,11 +1,11 @@
"""
Library of functions which generate the NN objects
Library of functions which generate the NN objects

Contains:
# observable_generator:
Generates the output layers as functions
# pdfNN_layer_generator:
Generates the PDF NN layer to be fitted
Contains:
# observable_generator:
Generates the output layers as functions
# pdfNN_layer_generator:
Generates the PDF NN layer to be fitted


"""
Expand All @@ -26,7 +26,7 @@
base_layer_selector,
)
from n3fit.backends import operations as op
from n3fit.backends import regularizer_selector
from n3fit.backends import regularizer_selector as reg_sec
from n3fit.layers import (
DIS,
DY,
Expand Down Expand Up @@ -128,6 +128,7 @@ def observable_generator(
spec_dict,
boundary_condition=None,
mask_array=None,
validation_mask_array=None,
training_data=None,
validation_data=None,
invcovmat_tr=None,
Expand Down Expand Up @@ -170,6 +171,10 @@ def observable_generator(
boundary_condition: dict
dictionary containing the instance of a PDF set to be used as a
Boundary Condition.
mask_array: np.ndarray
training mask per replica
validation_mask_array: np.ndarray
validation mask per replica; when not given, ~mask_array will be used
n_replicas: int
number of replicas fitted simultaneously
positivity_initial: float
Expand Down Expand Up @@ -245,12 +250,18 @@ def observable_generator(
model_inputs = np.concatenate(model_inputs).reshape(1, -1)

# Make the mask layers...
if mask_array is not None:
tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}")
vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}")
else:
if mask_array is None:
tr_mask_layer = None
vl_mask_layer = None
if validation_mask_array is None:
vl_mask_layer = None
else:
vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}")
else:
tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}")
if validation_mask_array is None:
vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}")
else:
vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}")

# Make rotations of the final data (if any)
if spec_dict.get("data_transformation") is not None:
Expand Down Expand Up @@ -724,7 +735,7 @@ def generate_nn(
"""
nodes_list = list(nodes) # so we can modify it
x_input = Input(shape=(None, nodes_in), batch_size=1, name="NN_input")
reg = regularizer_selector(regularizer, **regularizer_args)
reg = reg_sec(regularizer, **regularizer_args)

if layer_type == "dense_per_flavour":
# set the arguments that will define the layer
Expand Down
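As a reading aid for the mask hunk above: when a per-replica training mask is given but no validation mask, the complement of the training mask is used for validation; when neither is given, no masking is applied. Below is a minimal numpy sketch of that branching; the helper name `build_masks` is hypothetical, and the real code wraps the resulting arrays in n3fit's `Mask` layer rather than returning them directly.

```python
import numpy as np

def build_masks(mask_array=None, validation_mask_array=None):
    """Illustrative only: mirrors the training/validation mask branching above."""
    if mask_array is None:
        # No training mask: a validation mask is used only if explicitly provided
        return None, validation_mask_array
    if validation_mask_array is None:
        # Default: validation points are the complement of the training points
        return mask_array, ~mask_array
    return mask_array, validation_mask_array

# One replica, four data points: three go to training, one to validation by complement
tr, vl = build_masks(mask_array=np.array([[True, True, False, True]]))
print(vl)  # [[False False  True False]]
```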
68 changes: 49 additions & 19 deletions n3fit/src/n3fit/model_trainer.py
@@ -1,12 +1,12 @@
"""
The ModelTrainer class is the true driver around the n3fit code
The ModelTrainer class is the true driver around the n3fit code

This class is initialized with all information about the NN, inputs and outputs.
The construction of the NN and the fitting is performed at the same time when the
hyperparametrizable method of the function is called.
This class is initialized with all information about the NN, inputs and outputs.
The construction of the NN and the fitting is performed at the same time when the
hyperparametrizable method of the function is called.

This allows one to use hyperscanning libraries, which need to change the parameters of the network
between iterations while at the same time keeping the amount of redundant calls to a minimum
This allows one to use hyperscanning libraries, which need to change the parameters of the network
between iterations while at the same time keeping the amount of redundant calls to a minimum
"""

from collections import namedtuple
Expand Down Expand Up @@ -151,7 +151,6 @@ def __init__(
self.exp_info = list(exp_info)
self.pos_info = [] if pos_info is None else pos_info
self.integ_info = [] if integ_info is None else integ_info
self.all_info = self.exp_info[0] + self.pos_info + self.integ_info
self.boundary_condition = boundary_condition
self.flavinfo = flavinfo
self.fitbasis = fitbasis
Expand Down Expand Up @@ -528,9 +527,12 @@ def _generate_observables(
self._reset_observables()
log.info("Generating layers")

# We need to transpose Experimental data, stacking over replicas
# validphys has generated the self.exp_info information replica-by-replica
# Here we transpose all information for convenience so that the loop over observables
# and the vectorization over replicas is made explicit
experiment_data = {
"trmask": [],
"vlmask": [],
"expdata": [],
"expdata_vl": [],
"invcovmat": [],
Expand Down Expand Up @@ -562,6 +564,7 @@ def _generate_observables(
exp_dict,
self.boundary_condition,
mask_array=experiment_data["trmask"][i],
validation_mask_array=experiment_data["vlmask"][i],
training_data=experiment_data["expdata"][i],
validation_data=experiment_data["expdata_vl"][i],
invcovmat_tr=experiment_data["invcovmat"][i],
Expand Down Expand Up @@ -712,20 +715,47 @@ def _prepare_reporting(self, partition):
to select the bits necessary for reporting the chi2.
Receives the chi2 partition data to see whether any dataset is to be left out
"""
reported_keys = ["name", "count_chi2", "positivity", "integrability", "ndata", "ndata_vl"]
reported_keys = ["name", "count_chi2", "positivity", "integrability"]
reporting_list = []
for exp_dict in self.all_info:

# Most of the information is shared among replicas, only ndata/ndata_vl
# might change replica to replica and they need to be filled with care
for idx, exp_dict in enumerate(self.exp_info[0]):
# Fill in the keys that are equal across replicas
reporting_dict = {k: exp_dict.get(k) for k in reported_keys}

# Now loop over replicas to fill in all data points as a list
list_ndata = []
list_ndata_vl = []
for replica in self.exp_info:
replica_exp_dict = replica[idx]

ndata = replica_exp_dict.get("ndata")
ndata_vl = replica_exp_dict.get("ndata_vl")

if partition:
# If we are in a k-fold partition, we need to remove the folded data
# from both the training and validation to avoid calculating the chi2 wrong
for dataset in replica_exp_dict["datasets"]:
if dataset in partition["datasets"]:
dataset_ndata = dataset["ndata"]
frac = dataset["frac"]
ndata -= int(dataset_ndata * frac)
ndata_vl -= int(dataset_ndata * (1 - frac))

list_ndata.append(ndata)
list_ndata_vl.append(ndata_vl)

reporting_dict["ndata"] = list_ndata
reporting_dict["ndata_vl"] = list_ndata_vl
reporting_list.append(reporting_dict)

for exp_dict in self.pos_info + self.integ_info:
reporting_dict = {k: exp_dict.get(k) for k in reported_keys}
if partition:
# If we are in a partition we need to remove the number of datapoints
# in order to avoid calculating the chi2 wrong
for dataset in exp_dict["datasets"]:
if dataset in partition["datasets"]:
ndata = dataset["ndata"]
frac = dataset["frac"]
reporting_dict["ndata"] -= int(ndata * frac)
reporting_dict["ndata_vl"] = int(ndata * (1 - frac))
reporting_dict["ndata"] = [exp_dict.get("ndata")]
reporting_dict["ndata_vl"] = [exp_dict.get("ndata_vl")]
reporting_list.append(reporting_dict)

return reporting_list

def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool:
Expand Down
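For orientation, the net effect of the `_prepare_reporting` rewrite is that `ndata` and `ndata_vl` become per-replica lists instead of single integers: with per-replica masks, a small (e.g. 1-point) dataset may contribute a different number of training and validation points in each replica, and the folded datasets are still subtracted when a k-fold partition is active. A hypothetical two-replica entry of `reporting_list` (experiment name and numbers invented for illustration):

```python
# Hypothetical example of one entry of reporting_list after this change
reporting_dict = {
    "name": "NMC",            # experiment name (illustrative)
    "count_chi2": True,
    "positivity": False,
    "integrability": False,
    "ndata": [245, 247],      # training points for replica 1 and replica 2
    "ndata_vl": [79, 77],     # validation points for replica 1 and replica 2
}
```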
18 changes: 9 additions & 9 deletions n3fit/src/n3fit/scripts/n3fit_exec.py
Expand Up @@ -127,15 +127,15 @@ def from_yaml(cls, o, *args, **kwargs):
if fps := file_content["fitting"].get("savepseudodata", True):
if fps != True:
raise TypeError(f"fitting::savepseudodata is neither True nor False ({fps})")
if len(kwargs["environment"].replicas) != 1:
raise ConfigError(
"Cannot request that multiple replicas are fitted and that "
"pseudodata is saved. Either set `fitting::savepseudodata` "
"to `false` or fit replicas one at a time."
)
# take same namespace configuration on the pseudodata_table action.
training_action = namespace + "training_pseudodata"
validation_action = namespace + "validation_pseudodata"
#if len(kwargs["environment"].replicas) != 1:
#raise ConfigError(
# "Cannot request that multiple replicas are fitted and that "
# "pseudodata is saved. Either set `fitting::savepseudodata` "
# "to `false` or fit replicas one at a time."
#)
# take same namespace configuration on the pseudodata_table action.
training_action = namespace + "replicas_training_pseudodata"
validation_action = namespace + "replicas_validation_pseudodata"

N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action))

Expand Down
78 changes: 36 additions & 42 deletions n3fit/src/n3fit/stopping.py
@@ -1,31 +1,31 @@
"""
Module containing the classes related to the stopping algorithm

In this module there are four Classes:

- FitState: this class contains the information of the fit
for a given point in history
- FitHistory: this class contains the information necessary
in order to reset the state of the fit to the point
in which the history was saved.
i.e., a list of FitStates
- Stopping: this class monitors the chi2 of the validation
and training sets and decides when to stop
- Positivity: Decides whether a given point fulfills the positivity conditions
- Validation: Controls the NNPDF cross-validation algorithm

Note:
There are situations in which the validation set is empty, in those cases
the training set is used as validation set.
This implies several changes in the behaviour of this class as the training chi2 will
now be monitored for stability.
In order to parse the set of loss functions coming from the backend::MetaModel,
the function `parse_losses` relies on the fact that they are all suffixed with `_loss`
the validation case, instead, is suffixed with `val_loss`. In the particular case in
which both training and validation model correspond to the same backend::MetaModel only
the `_loss` suffix can be found. This is taken into account by the class `Stopping`
which will tell `Validation` that no validation set was found and that the training is to
be used instead.
Module containing the classes related to the stopping algorithm

In this module there are four Classes:

- FitState: this class contains the information of the fit
for a given point in history
- FitHistory: this class contains the information necessary
in order to reset the state of the fit to the point
in which the history was saved.
i.e., a list of FitStates
- Stopping: this class monitors the chi2 of the validation
and training sets and decides when to stop
- Positivity: Decides whether a given point fulfills the positivity conditions
- Validation: Controls the NNPDF cross-validation algorithm

Note:
There are situations in which the validation set is empty, in those cases
the training set is used as validation set.
This implies several changes in the behaviour of this class as the training chi2 will
now be monitored for stability.
In order to parse the set of loss functions coming from the backend::MetaModel,
the function `parse_losses` relies on the fact that they are all suffixed with `_loss`
the validation case, instead, is suffixed with `val_loss`. In the particular case in
which both training and validation model correspond to the same backend::MetaModel only
the `_loss` suffix can be found. This is taken into account by the class `Stopping`
which will tell `Validation` that no validation set was found and that the training is to
be used instead.
"""

import logging
Expand All @@ -47,16 +47,15 @@

def parse_ndata(all_data):
"""
Parses the list of dictionaries received from ModelTrainer
into a dictionary containing only the name of the experiments
together with the number of points.
Parses the list of dictionaries received from ModelTrainer into dictionaries
containing only the name of the experiments and the number of points per replica

Returns
-------
`tr_ndata`
dictionary of {'exp' : ndata}
dictionary of {'exp' : np.ndarray}
`vl_ndata`
dictionary of {'exp' : ndata}
dictionary of {'exp' : np.ndarray}
`pos_set`: list of the names of the positivity sets

Note: if there is no validation (total number of val points == 0)
Expand All @@ -71,9 +70,9 @@ def parse_ndata(all_data):
tr_ndata = dictionary["ndata"]
vl_ndata = dictionary["ndata_vl"]
if tr_ndata:
tr_ndata_dict[exp_name] = tr_ndata
tr_ndata_dict[exp_name] = np.array(tr_ndata)
if vl_ndata:
vl_ndata_dict[exp_name] = vl_ndata
vl_ndata_dict[exp_name] = np.array(vl_ndata)
if dictionary.get("positivity") and not dictionary.get("integrability"):
pos_set.append(exp_name)
if not vl_ndata_dict:
Expand Down Expand Up @@ -111,24 +110,19 @@ def parse_losses(history_object, data, suffix="loss"):
except AttributeError: # So it works whether we pass the out or the out.history
hobj = history_object

# In the general case epochs = 1.
# In case that we are doing more than 1 epoch, take the last result as it is the result
# the model is in at the moment
# This value is only used for printing output purposes so should not have any significance
dict_chi2 = {}
total_points = 0
total_loss = 0
total_loss = np.zeros_like(hobj["loss"])
for exp_name, npoints in data.items():
loss = np.array(hobj[exp_name + f"_{suffix}"])
dict_chi2[exp_name] = loss / npoints
dict_chi2[exp_name] = loss / np.maximum(npoints, 1)
total_points += npoints
total_loss += loss

# By taking the loss from the history object we would be saving the total loss
including positivity sets and (if added/enabled) regularizers
# instead we want to restrict ourselves to the loss coming from experiments
# total_loss = np.mean(hobj["loss"]) / total_points
total_loss /= total_points
total_loss /= np.maximum(total_points, 1)
dict_chi2["total"] = total_loss
return total_loss, dict_chi2

Expand Down
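To illustrate why the `np.maximum(npoints, 1)` guard matters: with per-replica masks, a 1-point dataset can end up with zero validation (or training) points in some replicas, and the per-experiment point counts now arrive from `parse_ndata` as arrays with one entry per replica. A self-contained sketch of the normalisation loop above, with invented experiment names and values:

```python
import numpy as np

# Per-replica point counts, as parse_ndata would now provide them (invented values)
data = {"EXP1": np.array([2, 2]), "ONEPOINT": np.array([0, 1])}
# Per-experiment losses from the history object, one value per replica (invented)
hobj = {"EXP1_loss": np.array([2.0, 3.0]), "ONEPOINT_loss": np.array([0.0, 1.0])}

dict_chi2 = {}
total_points = 0
total_loss = np.zeros(2)
for exp_name, npoints in data.items():
    loss = hobj[f"{exp_name}_loss"]
    # Guard against replicas with zero points so the division never produces NaN/inf
    dict_chi2[exp_name] = loss / np.maximum(npoints, 1)
    total_points += npoints
    total_loss += loss

total_loss /= np.maximum(total_points, 1)
print(dict_chi2["ONEPOINT"])  # [0. 1.] -> the empty replica contributes 0, not NaN
print(total_loss)             # [1.   1.33...] total chi2 per replica
```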