Merge pull request #6 from pietroepis/trajectory-generation

Trajectory generation
madlabunimib · Jul 21, 2021 · f696b10 · f696b10
2 parents 59b7dd8 + ad2ad0f
commit f696b10
Show file tree

Hide file tree

Showing 75 changed files with 4,083 additions and 861 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,5 +5,13 @@ __pycache__
 **/PyCTBN.egg-info
 **/results_data
 **/.scannerwork
+**/build
+test.py
+test_1.py
+test1.json
+test2.json
+test3.json
+result0.png
+example.json
+test_time.py
 .idea
-
diff --git a/PyCTBN/PyCTBN/estimators/parameters_estimator.py b/PyCTBN/PyCTBN/estimators/parameters_estimator.py
@@ -36,7 +36,8 @@ def fast_init(self, node_id: str) -> None:
         """
         p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
         node_states_number = self._net_graph.get_states_number(node_id)
-        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)
+        self._single_set_of_cims = SetOfCims(node_id = node_id, parents_states_number = p_vals, 
+            node_states_number = node_states_number, p_combs = self._net_graph.p_combs)
 
     def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
         """Compute the CIMS of the node identified by the label ``node_id``.

diff --git a/PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py b/PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py
@@ -14,12 +14,16 @@ class ConditionalIntensityMatrix(object):
     :type state_transition_matrix: numpy.ndArray
     :_cim: the actual cim of the node
     """
-    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
+    def __init__(self, state_residence_times: np.array = None, state_transition_matrix: np.array = None, 
+        cim: np.array = None):
         """Constructor Method
         """
         self._state_residence_times = state_residence_times
         self._state_transition_matrix = state_transition_matrix
-        self._cim = self.state_transition_matrix.astype(np.float64)
+        if cim is not None:
+            self._cim = cim
+        else:
+            self._cim = self.state_transition_matrix.astype(np.float64)
 
     def compute_cim_coefficients(self) -> None:
         """Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x].

diff --git a/PyCTBN/PyCTBN/structure_graph/network_generator.py b/PyCTBN/PyCTBN/structure_graph/network_generator.py
@@ -0,0 +1,130 @@
+from .structure import Structure
+from .network_graph import NetworkGraph
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+from .set_of_cims import SetOfCims
+import numpy as np
+import pandas as pd
+import random
+
+class NetworkGenerator(object):
+    """Provides the methods to generate a network graph and the CIMs related to it
+    Items in _labels, _vals and _indxs are related, and therefore respect the same order
+    
+    :param _labels: List of variables labels that will be part of the network
+    :type _labels: List
+    :param _vals: List of cardinalities of the variables in network (defined in the same order as _labels)
+    :type _vals: List
+    :param _indxs: List of the nodes indexes
+    :type _indxs: List
+    :param _cims: It contains, for each variable label (the key), the SetOfCims object related to it
+    :type _cims: Dict
+    :param _graph: The NetworkGraph object representing the generated structure
+    :type _graph: NetworkGraph
+    """
+
+    def __init__(self, labels, vals):
+        self._labels = labels
+        self._vals = vals
+        self._indxs = np.array([i for i, l in enumerate(labels)])
+        self._graph = None
+        self._cims = None
+
+    def generate_graph(self, density, fixed: bool = False):
+        """Generate the edges according to specified density, and then instantiate the NetworkGraph object
+            to represent the network
+
+        :param density: Probability of an edge between two nodes to exist
+        :type density: float
+        :param fixed: Specifies whether the required density is mandatory or it's just a probability
+        :type fixed: bool
+        """
+
+        if fixed:
+            n_edges = density * len(self._labels) * (len(self._labels) - 1)
+            if not float.is_integer(n_edges):
+                raise RuntimeError("Invalid Density")
+            else:
+                n_edges = int(n_edges)
+            edges = [(i, j) for i in self._labels for j in self._labels if i != j]
+            random.shuffle(edges)
+            edges = edges[:n_edges]
+        else:
+            edges = [(i, j) for i in self._labels for j in self._labels if np.random.binomial(1, density) == 1 and i != j]
+
+        s = Structure(self._labels, self._indxs, self._vals, edges, len(self._labels))
+        self._graph = NetworkGraph(s)
+        self._graph.add_nodes(s.nodes_labels)
+        self._graph.add_edges(s.edges)
+
+    def generate_cims(self, min_val, max_val):
+        """For each node, generate the corresponding SetOfCims. The objective is to group the CIMs 
+            (actually generated by private method __generate_cim) according to parents possibles states of every node.
+            This method must obviously be executed after the graph has been generated.
+
+        :param min_val: Minimum value allowed for the coefficients in the CIMs
+        :type min_val: float
+        :param max_val: Maximum value allowed for the coefficients in the CIMs
+        :type max_val: float
+        """
+
+        if self._graph is None:
+            return
+
+        self._cims = {}
+
+        for i, l in enumerate(self._labels):
+            p_info = self._graph.get_ordered_by_indx_set_of_parents(l)
+            combs = self._graph.build_p_comb_structure_for_a_node(p_info[2])
+
+            if len(p_info[0]) != 0:
+                node_cims = []
+                for comb in combs:
+                    cim = self.__generate_cim(min_val, max_val, self._vals[i])
+                    node_cims.append(ConditionalIntensityMatrix(cim = cim))
+            else:
+                node_cims = []
+                cim = self.__generate_cim(min_val, max_val, self._vals[i])
+                node_cims.append(ConditionalIntensityMatrix(cim = cim))
+
+            self._cims[l] = SetOfCims(node_id = l, parents_states_number = p_info[2], node_states_number = self._vals[i], p_combs = combs, 
+                cims = np.array(node_cims))
+
+    def __generate_cim(self, min_val, max_val, shape):
+        """Generate a valid CIM matrix, with coefficients in the range [min_val, max_val] and with dimension [shape, shape]
+
+        :param min_val: Minimum value allowed for the coefficients in the CIMs
+        :type min_val: float
+        :param max_val: Maximum value allowed for the coefficients in the CIMs
+        :type max_val: float
+        :param shape: Number of elements in each dimension of the matrix (this actually is the cardinality of the node)
+        :type shape: int
+        """
+
+        cim = np.empty(shape=(shape, shape))
+        cim[:] = np.nan
+
+        for i, c in enumerate(cim):
+            diag = (max_val - min_val) * np.random.random_sample() + min_val
+            row = np.random.rand(1, len(cim))[0]
+            row /= (sum(row) - row[i])
+            row *= diag
+            row[i] = -1 * diag
+            cim[i] = np.around(row, 4)
+
+        return cim
+
+    @property
+    def graph(self) -> NetworkGraph:
+        return self._graph
+
+    @property
+    def cims(self) -> dict:
+        return self._cims   
+
+    @property
+    def dyn_str(self) -> list:
+        return pd.DataFrame([[edge[0], edge[1]] for edge in self._graph.edges], columns = ["From", "To"])
+
+    @property
+    def variables(self) -> list:
+        return pd.DataFrame([[l, self._vals[i]] for i, l in enumerate(self._labels)], columns = ["Name", "Value"])
diff --git a/PyCTBN/PyCTBN/structure_graph/set_of_cims.py b/PyCTBN/PyCTBN/structure_graph/set_of_cims.py
@@ -26,7 +26,8 @@ class SetOfCims(object):
     :_actual_cims: the cims of the node
     """
 
-    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
+    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray,
+        cims: np.ndarray = None):
         """Constructor Method
         """
         self._node_id = node_id
@@ -36,7 +37,11 @@ def __init__(self, node_id: str, parents_states_number: typing.List, node_states
         self._state_residence_times = None
         self._transition_matrices = None
         self._p_combs = p_combs
-        self.build_times_and_transitions_structures()
+
+        if cims is not None:
+            self._actual_cims = cims
+        else:
+            self.build_times_and_transitions_structures()
 
     def build_times_and_transitions_structures(self) -> None:
         """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.
@@ -60,7 +65,7 @@ def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarra
         :type transition_matrices: numpy.ndArray
         """
         for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
-            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
+            cim_to_add = ConditionalIntensityMatrix(state_residence_times = state_res_time_vector, state_transition_matrix = transition_matrix)
             cim_to_add.compute_cim_coefficients()
             self._actual_cims.append(cim_to_add)
         self._actual_cims = np.array(self._actual_cims)
@@ -82,7 +87,7 @@ def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.n
             return self._actual_cims
         else:
             flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel()
-            return self._actual_cims[flat_indxs]
+            return np.array(self._actual_cims)[flat_indxs.astype(int)]
 
     @property
     def actual_cims(self) -> np.ndarray:

diff --git a/PyCTBN/PyCTBN/structure_graph/trajectory_generator.py b/PyCTBN/PyCTBN/structure_graph/trajectory_generator.py
@@ -0,0 +1,179 @@
+from ..utility.abstract_importer import AbstractImporter
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+from .set_of_cims import SetOfCims
+from .trajectory import Trajectory
+import numpy as np
+import pandas as pd
+import re
+import json
+from numpy import random
+from multiprocessing import Process, Manager
+
+class TrajectoryGenerator(object):
+    """Provides the methods to generate a trajectory basing on the network defined
+    in the importer.
+    
+    :param _importer: the Importer object which contains the imported and processed data
+    :type _importer: AbstractImporter
+    :param _vnames: List of the variables labels that belong to the network
+    :type _vnames: List
+    :param _parents: It contains, for each variable label (the key), the list of related parents labels
+    :type _parents: Dict
+    :param _cims: It contains, for each variable label (the key), the SetOfCims object related to it
+    :type _cims: Dict
+    :param _generated_trajectory: Result of the execution of CTBN_Sample, contains the output trajectory
+    :type _generated_trajectory: pandas.DataFrame
+    """
+
+    def __init__(self, importer: AbstractImporter = None, variables: pd.DataFrame = None, dyn_str: pd.DataFrame = None, dyn_cims: dict = None):
+        """Constructor Method
+            It parses and elaborates the data fetched from importer (if defined, otherwise variables, dyn_str and dyn_cims are used) 
+            in order to make the objects structure more suitable for the forthcoming trajectory generation
+        """
+
+        self._importer = importer
+
+        self._vnames = self._importer._df_variables.iloc[:, 0].to_list() if importer is not None else variables.iloc[:, 0].to_list()
+
+        self._parents = {}
+        for v in self._vnames:
+            if importer is not None:
+                self._parents[v] = self._importer._df_structure.where(self._importer._df_structure["To"] == v).dropna()["From"].tolist()
+            else:
+                self._parents[v] = dyn_str.where(dyn_str["To"] == v).dropna()["From"].tolist()
+
+        self._cims = {}
+        if importer is not None:
+            sampled_cims = self._importer._cims
+
+            for v in sampled_cims.keys():
+                p_combs = []
+                v_cims = []
+                for comb in sampled_cims[v].keys():
+                    p_combs.append(np.array(re.findall(r"=(\d)", comb)).astype("int"))
+                    cim = pd.DataFrame(sampled_cims[v][comb]).to_numpy()    
+                    v_cims.append(ConditionalIntensityMatrix(cim = cim))
+
+                if importer is not None:
+                    sof = SetOfCims(node_id = v, parents_states_number = [self._importer._df_variables.where(self._importer._df_variables["Name"] == p)["Value"] for p in self._parents[v]], 
+                        node_states_number = self._importer._df_variables.where(self._importer._df_variables["Name"] == v)["Value"], p_combs = np.array(p_combs), cims = v_cims)
+                else:
+                    sof = SetOfCims(node_id = v, parents_states_number = [variables.where(variables["Name"] == p)["Value"] for p in self._parents[v]], 
+                        node_states_number = variables.where(variables["Name"] == v)["Value"], p_combs = np.array(p_combs), cims = v_cims)
+                self._cims[v] = sof
+        else:
+            self._cims = dyn_cims
+
+    def CTBN_Sample(self, t_end = -1, max_tr = -1):
+        """This method implements the generation of a trajectory, basing on the network structure and
+            on the coefficients defined in the CIMs.
+            The variables are initialized with value 0, and the method takes care of adding the
+            conventional last row made up of -1.
+
+        :param t_end: If defined, the sampling ends when end time is reached
+        :type t_end: float
+        :param max_tr: Parameter taken in consideration in case that t_end isn't defined. It specifies the number of transitions to execute
+        :type max_tr: int
+        """
+
+        t = 0
+        sigma = pd.DataFrame(columns = (["Time"] + self._vnames))
+        sigma.loc[len(sigma)] = [0] + [0 for v in self._vnames]
+        time = np.full(len(self._vnames), np.NaN)
+        n_tr = 0
+        self._generated_trajectory = None
+
+        while True:
+            current_values = sigma.loc[len(sigma) - 1]
+
+            for i in range(0, time.size):
+                if np.isnan(time[i]):
+                    n_parents = len(self._parents[self._vnames[i]])
+                    cim_obj = self._cims[self._vnames[i]].filter_cims_with_mask(np.array([True for p in self._parents[self._vnames[i]]]), 
+                        [current_values.at[p] for p in self._parents[self._vnames[i]]])
+
+                    if n_parents == 1:
+                        cim = cim_obj[current_values.at[self._parents[self._vnames[i]][0]]].cim
+                    else:
+                        cim = cim_obj[0].cim
+
+                    param = -1 * cim[current_values.at[self._vnames[i]]][current_values.at[self._vnames[i]]]
+
+                    time[i] = t + random.exponential(scale = param)
+
+            next = time.argmin()
+            t = time[next]
+
+            if (max_tr != -1 and n_tr == max_tr) or (t_end != -1 and t >= t_end):
+                sigma.loc[len(sigma) - 1, self._vnames] = -1
+                self._generated_trajectory = sigma
+                return sigma
+            else:
+                n_parents = len(self._parents[self._vnames[next]])
+                cim_obj = self._cims[self._vnames[next]].filter_cims_with_mask(np.array([True for p in self._parents[self._vnames[next]]]), 
+                    [current_values.at[p] for p in self._parents[self._vnames[next]]])
+
+                if n_parents == 1:
+                    cim = cim_obj[current_values.at[self._parents[self._vnames[next]][0]]].cim
+                else:
+                    cim = cim_obj[0].cim
+
+                cim_row = np.array(cim[current_values.at[self._vnames[next]]])
+                cim_row[current_values.at[self._vnames[next]]] = 0
+                cim_row /= sum(cim_row)
+                rand_mult = np.random.multinomial(1, cim_row, size=1)
+
+                new_row = pd.DataFrame(sigma[-1:].values, columns = sigma.columns)
+                new_row.loc[0].at[self._vnames[next]] = np.where(rand_mult[0] == 1)[0][0]
+                new_row.loc[0].at["Time"] = round(t, 4)
+                sigma = sigma.append(new_row, ignore_index = True)
+
+                n_tr += 1
+
+                # undefine variable time
+                time[next] = np.NaN
+                for i, v in enumerate(self._parents):
+                    if self._vnames[next] in self._parents[v]:
+                        time[i] = np.NaN
+
+    def worker(self, t_end, max_tr, trajectories):
+        """Single process that will be executed in parallel in order to generate one trajectory. 
+
+            :param t_end: If defined, the sampling ends when end time is reached
+            :type t_end: float
+            :param max_tr: Parameter taken in consideration in case that t_end isn't defined. It specifies the number of transitions to execute
+            :type max_tr: int
+            :param trajectories: Shared list that contains to which the generated trajectory is added
+            :type trajectories: list
+        """
+
+        trajectory = self.CTBN_Sample(t_end = t_end, max_tr = max_tr)
+        trajectories.append(trajectory)
+
+    def multi_trajectory(self, t_ends: list = None, max_trs: list = None):
+        """Generate n trajectories in parallel, where n is the number of items in 
+            t_ends, if defined, or the number of items in max_trs otherwise
+
+            :param t_ends: List of t_end values for the trajectories that will be generated
+            :type t_ends: list
+            :param max_trs: List of max_tr values for the trajectories that will be generated
+            :type max_trs: list
+        """
+
+        if t_ends is None and max_trs is None:
+            return
+
+        trajectories = Manager().list()
+
+        if t_ends is not None:
+            processes = [Process(target = self.worker, args = (t, -1, trajectories)) for t in t_ends]
+        else:   
+            processes = [Process(target = self.worker, args = (-1, m, trajectories)) for m in max_trs]
+
+        for p in processes:
+            p.start()
+
+        for p in processes:
+            p.join()
+
+        return trajectories