Skip to content

Commit

Permalink
Merge pull request #6 from pietroepis/trajectory-generation
Browse files Browse the repository at this point in the history
Trajectory generation
  • Loading branch information
AlessandroBregoli authored Jul 21, 2021
2 parents 59b7dd8 + ad2ad0f commit f696b10
Show file tree
Hide file tree
Showing 75 changed files with 4,083 additions and 861 deletions.
10 changes: 9 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,13 @@ __pycache__
**/PyCTBN.egg-info
**/results_data
**/.scannerwork
**/build
test.py
test_1.py
test1.json
test2.json
test3.json
result0.png
example.json
test_time.py
.idea

3 changes: 2 additions & 1 deletion PyCTBN/PyCTBN/estimators/parameters_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def fast_init(self, node_id: str) -> None:
"""
p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
node_states_number = self._net_graph.get_states_number(node_id)
self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)
self._single_set_of_cims = SetOfCims(node_id = node_id, parents_states_number = p_vals,
node_states_number = node_states_number, p_combs = self._net_graph.p_combs)

def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
"""Compute the CIMS of the node identified by the label ``node_id``.
Expand Down
8 changes: 6 additions & 2 deletions PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@ class ConditionalIntensityMatrix(object):
:type state_transition_matrix: numpy.ndArray
:_cim: the actual cim of the node
"""
def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
def __init__(self, state_residence_times: np.array = None, state_transition_matrix: np.array = None,
cim: np.array = None):
"""Constructor Method
"""
self._state_residence_times = state_residence_times
self._state_transition_matrix = state_transition_matrix
self._cim = self.state_transition_matrix.astype(np.float64)
if cim is not None:
self._cim = cim
else:
self._cim = self.state_transition_matrix.astype(np.float64)

def compute_cim_coefficients(self) -> None:
"""Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x].
Expand Down
130 changes: 130 additions & 0 deletions PyCTBN/PyCTBN/structure_graph/network_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from .structure import Structure
from .network_graph import NetworkGraph
from .conditional_intensity_matrix import ConditionalIntensityMatrix
from .set_of_cims import SetOfCims
import numpy as np
import pandas as pd
import random

class NetworkGenerator(object):
"""Provides the methods to generate a network graph and the CIMs related to it
Items in _labels, _vals and _indxs are related, and therefore respect the same order
:param _labels: List of variables labels that will be part of the network
:type _labels: List
:param _vals: List of cardinalities of the variables in network (defined in the same order as _labels)
:type _vals: List
:param _indxs: List of the nodes indexes
:type _indxs: List
:param _cims: It contains, for each variable label (the key), the SetOfCims object related to it
:type _cims: Dict
:param _graph: The NetworkGraph object representing the generated structure
:type _graph: NetworkGraph
"""

def __init__(self, labels, vals):
self._labels = labels
self._vals = vals
self._indxs = np.array([i for i, l in enumerate(labels)])
self._graph = None
self._cims = None

def generate_graph(self, density, fixed: bool = False):
"""Generate the edges according to specified density, and then instantiate the NetworkGraph object
to represent the network
:param density: Probability of an edge between two nodes to exist
:type density: float
:param fixed: Specifies whether the required density is mandatory or it's just a probability
:type fixed: bool
"""

if fixed:
n_edges = density * len(self._labels) * (len(self._labels) - 1)
if not float.is_integer(n_edges):
raise RuntimeError("Invalid Density")
else:
n_edges = int(n_edges)
edges = [(i, j) for i in self._labels for j in self._labels if i != j]
random.shuffle(edges)
edges = edges[:n_edges]
else:
edges = [(i, j) for i in self._labels for j in self._labels if np.random.binomial(1, density) == 1 and i != j]

s = Structure(self._labels, self._indxs, self._vals, edges, len(self._labels))
self._graph = NetworkGraph(s)
self._graph.add_nodes(s.nodes_labels)
self._graph.add_edges(s.edges)

def generate_cims(self, min_val, max_val):
"""For each node, generate the corresponding SetOfCims. The objective is to group the CIMs
(actually generated by private method __generate_cim) according to parents possibles states of every node.
This method must obviously be executed after the graph has been generated.
:param min_val: Minimum value allowed for the coefficients in the CIMs
:type min_val: float
:param max_val: Maximum value allowed for the coefficients in the CIMs
:type max_val: float
"""

if self._graph is None:
return

self._cims = {}

for i, l in enumerate(self._labels):
p_info = self._graph.get_ordered_by_indx_set_of_parents(l)
combs = self._graph.build_p_comb_structure_for_a_node(p_info[2])

if len(p_info[0]) != 0:
node_cims = []
for comb in combs:
cim = self.__generate_cim(min_val, max_val, self._vals[i])
node_cims.append(ConditionalIntensityMatrix(cim = cim))
else:
node_cims = []
cim = self.__generate_cim(min_val, max_val, self._vals[i])
node_cims.append(ConditionalIntensityMatrix(cim = cim))

self._cims[l] = SetOfCims(node_id = l, parents_states_number = p_info[2], node_states_number = self._vals[i], p_combs = combs,
cims = np.array(node_cims))

def __generate_cim(self, min_val, max_val, shape):
"""Generate a valid CIM matrix, with coefficients in the range [min_val, max_val] and with dimension [shape, shape]
:param min_val: Minimum value allowed for the coefficients in the CIMs
:type min_val: float
:param max_val: Maximum value allowed for the coefficients in the CIMs
:type max_val: float
:param shape: Number of elements in each dimension of the matrix (this actually is the cardinality of the node)
:type shape: int
"""

cim = np.empty(shape=(shape, shape))
cim[:] = np.nan

for i, c in enumerate(cim):
diag = (max_val - min_val) * np.random.random_sample() + min_val
row = np.random.rand(1, len(cim))[0]
row /= (sum(row) - row[i])
row *= diag
row[i] = -1 * diag
cim[i] = np.around(row, 4)

return cim

@property
def graph(self) -> NetworkGraph:
return self._graph

@property
def cims(self) -> dict:
return self._cims

@property
def dyn_str(self) -> list:
return pd.DataFrame([[edge[0], edge[1]] for edge in self._graph.edges], columns = ["From", "To"])

@property
def variables(self) -> list:
return pd.DataFrame([[l, self._vals[i]] for i, l in enumerate(self._labels)], columns = ["Name", "Value"])
13 changes: 9 additions & 4 deletions PyCTBN/PyCTBN/structure_graph/set_of_cims.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class SetOfCims(object):
:_actual_cims: the cims of the node
"""

def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray,
cims: np.ndarray = None):
"""Constructor Method
"""
self._node_id = node_id
Expand All @@ -36,7 +37,11 @@ def __init__(self, node_id: str, parents_states_number: typing.List, node_states
self._state_residence_times = None
self._transition_matrices = None
self._p_combs = p_combs
self.build_times_and_transitions_structures()

if cims is not None:
self._actual_cims = cims
else:
self.build_times_and_transitions_structures()

def build_times_and_transitions_structures(self) -> None:
"""Initializes at the correct dimensions the state residence times matrix and the state transition matrices.
Expand All @@ -60,7 +65,7 @@ def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarra
:type transition_matrices: numpy.ndArray
"""
for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
cim_to_add = ConditionalIntensityMatrix(state_residence_times = state_res_time_vector, state_transition_matrix = transition_matrix)
cim_to_add.compute_cim_coefficients()
self._actual_cims.append(cim_to_add)
self._actual_cims = np.array(self._actual_cims)
Expand All @@ -82,7 +87,7 @@ def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.n
return self._actual_cims
else:
flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel()
return self._actual_cims[flat_indxs]
return np.array(self._actual_cims)[flat_indxs.astype(int)]

@property
def actual_cims(self) -> np.ndarray:
Expand Down
179 changes: 179 additions & 0 deletions PyCTBN/PyCTBN/structure_graph/trajectory_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from ..utility.abstract_importer import AbstractImporter
from .conditional_intensity_matrix import ConditionalIntensityMatrix
from .set_of_cims import SetOfCims
from .trajectory import Trajectory
import numpy as np
import pandas as pd
import re
import json
from numpy import random
from multiprocessing import Process, Manager

class TrajectoryGenerator(object):
"""Provides the methods to generate a trajectory basing on the network defined
in the importer.
:param _importer: the Importer object which contains the imported and processed data
:type _importer: AbstractImporter
:param _vnames: List of the variables labels that belong to the network
:type _vnames: List
:param _parents: It contains, for each variable label (the key), the list of related parents labels
:type _parents: Dict
:param _cims: It contains, for each variable label (the key), the SetOfCims object related to it
:type _cims: Dict
:param _generated_trajectory: Result of the execution of CTBN_Sample, contains the output trajectory
:type _generated_trajectory: pandas.DataFrame
"""

def __init__(self, importer: AbstractImporter = None, variables: pd.DataFrame = None, dyn_str: pd.DataFrame = None, dyn_cims: dict = None):
"""Constructor Method
It parses and elaborates the data fetched from importer (if defined, otherwise variables, dyn_str and dyn_cims are used)
in order to make the objects structure more suitable for the forthcoming trajectory generation
"""

self._importer = importer

self._vnames = self._importer._df_variables.iloc[:, 0].to_list() if importer is not None else variables.iloc[:, 0].to_list()

self._parents = {}
for v in self._vnames:
if importer is not None:
self._parents[v] = self._importer._df_structure.where(self._importer._df_structure["To"] == v).dropna()["From"].tolist()
else:
self._parents[v] = dyn_str.where(dyn_str["To"] == v).dropna()["From"].tolist()

self._cims = {}
if importer is not None:
sampled_cims = self._importer._cims

for v in sampled_cims.keys():
p_combs = []
v_cims = []
for comb in sampled_cims[v].keys():
p_combs.append(np.array(re.findall(r"=(\d)", comb)).astype("int"))
cim = pd.DataFrame(sampled_cims[v][comb]).to_numpy()
v_cims.append(ConditionalIntensityMatrix(cim = cim))

if importer is not None:
sof = SetOfCims(node_id = v, parents_states_number = [self._importer._df_variables.where(self._importer._df_variables["Name"] == p)["Value"] for p in self._parents[v]],
node_states_number = self._importer._df_variables.where(self._importer._df_variables["Name"] == v)["Value"], p_combs = np.array(p_combs), cims = v_cims)
else:
sof = SetOfCims(node_id = v, parents_states_number = [variables.where(variables["Name"] == p)["Value"] for p in self._parents[v]],
node_states_number = variables.where(variables["Name"] == v)["Value"], p_combs = np.array(p_combs), cims = v_cims)
self._cims[v] = sof
else:
self._cims = dyn_cims

def CTBN_Sample(self, t_end = -1, max_tr = -1):
"""This method implements the generation of a trajectory, basing on the network structure and
on the coefficients defined in the CIMs.
The variables are initialized with value 0, and the method takes care of adding the
conventional last row made up of -1.
:param t_end: If defined, the sampling ends when end time is reached
:type t_end: float
:param max_tr: Parameter taken in consideration in case that t_end isn't defined. It specifies the number of transitions to execute
:type max_tr: int
"""

t = 0
sigma = pd.DataFrame(columns = (["Time"] + self._vnames))
sigma.loc[len(sigma)] = [0] + [0 for v in self._vnames]
time = np.full(len(self._vnames), np.NaN)
n_tr = 0
self._generated_trajectory = None

while True:
current_values = sigma.loc[len(sigma) - 1]

for i in range(0, time.size):
if np.isnan(time[i]):
n_parents = len(self._parents[self._vnames[i]])
cim_obj = self._cims[self._vnames[i]].filter_cims_with_mask(np.array([True for p in self._parents[self._vnames[i]]]),
[current_values.at[p] for p in self._parents[self._vnames[i]]])

if n_parents == 1:
cim = cim_obj[current_values.at[self._parents[self._vnames[i]][0]]].cim
else:
cim = cim_obj[0].cim

param = -1 * cim[current_values.at[self._vnames[i]]][current_values.at[self._vnames[i]]]

time[i] = t + random.exponential(scale = param)

next = time.argmin()
t = time[next]

if (max_tr != -1 and n_tr == max_tr) or (t_end != -1 and t >= t_end):
sigma.loc[len(sigma) - 1, self._vnames] = -1
self._generated_trajectory = sigma
return sigma
else:
n_parents = len(self._parents[self._vnames[next]])
cim_obj = self._cims[self._vnames[next]].filter_cims_with_mask(np.array([True for p in self._parents[self._vnames[next]]]),
[current_values.at[p] for p in self._parents[self._vnames[next]]])

if n_parents == 1:
cim = cim_obj[current_values.at[self._parents[self._vnames[next]][0]]].cim
else:
cim = cim_obj[0].cim

cim_row = np.array(cim[current_values.at[self._vnames[next]]])
cim_row[current_values.at[self._vnames[next]]] = 0
cim_row /= sum(cim_row)
rand_mult = np.random.multinomial(1, cim_row, size=1)

new_row = pd.DataFrame(sigma[-1:].values, columns = sigma.columns)
new_row.loc[0].at[self._vnames[next]] = np.where(rand_mult[0] == 1)[0][0]
new_row.loc[0].at["Time"] = round(t, 4)
sigma = sigma.append(new_row, ignore_index = True)

n_tr += 1

# undefine variable time
time[next] = np.NaN
for i, v in enumerate(self._parents):
if self._vnames[next] in self._parents[v]:
time[i] = np.NaN

def worker(self, t_end, max_tr, trajectories):
"""Single process that will be executed in parallel in order to generate one trajectory.
:param t_end: If defined, the sampling ends when end time is reached
:type t_end: float
:param max_tr: Parameter taken in consideration in case that t_end isn't defined. It specifies the number of transitions to execute
:type max_tr: int
:param trajectories: Shared list that contains to which the generated trajectory is added
:type trajectories: list
"""

trajectory = self.CTBN_Sample(t_end = t_end, max_tr = max_tr)
trajectories.append(trajectory)

def multi_trajectory(self, t_ends: list = None, max_trs: list = None):
"""Generate n trajectories in parallel, where n is the number of items in
t_ends, if defined, or the number of items in max_trs otherwise
:param t_ends: List of t_end values for the trajectories that will be generated
:type t_ends: list
:param max_trs: List of max_tr values for the trajectories that will be generated
:type max_trs: list
"""

if t_ends is None and max_trs is None:
return

trajectories = Manager().list()

if t_ends is not None:
processes = [Process(target = self.worker, args = (t, -1, trajectories)) for t in t_ends]
else:
processes = [Process(target = self.worker, args = (-1, m, trajectories)) for m in max_trs]

for p in processes:
p.start()

for p in processes:
p.join()

return trajectories
Loading

0 comments on commit f696b10

Please sign in to comment.