diff --git a/docs/datasets.html b/docs/datasets.html
index 8a0a89a..1ac85fb 100644
--- a/docs/datasets.html
+++ b/docs/datasets.html
@@ -80,6 +80,58 @@

Module dsm.datasets

 return e, t

+def _load_framingham_dataset(sequential):
+  """Helper function to load and preprocess the Framingham dataset.
+
+  The Framingham Dataset is a subset of 4,434 participants of the well-known,
+  ongoing Framingham Heart Study [1], which studies the epidemiology of
+  hypertensive and arteriosclerotic cardiovascular disease. It is a popular
+  dataset for longitudinal survival analysis with time-dependent covariates.
+
+  Parameters
+  ----------
+  sequential: bool
+    If True, returns a list of np.arrays for each individual;
+    otherwise, returns results collapsed over time steps. To train
+    recurrent neural models you would typically use True.
+
+  References
+  ----------
+  [1] Dawber, Thomas R., Gilcin F. Meadors, and Felix E. Moore Jr.
+  "Epidemiological approaches to heart disease: the Framingham Study."
+  American Journal of Public Health and the Nations Health 41.3 (1951).
+
+  """
+
+  data = pkgutil.get_data(__name__, 'datasets/framingham.csv')
+  data = pd.read_csv(io.BytesIO(data))
+
+  dat_cat = data[['SEX', 'CURSMOKE', 'DIABETES', 'BPMEDS',
+                  'educ', 'PREVCHD', 'PREVAP', 'PREVMI',
+                  'PREVSTRK', 'PREVHYP']]
+  dat_num = data[['TOTCHOL', 'AGE', 'SYSBP', 'DIABP',
+                  'CIGPDAY', 'BMI', 'HEARTRTE', 'GLUCOSE']]
+
+  x1 = pd.get_dummies(dat_cat).values
+  x2 = dat_num.values
+  x = np.hstack([x1, x2])
+
+  time = (data['TIMEDTH'] - data['TIME']).values
+  event = data['DEATH'].values
+
+  x = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(x)
+  x_ = StandardScaler().fit_transform(x)
+
+  if not sequential:
+    return x_, time, event
+  else:
+    x, t, e = [], [], []
+    for id_ in sorted(list(set(data['RANDID']))):
+      x.append(x_[data['RANDID'] == id_])
+      t.append(time[data['RANDID'] == id_])
+      e.append(event[data['RANDID'] == id_])
+    return x, t, e
+
 def _load_pbc_dataset(sequential):
   """Helper function to load and preprocess the PBC dataset

@@ -176,8 +228,8 @@

Module dsm.datasets

 Parameters
 ----------
 dataset: str
-    The choice of dataset to load. Currently implemented is 'SUPPORT'
-    and 'PBC'.
+    The choice of dataset to load. Currently implemented are 'SUPPORT',
+    'PBC' and 'FRAMINGHAM'.
 **kwargs: dict
     Dataset specific keyword arguments.

@@ -188,14 +240,16 @@

Module dsm.datasets

 event times and the censoring indicators respectively.

 """
+  sequential = kwargs.get('sequential', False)
   if dataset == 'SUPPORT':
     return _load_support_dataset()
   if dataset == 'PBC':
-    sequential = kwargs.get('sequential', False)
     return _load_pbc_dataset(sequential)
+  if dataset == 'FRAMINGHAM':
+    return _load_framingham_dataset(sequential)
   else:
-    return NotImplementedError('Dataset '+dataset+' not implemented.')
+    raise NotImplementedError('Dataset '+dataset+' not implemented.')
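For context, a minimal usage sketch of the new dispatch path. This assumes the function shown above is exposed as `dsm.datasets.load_dataset`; the dataset name and the `sequential` kwarg are taken verbatim from this diff:

    from dsm import datasets

    # Collapsed view: one row per recorded observation.
    x, t, e = datasets.load_dataset('FRAMINGHAM')

    # Sequential view: one np.array per subject (grouped by 'RANDID'),
    # the form you would feed to recurrent models.
    x_seq, t_seq, e_seq = datasets.load_dataset('FRAMINGHAM', sequential=True)
    print(len(x_seq), x_seq[0].shape)  # number of subjects, (visits, features)

The switch from `return NotImplementedError(...)` to `raise` is also worth noting: the old code returned the exception instance instead of raising it, so a misspelled dataset name produced a confusing TypeError when the result was unpacked (or went unnoticed entirely if it was stored without unpacking), rather than a clear NotImplementedError at the call site.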
@@ -239,8 +293,8 @@

Functions

Parameters

dataset : str
-The choice of dataset to load. Currently implemented is 'SUPPORT'
-and 'PBC'.
+The choice of dataset to load. Currently implemented are 'SUPPORT',
+'PBC' and 'FRAMINGHAM'.
**kwargs : dict
Dataset specific keyword arguments.
@@ -260,8 +314,8 @@

Returns

 Parameters
 ----------
 dataset: str
-    The choice of dataset to load. Currently implemented is 'SUPPORT'
-    and 'PBC'.
+    The choice of dataset to load. Currently implemented are 'SUPPORT',
+    'PBC' and 'FRAMINGHAM'.
 **kwargs: dict
     Dataset specific keyword arguments.

@@ -272,14 +326,16 @@

Returns

 event times and the censoring indicators respectively.

 """
+  sequential = kwargs.get('sequential', False)
   if dataset == 'SUPPORT':
     return _load_support_dataset()
   if dataset == 'PBC':
-    sequential = kwargs.get('sequential', False)
     return _load_pbc_dataset(sequential)
+  if dataset == 'FRAMINGHAM':
+    return _load_framingham_dataset(sequential)
   else:
-    return NotImplementedError('Dataset '+dataset+' not implemented.')
+    raise NotImplementedError('Dataset '+dataset+' not implemented.')

diff --git a/docs/dsm_api.html b/docs/dsm_api.html
index d8009a2..b8a190a 100644
--- a/docs/dsm_api.html
+++ b/docs/dsm_api.html
@@ -142,7 +142,6 @@

Module dsm.dsm_api

     A numpy array of the event/censoring times, \( t \).
 e: np.ndarray
     A numpy array of the event/censoring indicators, \( \delta \).
-    \( \delta = 1 \) means the event took place.
 vsize: float
     Amount of data to set aside as the validation set.

@@ -259,6 +258,7 @@

Module dsm.dsm_api

"before calling `predict_risk`.") class DeepRecurrentSurvivalMachines(DeepSurvivalMachines): + __doc__ = "..warning:: Not Implemented" pass @@ -321,6 +321,7 @@

Inherited members

Expand source code
 class DeepRecurrentSurvivalMachines(DeepSurvivalMachines):
+  __doc__ = "..warning:: Not Implemented"
   pass
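A class-body `__doc__` assignment like this one is equivalent to writing a docstring, which is why pdoc now renders the warning on the class page; `help()` shows the same text. A quick illustration with a hypothetical stub class:

    class Stub:
      __doc__ = "..warning:: Not Implemented"

    print(Stub.__doc__)  # ..warning:: Not Implemented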
@@ -467,7 +468,6 @@

Example

     A numpy array of the event/censoring times, \( t \).
 e: np.ndarray
     A numpy array of the event/censoring indicators, \( \delta \).
-    \( \delta = 1 \) means the event took place.
 vsize: float
     Amount of data to set aside as the validation set.

@@ -601,10 +601,8 @@

Parameters

 t : np.ndarray
     A numpy array of the event/censoring times, t.
 e : np.ndarray
-    A numpy array of the event/censoring indicators, \delta.
-
-    \delta = 1 means the event took place.
-
+    A numpy array of the event/censoring indicators, \delta.
+    \delta = 1 means the event took place.
 vsize : float
     Amount of data to set aside as the validation set.
 iters : int
@@ -641,7 +639,6 @@

Parameters

     A numpy array of the event/censoring times, \( t \).
 e: np.ndarray
     A numpy array of the event/censoring indicators, \( \delta \).
-    \( \delta = 1 \) means the event took place.
 vsize: float
     Amount of data to set aside as the validation set.

diff --git a/docs/dsm_torch.html b/docs/dsm_torch.html
index 36d8a1d..3459ffb 100644
--- a/docs/dsm_torch.html
+++ b/docs/dsm_torch.html
@@ -281,7 +281,6 @@

Module dsm.dsm_torch

 self.embedding = nn.RNN(inputdim, hidden, layers,
                         bias=False, batch_first=True)
-    #self.embedding = nn.ReLU6(self.embedding)

 def forward(self, x):

@@ -518,7 +517,6 @@

Parameters

 self.embedding = nn.RNN(inputdim, hidden, layers,
                         bias=False, batch_first=True)
-    #self.embedding = nn.ReLU6(self.embedding)

 def forward(self, x):

diff --git a/docs/index.html b/docs/index.html
index f460ff4..658f5b4 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -63,7 +63,13 @@

Deep Recurrent Survival Machines

 model and allows for learning of representations of the input covariates
 using Recurrent Neural Networks like LSTMs, GRUs. Deep Recurrent Survival
 Machines is a natural fit to model problems where there are time-dependent
-covariates.

+covariates. Examples include situations where we are working with streaming
+data such as vital signs or degradation-monitoring signals in predictive
+maintenance. DRSM allows the learnt representations at each time step to
+incorporate historical context from previous time steps. The DRSM
+implementation in dsm is exposed through an easy-to-use API that accepts
+lists of data streams and the corresponding failure times. The module
+automatically takes care of appropriate batching and padding of
+variable-length sequences.

Warning: Not Implemented Yet!
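To make the batching-and-padding behaviour described above concrete, here is a minimal sketch of what it amounts to, using hypothetical per-subject arrays of the kind the sequential dataset loaders return. The internal DRSM machinery is not part of this diff (and is marked not implemented), so torch's `pad_sequence` stands in for it here:

    import numpy as np
    import torch
    from torch.nn.utils.rnn import pad_sequence

    # Three hypothetical subjects with 3, 7 and 5 visits of 4 covariates each.
    streams = [np.random.randn(n, 4) for n in (3, 7, 5)]

    # Pad to a common length so the batch can be fed to an RNN with
    # batch_first=True, as in dsm_torch's embedding.
    padded = pad_sequence([torch.tensor(s) for s in streams], batch_first=True)
    print(padded.shape)  # torch.Size([3, 7, 4]): batch, max length, features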

@@ -188,7 +194,14 @@

License

 model and allows for learning of representations of the input covariates
 using **Recurrent Neural Networks** like **LSTMs, GRUs**. Deep Recurrent
 Survival Machines is a natural fit to model problems where there are
 time-dependent
-covariates.
+covariates. Examples include situations where we are working with streaming
+data such as vital signs or degradation-monitoring signals in predictive
+maintenance. **DRSM** allows the learnt representations at each time step to
+incorporate historical context from previous time steps. The **DRSM**
+implementation in `dsm` is exposed through an easy-to-use API that accepts
+lists of data streams and the corresponding failure times. The module
+automatically takes care of appropriate batching and padding of
+variable-length sequences.
+
 ..warning:: Not Implemented Yet!

diff --git a/docs/losses.html b/docs/losses.html
index 5df6074..b104f8c 100644
--- a/docs/losses.html
+++ b/docs/losses.html
@@ -192,8 +192,6 @@

Module dsm.losses

 alpha = model.discount
 shape, scale, logits = model.forward(x)

-  #print (shape, scale, logits)
-
 k_ = shape
 b_ = scale

@@ -258,7 +256,7 @@

Module dsm.losses

 b_ = scale

 t_horz = torch.tensor(t_horizon).double()
-  t_horz = t_horz.repeat(x.shape[0], 1)
+  t_horz = t_horz.repeat(shape.shape[0], 1)

 cdfs = []
 for j in range(len(t_horizon)):

@@ -292,7 +290,7 @@

Module dsm.losses

 b_ = scale

 t_horz = torch.tensor(t_horizon).double()
-  t_horz = t_horz.repeat(x.shape[0], 1)
+  t_horz = t_horz.repeat(shape.shape[0], 1)

 cdfs = []
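The `x.shape[0]` to `shape.shape[0]` change tiles the evaluation horizons against the leading dimension of the forward pass's output rather than the raw input: for recurrent models the (padded) input's first dimension need not match the number of parameter rows the model actually emits, so `shape` is the safe anchor. A standalone sketch of the tiling semantics, with hypothetical sizes:

    import torch

    shape = torch.randn(6)  # e.g. one Weibull shape parameter per observation
    t_horizon = [1.0, 5.0, 10.0]

    t_horz = torch.tensor(t_horizon).double()
    t_horz = t_horz.repeat(shape.shape[0], 1)
    print(t_horz.shape)  # torch.Size([6, 3]): one row of horizons per observation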