Skip to content

Commit

Permalink
modified: docs/datasets.html
Browse files Browse the repository at this point in the history
	modified:   docs/dsm_api.html
	modified:   docs/dsm_torch.html
	modified:   docs/index.html
	modified:   docs/losses.html
  • Loading branch information
chiragnagpal committed Oct 30, 2020
1 parent 1ee7790 commit 660497f
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 25 deletions.
76 changes: 66 additions & 10 deletions docs/datasets.html
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,58 @@ <h1 class="title">Module <code>dsm.datasets</code></h1>

return e, t

def _load_framingham_dataset(sequential):
&#34;&#34;&#34;Helper function to load and preprocess the Framingham dataset.

The Framingham Dataset is a subset of 4,434 participants of the well known,
ongoing Framingham Heart study [1] for studying epidemiology for
hypertensive and arteriosclerotic cardiovascular disease. It is a popular
dataset for longitudinal survival analysis with time dependent covariates.

Parameters
----------
sequential: bool
If True returns a list of np.arrays for each individual.
else, returns collapsed results for each time step. To train
recurrent neural models you would typically use True.

References
----------
[1] Dawber, Thomas R., Gilcin F. Meadors, and Felix E. Moore Jr.
&#34;Epidemiological approaches to heart disease: the Framingham Study.&#34;
American Journal of Public Health and the Nations Health 41.3 (1951).

&#34;&#34;&#34;

data = pkgutil.get_data(__name__, &#39;datasets/framingham.csv&#39;)
data = pd.read_csv(io.BytesIO(data))

dat_cat = data[[&#39;SEX&#39;, &#39;CURSMOKE&#39;, &#39;DIABETES&#39;, &#39;BPMEDS&#39;,
&#39;educ&#39;, &#39;PREVCHD&#39;, &#39;PREVAP&#39;, &#39;PREVMI&#39;,
&#39;PREVSTRK&#39;, &#39;PREVHYP&#39;]]
dat_num = data[[&#39;TOTCHOL&#39;, &#39;AGE&#39;, &#39;SYSBP&#39;, &#39;DIABP&#39;,
&#39;CIGPDAY&#39;, &#39;BMI&#39;, &#39;HEARTRTE&#39;, &#39;GLUCOSE&#39;]]

x1 = pd.get_dummies(dat_cat).values
x2 = dat_num.values
x = np.hstack([x1, x2])

time = (data[&#39;TIMEDTH&#39;] - data[&#39;TIME&#39;]).values
event = data[&#39;DEATH&#39;].values

x = SimpleImputer(missing_values=np.nan, strategy=&#39;mean&#39;).fit_transform(x)
x_ = StandardScaler().fit_transform(x)

if not sequential:
return x_, time, event
else:
x, t, e = [], [], []
for id_ in sorted(list(set(data[&#39;RANDID&#39;]))):
x.append(x_[data[&#39;RANDID&#39;] == id_])
t.append(time[data[&#39;RANDID&#39;] == id_])
e.append(event[data[&#39;RANDID&#39;] == id_])
return x, t, e

def _load_pbc_dataset(sequential):
&#34;&#34;&#34;Helper function to load and preprocess the PBC dataset

Expand Down Expand Up @@ -176,8 +228,8 @@ <h1 class="title">Module <code>dsm.datasets</code></h1>
Parameters
----------
dataset: str
The choice of dataset to load. Currently implemented is &#39;SUPPORT&#39;
and &#39;PBC&#39;.
The choice of dataset to load. Currently implemented is &#39;SUPPORT&#39;,
&#39;PBC&#39; and &#39;FRAMINGHAM&#39;.
**kwargs: dict
Dataset specific keyword arguments.

Expand All @@ -188,14 +240,16 @@ <h1 class="title">Module <code>dsm.datasets</code></h1>
event times and the censoring indicators respectively.

&#34;&#34;&#34;
sequential = kwargs.get(&#39;sequential&#39;, False)

if dataset == &#39;SUPPORT&#39;:
return _load_support_dataset()
if dataset == &#39;PBC&#39;:
sequential = kwargs.get(&#39;sequential&#39;, False)
return _load_pbc_dataset(sequential)
if dataset == &#39;FRAMINGHAM&#39;:
return _load_framingham_dataset(sequential)
else:
return NotImplementedError(&#39;Dataset &#39;+dataset+&#39; not implemented.&#39;)</code></pre>
raise NotImplementedError(&#39;Dataset &#39;+dataset+&#39; not implemented.&#39;)</code></pre>
</details>
</section>
<section>
Expand Down Expand Up @@ -239,8 +293,8 @@ <h2 class="section-title" id="header-functions">Functions</h2>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>dataset</code></strong> :&ensp;<code>str</code></dt>
<dd>The choice of dataset to load. Currently implemented is 'SUPPORT'
and 'PBC'.</dd>
<dd>The choice of dataset to load. Currently implemented is 'SUPPORT',
'PBC' and 'FRAMINGHAM'.</dd>
<dt><strong><code>**kwargs</code></strong> :&ensp;<code>dict</code></dt>
<dd>Dataset specific keyword arguments.</dd>
</dl>
Expand All @@ -260,8 +314,8 @@ <h2 id="returns">Returns</h2>
Parameters
----------
dataset: str
The choice of dataset to load. Currently implemented is &#39;SUPPORT&#39;
and &#39;PBC&#39;.
The choice of dataset to load. Currently implemented is &#39;SUPPORT&#39;,
&#39;PBC&#39; and &#39;FRAMINGHAM&#39;.
**kwargs: dict
Dataset specific keyword arguments.

Expand All @@ -272,14 +326,16 @@ <h2 id="returns">Returns</h2>
event times and the censoring indicators respectively.

&#34;&#34;&#34;
sequential = kwargs.get(&#39;sequential&#39;, False)

if dataset == &#39;SUPPORT&#39;:
return _load_support_dataset()
if dataset == &#39;PBC&#39;:
sequential = kwargs.get(&#39;sequential&#39;, False)
return _load_pbc_dataset(sequential)
if dataset == &#39;FRAMINGHAM&#39;:
return _load_framingham_dataset(sequential)
else:
return NotImplementedError(&#39;Dataset &#39;+dataset+&#39; not implemented.&#39;)</code></pre>
raise NotImplementedError(&#39;Dataset &#39;+dataset+&#39; not implemented.&#39;)</code></pre>
</details>
</dd>
</dl>
Expand Down
11 changes: 4 additions & 7 deletions docs/dsm_api.html
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ <h1 class="title">Module <code>dsm.dsm_api</code></h1>
A numpy array of the event/censoring times, \( t \).
e: np.ndarray
A numpy array of the event/censoring indicators, \( \delta \).

\( \delta = 1 \) means the event took place.
vsize: float
Amount of data to set aside as the validation set.
Expand Down Expand Up @@ -259,6 +258,7 @@ <h1 class="title">Module <code>dsm.dsm_api</code></h1>
&#34;before calling `predict_risk`.&#34;)

class DeepRecurrentSurvivalMachines(DeepSurvivalMachines):

__doc__ = &#34;..warning:: Not Implemented&#34;
pass

Expand Down Expand Up @@ -321,6 +321,7 @@ <h3>Inherited members</h3>
<span>Expand source code</span>
</summary>
<pre><code class="python">class DeepRecurrentSurvivalMachines(DeepSurvivalMachines):

__doc__ = &#34;..warning:: Not Implemented&#34;
pass</code></pre>
</details>
Expand Down Expand Up @@ -467,7 +468,6 @@ <h2 id="example">Example</h2>
A numpy array of the event/censoring times, \( t \).
e: np.ndarray
A numpy array of the event/censoring indicators, \( \delta \).

\( \delta = 1 \) means the event took place.
vsize: float
Amount of data to set aside as the validation set.
Expand Down Expand Up @@ -601,10 +601,8 @@ <h2 id="parameters">Parameters</h2>
<dt><strong><code>t</code></strong> :&ensp;<code>np.ndarray</code></dt>
<dd>A numpy array of the event/censoring times, <span><span class="MathJax_Preview"> t </span><script type="math/tex"> t </script></span>.</dd>
<dt><strong><code>e</code></strong> :&ensp;<code>np.ndarray</code></dt>
<dd>
<p>A numpy array of the event/censoring indicators, <span><span class="MathJax_Preview"> \delta </span><script type="math/tex"> \delta </script></span>.</p>
<p><span><span class="MathJax_Preview"> \delta = 1 </span><script type="math/tex"> \delta = 1 </script></span> means the event took place.</p>
</dd>
<dd>A numpy array of the event/censoring indicators, <span><span class="MathJax_Preview"> \delta </span><script type="math/tex"> \delta </script></span>.
<span><span class="MathJax_Preview"> \delta = 1 </span><script type="math/tex"> \delta = 1 </script></span> means the event took place.</dd>
<dt><strong><code>vsize</code></strong> :&ensp;<code>float</code></dt>
<dd>Amount of data to set aside as the validation set.</dd>
<dt><strong><code>iters</code></strong> :&ensp;<code>int</code></dt>
Expand Down Expand Up @@ -641,7 +639,6 @@ <h2 id="parameters">Parameters</h2>
A numpy array of the event/censoring times, \( t \).
e: np.ndarray
A numpy array of the event/censoring indicators, \( \delta \).

\( \delta = 1 \) means the event took place.
vsize: float
Amount of data to set aside as the validation set.
Expand Down
2 changes: 0 additions & 2 deletions docs/dsm_torch.html
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ <h1 class="title">Module <code>dsm.dsm_torch</code></h1>
self.embedding = nn.RNN(inputdim, hidden, layers,
bias=False, batch_first=True)

#self.embedding = nn.ReLU6(self.embedding)


def forward(self, x):
Expand Down Expand Up @@ -518,7 +517,6 @@ <h2 id="parameters">Parameters</h2>
self.embedding = nn.RNN(inputdim, hidden, layers,
bias=False, batch_first=True)

#self.embedding = nn.ReLU6(self.embedding)


def forward(self, x):
Expand Down
17 changes: 15 additions & 2 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,13 @@ <h2 id="deep-recurrent-survival-machines">Deep Recurrent Survival Machines</h2>
model and allows for learning of representations of the input covariates using
<strong>Recurrent Neural Networks</strong> like <strong>LSTMs, GRUs</strong>. Deep Recurrent Survival
Machines is a natural fit to model problems where there are time dependent
covariates.</p>
covariates. Examples include situations where we are working with streaming
data like vital signs, degradation monitoring signals in predictive
maintenance. <strong>DRSM</strong> allows the learnt representations at each time step to
involve historical context from previous time steps. <strong>DRSM</strong> implementation in
<code><a title="dsm" href="#dsm">dsm</a></code> is carried out through an easy to use API that accepts lists of data
streams and corresponding failure times. The module automatically takes care of
appropriate batching and padding of variable length sequences.</p>
<div class="admonition warning">
<p class="admonition-title">Warning:&ensp;Not Implemented Yet!</p>
</div>
Expand Down Expand Up @@ -188,7 +194,14 @@ <h2 id="license">License</h2>
model and allows for learning of representations of the input covariates using
**Recurrent Neural Networks** like **LSTMs, GRUs**. Deep Recurrent Survival
Machines is a natural fit to model problems where there are time dependent
covariates.
covariates. Examples include situations where we are working with streaming
data like vital signs, degradation monitoring signals in predictive
maintenance. **DRSM** allows the learnt representations at each time step to
involve historical context from previous time steps. **DRSM** implementation in
`dsm` is carried out through an easy to use API that accepts lists of data
streams and corresponding failure times. The module automatically takes care of
appropriate batching and padding of variable length sequences.


..warning:: Not Implemented Yet!

Expand Down
6 changes: 2 additions & 4 deletions docs/losses.html
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,6 @@ <h1 class="title">Module <code>dsm.losses</code></h1>
alpha = model.discount
shape, scale, logits = model.forward(x)

#print (shape, scale, logits)

k_ = shape
b_ = scale

Expand Down Expand Up @@ -258,7 +256,7 @@ <h1 class="title">Module <code>dsm.losses</code></h1>
b_ = scale

t_horz = torch.tensor(t_horizon).double()
t_horz = t_horz.repeat(x.shape[0], 1)
t_horz = t_horz.repeat(shape.shape[0], 1)

cdfs = []
for j in range(len(t_horizon)):
Expand Down Expand Up @@ -292,7 +290,7 @@ <h1 class="title">Module <code>dsm.losses</code></h1>
b_ = scale

t_horz = torch.tensor(t_horizon).double()
t_horz = t_horz.repeat(x.shape[0], 1)
t_horz = t_horz.repeat(shape.shape[0], 1)

cdfs = []

Expand Down

0 comments on commit 660497f

Please sign in to comment.