diff --git a/.travis.yml b/.travis.yml
index 1f74dbe..a817389 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,6 @@ python:
   - "3.8"
 os:
   - linux
-  - osx
 # command to install dependencies
 install:
   - pip install -r requirements.txt
@@ -15,4 +14,4 @@ install:
 # command to run tests
 script:
   - python -m pytest tests/
-  - pylint --fail-under=9 dsm/
+  - pylint --fail-under=8 dsm/
diff --git a/docs/datasets.html b/docs/datasets.html
index 1e77ccd..8a0a89a 100644
--- a/docs/datasets.html
+++ b/docs/datasets.html
@@ -31,19 +31,24 @@ Module dsm.datasets

# coding=utf-8
-# Copyright 2020 Chirag Nagpal, Auton Lab.
+# Copyright 2020 Chirag Nagpal
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# This file is part of Deep Survival Machines.
+
+# Deep Survival Machines is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Deep Survival Machines is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Deep Survival Machines.
+# If not, see <https://www.gnu.org/licenses/>.
+
 
 """Utility functions to load standard datasets to train and evaluate the
 Deep Survival Machines models.
@@ -75,13 +80,21 @@ Module dsm.datasets
   return e, t

-def _load_pbc_dataset():
+def _load_pbc_dataset(sequential):
   """Helper function to load and preprocess the PBC dataset

   The Primary biliary cirrhosis (PBC) Dataset [1] is a well-known dataset
   for evaluating survival analysis models with time-dependent covariates.

+  Parameters
+  ----------
+  sequential: bool
+      If True, returns a list of np.arrays for each individual.
+      Else, returns collapsed results for each time step. To train
+      recurrent neural models you would typically use True.
+
   References
   ----------
   [1] Fleming, Thomas R., and David P. Harrington. Counting processes and
@@ -89,7 +102,36 @@ Module dsm.datasets
   """
-  raise NotImplementedError('')
+  data = pkgutil.get_data(__name__, 'datasets/pbc2.csv')
+  data = pd.read_csv(io.BytesIO(data))
+
+  data['histologic'] = data['histologic'].astype(str)
+  dat_cat = data[['drug', 'sex', 'ascites', 'hepatomegaly',
+                  'spiders', 'edema', 'histologic']]
+  dat_num = data[['serBilir', 'serChol', 'albumin', 'alkaline',
+                  'SGOT', 'platelets', 'prothrombin']]
+  age = data['age'] + data['years']
+
+  x1 = pd.get_dummies(dat_cat).values
+  x2 = dat_num.values
+  x3 = age.values.reshape(-1, 1)
+  x = np.hstack([x1, x2, x3])
+
+  time = (data['years'] - data['year']).values
+  event = data['status2'].values
+
+  x = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(x)
+  x_ = StandardScaler().fit_transform(x)
+
+  if not sequential:
+    return x_, time, event
+  else:
+    x, t, e = [], [], []
+    for id_ in sorted(list(set(data['id']))):
+      x.append(x_[data['id'] == id_])
+      t.append(time[data['id'] == id_])
+      e.append(event[data['id'] == id_])
+    return x, t, e

 def _load_support_dataset():
   """Helper function to load and preprocess the SUPPORT dataset.
@@ -128,13 +170,16 @@ Module dsm.datasets
   return x[remove], t[remove], e[remove]

-def load_dataset(dataset='SUPPORT'):
+def load_dataset(dataset='SUPPORT', **kwargs):
   """Helper function to load datasets to test Survival Analysis models.

   Parameters
   ----------
   dataset: str
-      The choice of dataset to load. Currently implemented is 'SUPPORT'.
+      The choice of dataset to load. Currently implemented is 'SUPPORT'
+      and 'PBC'.
+  **kwargs: dict
+      Dataset specific keyword arguments.

   Returns
   ----------
@@ -146,6 +191,9 @@ Module dsm.datasets
   if dataset == 'SUPPORT':
     return _load_support_dataset()
+  if dataset == 'PBC':
+    sequential = kwargs.get('sequential', False)
+    return _load_pbc_dataset(sequential)
   else:
     return NotImplementedError('Dataset '+dataset+' not implemented.')
@@ -184,14 +232,17 @@ Functions
-def load_dataset(dataset='SUPPORT')
+def load_dataset(dataset='SUPPORT', **kwargs)

 Helper function to load datasets to test Survival Analysis models.

 Parameters

 dataset : str
-    The choice of dataset to load. Currently implemented is 'SUPPORT'.
+    The choice of dataset to load. Currently implemented is 'SUPPORT'
+    and 'PBC'.
+**kwargs : dict
+    Dataset specific keyword arguments.

 Returns
@@ -203,13 +254,16 @@ Returns
-def load_dataset(dataset='SUPPORT'):
+def load_dataset(dataset='SUPPORT', **kwargs):
   """Helper function to load datasets to test Survival Analysis models.

   Parameters
   ----------
   dataset: str
-      The choice of dataset to load. Currently implemented is 'SUPPORT'.
+      The choice of dataset to load. Currently implemented is 'SUPPORT'
+      and 'PBC'.
+  **kwargs: dict
+      Dataset specific keyword arguments.

   Returns
   ----------
@@ -221,6 +275,9 @@ Returns
   if dataset == 'SUPPORT':
     return _load_support_dataset()
+  if dataset == 'PBC':
+    sequential = kwargs.get('sequential', False)
+    return _load_pbc_dataset(sequential)
   else:
     return NotImplementedError('Dataset '+dataset+' not implemented.')
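For orientation (not part of the patch itself), the new PBC code path can be exercised like this; the import path follows the module name shown in these docs:

    from dsm import datasets

    # Collapsed form: one row per visit, with times and event indicators.
    x, t, e = datasets.load_dataset('PBC')

    # Per-individual sequences, e.g. for the recurrent (DRSM) models below.
    x_seq, t_seq, e_seq = datasets.load_dataset('PBC', sequential=True)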
diff --git a/docs/datautils.html b/docs/datautils.html
deleted file mode 100644
index bd5779f..0000000
--- a/docs/datautils.html
+++ /dev/null
@@ -1,278 +0,0 @@
-dsm.datautils API documentation
-
-
-

Module dsm.datautils

-
-
-
- -Expand source code - -
# coding=utf-8
-# Copyright 2020 Chirag Nagpal, Auton Lab.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import io, pkgutil
-
-import pandas as pd
-import numpy as np
-
-from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler
-
-def increase_censoring(e, t, p):
-
-  uncens = np.where(e == 1)[0]
-  mask = np.random.choice([False, True], len(uncens), p=[1-p, p])
-  toswitch = uncens[mask]
-
-  e[toswitch] = 0
-  t_ = t[toswitch]
-
-  newt = []
-  for t__ in t_:
-    newt.append(np.random.uniform(1, t__))
-  t[toswitch] = newt
-
-  return e, t
-
-def load_support_dataset():
-
-  """Helper function to load and preprocess the SUPPORT dataset.
-
-  The SUPPORT Dataset comes from the Vanderbilt University study
-  to estimate survival for seriously ill hospitalized adults [1].
-
-  Please refer to http://biostat.mc.vanderbilt.edu/wiki/Main/SupportDesc.
-  for the original datasource.
-
-  [1]: Knaus WA, Harrell FE, Lynn J et al. (1995): The SUPPORT prognostic
-  model: Objective estimates of survival for seriously ill hospitalized
-  adults. Annals of Internal Medicine 122:191-203.
-
-  """
-  data = pkgutil.get_data(__name__, 'datasets/support2.csv')
-  data = pd.read_csv(io.BytesIO(data))
-  x1 = data[['age', 'num.co', 'meanbp', 'wblc', 'hrt', 'resp', 'temp',
-             'pafi', 'alb', 'bili', 'crea', 'sod', 'ph', 'glucose', 'bun',
-             'urine', 'adlp', 'adls']]
-
-  catfeats = ['sex', 'dzgroup', 'dzclass', 'income', 'race', 'ca']
-  x2 = pd.get_dummies(data[catfeats])
-
-  x = np.concatenate([x1, x2], axis=1)
-  t = data['d.time'].values
-  e = data['death'].values
-
-  x = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(x)
-  x = StandardScaler().fit_transform(x)
-
-  remove = ~np.isnan(t)
-  return x[remove], t[remove], e[remove]
-
-
-def load_dataset(dataset='SUPPORT'):
-  """Helper function to load datasets to test Survival Analysis models.
-
-  Parameters
-  ----------
-  dataset: str
-      The choice of dataset to load. Currently implemented is 'SUPPORT'.
-
-  Returns
-  ----------
-  tuple: (np.ndarray, np.ndarray, np.ndarray)
-      A tuple of the form of (x, t, e) where x, t, e are the input covariates,
-      event times and the censoring indicators respectively.
-
-  """
-
-  if dataset == 'SUPPORT':
-    return _load_support_dataset()
-
-
-
-
-
-
-
-

Functions

-
-
-def increase_censoring(e, t, p)
-
-
-
- -Expand source code - -
def increase_censoring(e, t, p):
-
-  uncens = np.where(e == 1)[0]
-  mask = np.random.choice([False, True], len(uncens), p=[1-p, p])
-  toswitch = uncens[mask]
-
-  e[toswitch] = 0
-  t_ = t[toswitch]
-
-  newt = []
-  for t__ in t_:
-    newt.append(np.random.uniform(1, t__))
-  t[toswitch] = newt
-
-  return e, t
-
-
-
-def load_dataset(dataset='SUPPORT')
-
-

Helper function to load datasets to test Survival Analysis models.

-

Parameters

-
-
dataset : str
-
The choice of dataset to load. Currently implemented is 'SUPPORT'.
-
-

Returns

-
-
tuple : (np.ndarray, np.ndarray, np.ndarray)
-A tuple of the form of (x, t, e) where x, t, e are the input covariates,
-event times and the censoring indicators respectively.
-
-
- -Expand source code - -
def load_dataset(dataset='SUPPORT'):
-  """Helper function to load datasets to test Survival Analysis models.
-
-  Parameters
-  ----------
-  dataset: str
-      The choice of dataset to load. Currently implemented is 'SUPPORT'.
-
-  Returns
-  ----------
-  tuple: (np.ndarray, np.ndarray, np.ndarray)
-      A tuple of the form of (x, t, e) where x, t, e are the input covariates,
-      event times and the censoring indicators respectively.
-
-  """
-
-  if dataset == 'SUPPORT':
-    return _load_support_dataset()
-
-
-
-def load_support_dataset()
-
-

Helper function to load and preprocess the SUPPORT dataset.

-

-The SUPPORT Dataset comes from the Vanderbilt University study
-to estimate survival for seriously ill hospitalized adults [1].

-

-Please refer to http://biostat.mc.vanderbilt.edu/wiki/Main/SupportDesc.
-for the original datasource.

-

-[1]: Knaus WA, Harrell FE, Lynn J et al. (1995): The SUPPORT prognostic
-model: Objective estimates of survival for seriously ill hospitalized
-adults. Annals of Internal Medicine 122:191-203.

-
- -Expand source code - -
def load_support_dataset():
-
-  """Helper function to load and preprocess the SUPPORT dataset.
-
-  The SUPPORT Dataset comes from the Vanderbilt University study
-  to estimate survival for seriously ill hospitalized adults [1].
-
-  Please refer to http://biostat.mc.vanderbilt.edu/wiki/Main/SupportDesc.
-  for the original datasource.
-
-  [1]: Knaus WA, Harrell FE, Lynn J et al. (1995): The SUPPORT prognostic
-  model: Objective estimates of survival for seriously ill hospitalized
-  adults. Annals of Internal Medicine 122:191-203.
-
-  """
-  data = pkgutil.get_data(__name__, 'datasets/support2.csv')
-  data = pd.read_csv(io.BytesIO(data))
-  x1 = data[['age', 'num.co', 'meanbp', 'wblc', 'hrt', 'resp', 'temp',
-             'pafi', 'alb', 'bili', 'crea', 'sod', 'ph', 'glucose', 'bun',
-             'urine', 'adlp', 'adls']]
-
-  catfeats = ['sex', 'dzgroup', 'dzclass', 'income', 'race', 'ca']
-  x2 = pd.get_dummies(data[catfeats])
-
-  x = np.concatenate([x1, x2], axis=1)
-  t = data['d.time'].values
-  e = data['death'].values
-
-  x = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(x)
-  x = StandardScaler().fit_transform(x)
-
-  remove = ~np.isnan(t)
-  return x[remove], t[remove], e[remove]
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/dsm_api.html b/docs/dsm_api.html
index 8d180e5..d8009a2 100644
--- a/docs/dsm_api.html
+++ b/docs/dsm_api.html
@@ -31,19 +31,24 @@ Module dsm.dsm_api

# coding=utf-8
-# Copyright 2020 Chirag Nagpal, Auton Lab.
+# Copyright 2020 Chirag Nagpal
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# This file is part of Deep Survival Machines.
+
+# Deep Survival Machines is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Deep Survival Machines is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Deep Survival Machines.
+# If not, see <https://www.gnu.org/licenses/>.
+
 
 """
 This module is a wrapper around torch implementations and
@@ -55,7 +60,6 @@ Module dsm.dsm_api
 from dsm.utilities import train_dsm

 import torch
-import numpy as np

 class DeepSurvivalMachines():
@@ -70,7 +74,7 @@ Module dsm.dsm_api
   References
   ----------
-  [1] <a href="https://arxiv.org/abs/2003.01176">Deep Survival Machines: 
+  [1] <a href="https://arxiv.org/abs/2003.01176">Deep Survival Machines:
   Fully Parametric Survival Regression and
   Representation Learning for Censored Data with Competing Risks."
   arXiv preprint arXiv:2003.01176 (2020)</a>
@@ -124,7 +128,7 @@ Module dsm.dsm_api
     print("Distribution Choice:", self.dist)

-  def fit(self, x, t, e, vsize=0.15, 
+  def fit(self, x, t, e, vsize=0.15,
           iters=1, learning_rate=1e-3, batch_size=100,
           elbo=True, optimizer="Adam", random_state=100):
@@ -150,10 +154,10 @@ Module dsm.dsm_api
         learning is performed on mini-batches of input data.
         this parameter specifies the size of each mini-batch.
     elbo: bool
-        Whether to use the Evidence Lower Bound for Optimization.
+        Whether to use the Evidence Lower Bound for optimization.
         Default is True.
     optimizer: str
-        The choice of the gradient based optimization method. One of 
+        The choice of the gradient based optimization method. One of
         'Adam', 'RMSProp' or 'SGD'.
     random_state: float
         random seed that determines how the validation set is chosen.
@@ -182,7 +186,6 @@ Module dsm.dsm_api
     model = DeepSurvivalMachinesTorch(inputdim,
                                       k=self.k,
                                       layers=self.layers,
-                                      init=False,
                                       dist=self.dist,
                                       temp=self.temp,
                                       discount=self.discount,
@@ -204,7 +207,7 @@ Module dsm.dsm_api
   def predict_risk(self, x, t):
-    """Returns the estimated risk of an event occurring before time \( t \),
+    """Returns the estimated risk of an event occurring before time \( t \)
     \( \widehat{\mathbb{P}}(T\leq t|X) \) for some input data \( x \).

     Parameters
@@ -396,7 +399,7 @@ Example
 References
 ----------
-[1] <a href="https://arxiv.org/abs/2003.01176">Deep Survival Machines: 
+[1] <a href="https://arxiv.org/abs/2003.01176">Deep Survival Machines:
 Fully Parametric Survival Regression and
 Representation Learning for Censored Data with Competing Risks."
 arXiv preprint arXiv:2003.01176 (2020)</a>
@@ -450,7 +453,7 @@ Example
     print("Distribution Choice:", self.dist)

-  def fit(self, x, t, e, vsize=0.15, 
+  def fit(self, x, t, e, vsize=0.15,
           iters=1, learning_rate=1e-3, batch_size=100,
           elbo=True, optimizer="Adam", random_state=100):
@@ -476,10 +479,10 @@ Example
         learning is performed on mini-batches of input data.
         this parameter specifies the size of each mini-batch.
     elbo: bool
-        Whether to use the Evidence Lower Bound for Optimization.
+        Whether to use the Evidence Lower Bound for optimization.
         Default is True.
     optimizer: str
-        The choice of the gradient based optimization method. One of 
+        The choice of the gradient based optimization method. One of
         'Adam', 'RMSProp' or 'SGD'.
     random_state: float
         random seed that determines how the validation set is chosen.
@@ -508,7 +511,6 @@ Example
     model = DeepSurvivalMachinesTorch(inputdim,
                                       k=self.k,
                                       layers=self.layers,
-                                      init=False,
                                       dist=self.dist,
                                       temp=self.temp,
                                       discount=self.discount,
@@ -530,7 +532,7 @@ Example
   def predict_risk(self, x, t):
-    """Returns the estimated risk of an event occurring before time \( t \),
+    """Returns the estimated risk of an event occurring before time \( t \)
     \( \widehat{\mathbb{P}}(T\leq t|X) \) for some input data \( x \).

     Parameters
@@ -613,7 +615,7 @@ Parameters

 learning is performed on mini-batches of input data.
 this parameter specifies the size of each mini-batch.
 elbo : bool
-    Whether to use the Evidence Lower Bound for Optimization.
+    Whether to use the Evidence Lower Bound for optimization.
     Default is True.
 optimizer : str
     The choice of the gradient based optimization method. One of
@@ -625,7 +627,7 @@ Parameters

-def fit(self, x, t, e, vsize=0.15, 
+def fit(self, x, t, e, vsize=0.15,
         iters=1, learning_rate=1e-3, batch_size=100,
         elbo=True, optimizer="Adam", random_state=100):

@@ -651,10 +653,10 @@ Parameters

     learning is performed on mini-batches of input data.
     this parameter specifies the size of each mini-batch.
 elbo: bool
-    Whether to use the Evidence Lower Bound for Optimization.
+    Whether to use the Evidence Lower Bound for optimization.
     Default is True.
 optimizer: str
-    The choice of the gradient based optimization method. One of 
+    The choice of the gradient based optimization method. One of
     'Adam', 'RMSProp' or 'SGD'.
 random_state: float
     random seed that determines how the validation set is chosen.
@@ -683,7 +685,6 @@ Parameters
     model = DeepSurvivalMachinesTorch(inputdim,
                                       k=self.k,
                                       layers=self.layers,
-                                      init=False,
                                       dist=self.dist,
                                       temp=self.temp,
                                       discount=self.discount,
@@ -708,7 +709,7 @@ Parameters

 def predict_risk(self, x, t)

-Returns the estimated risk of an event occurring before time t ,
+Returns the estimated risk of an event occurring before time t
 \widehat{\mathbb{P}}(T\leq t|X) for some input data x .

 Parameters
@@ -728,7 +729,7 @@ Returns
def predict_risk(self, x, t):
-  """Returns the estimated risk of an event occuring before time \( t \),
+  """Returns the estimated risk of an event occuring before time \( t \)
     \( \widehat{\mathbb{P}}(T\leq t|X) \) for some input data \( x \).
 
   Parameters
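To tie these hunks together, here is a minimal usage sketch of the wrapper API they touch. The constructor keywords are assumptions inferred from the attributes (`k`, `layers`, `dist`) that `fit` passes to the torch model, not a confirmed signature:

    from dsm.datasets import load_dataset
    from dsm.dsm_api import DeepSurvivalMachines

    x, t, e = load_dataset('SUPPORT')

    # Constructor keywords assumed from the attributes used inside fit().
    model = DeepSurvivalMachines(k=3, layers=[100, 100])
    model.fit(x, t, e, iters=50, learning_rate=1e-3, batch_size=100)

    # Estimated risk of an event before t = 365, per the docstring above.
    risk_at_1yr = model.predict_risk(x, 365.)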
diff --git a/docs/dsm_torch.html b/docs/dsm_torch.html
index a823b89..36d8a1d 100644
--- a/docs/dsm_torch.html
+++ b/docs/dsm_torch.html
@@ -33,19 +33,24 @@ Module dsm.dsm_torch
# coding=utf-8
-# Copyright 2020 Chirag Nagpal, Auton Lab.
+# Copyright 2020 Chirag Nagpal
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# This file is part of Deep Survival Machines.
+
+# Deep Survival Machines is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Deep Survival Machines is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Deep Survival Machines.
+# If not, see <https://www.gnu.org/licenses/>.
+
 
 """Torch model definitons for the Deep Survival Machines model
 
@@ -63,8 +68,8 @@ Module dsm.dsm_torch
 def create_representation(inputdim, layers, activation):
   """Helper function to generate the representation function for DSM.

-  Deep Survival Machines learns a representation $$\Phi(X)$$ for the input data.
-  This representation is parameterized using a Non Linear Multilayer
+  Deep Survival Machines learns a representation \( \Phi(X) \) for the input
+  data. This representation is parameterized using a Non Linear Multilayer
   Perceptron (`torch.nn.Module`). This is a helper function designed to
   instantiate the representation for Deep Survival Machines.
@@ -145,7 +150,7 @@ Module dsm.dsm_torch
       Default is 1.
   """
-  def __init__(self, inputdim, k, layers=None, init=False, dist='Weibull',
+  def __init__(self, inputdim, k, layers=None, dist='Weibull',
                temp=1000., discount=1.0, optimizer='Adam'):
     super(DeepSurvivalMachinesTorch, self).__init__()
@@ -163,11 +168,13 @@ Module dsm.dsm_torch
       self.act = nn.SELU()
       self.scale = nn.Parameter(-torch.ones(k))
       self.shape = nn.Parameter(-torch.ones(k))
-
     elif self.dist == 'LogNormal':
       self.act = nn.Tanh()
       self.scale = nn.Parameter(torch.ones(k))
       self.shape = nn.Parameter(torch.ones(k))
+    else:
+      raise NotImplementedError('Distribution: '+self.dist+' not implemented'+
+                                ' yet.')

     self.embedding = create_representation(inputdim, layers, 'ReLU6')
@@ -181,10 +188,6 @@ Module dsm.dsm_torch
     self.scaleg = nn.Sequential(nn.Linear(layers[-1], k, bias=True))
     self.shapeg = nn.Sequential(nn.Linear(layers[-1], k, bias=True))

-    if init is not False:
-      self.shape.data.fill_(init[0])
-      self.scale.data.fill_(init[1])
-
   def forward(self, x):
     """The forward function that is called when data is passed through DSM.
@@ -197,6 +200,112 @@ Module dsm.dsm_torch
            self.act(self.scaleg(xrep))+self.scale.expand(x.shape[0], -1),
            self.gate(xrep)/self.temp)

+  def get_shape_scale(self):
+    return(self.shape,
+           self.scale)
+
+class DeepRecurrentSurvivalMachinesTorch(DeepSurvivalMachinesTorch):
+  """A Torch implementation of Deep Recurrent Survival Machines model.
+
+  This is an implementation of Deep Recurrent Survival Machines model
+  in torch. It inherits from `DeepSurvivalMachinesTorch` and replaces the
+  input representation learning MLP with an LSTM or RNN, the parameters of the
+  underlying distributions and the forward function which is called whenever
+  data is passed to the module. Each of the parameters are nn.Parameters and
+  torch automatically keeps track and computes gradients for them.
+
+  .. warning::
+    Not designed to be used directly.
+    Please use the API interface `dsm.dsm_api.DeepRecurrentSurvivalMachines`!!
+
+  Parameters
+  ----------
+  inputdim: int
+      Dimensionality of the input features.
+  k: int
+      The number of underlying parametric distributions.
+  layers: int
+      The number of hidden layers in the LSTM or RNN cell.
+  hidden: int
+      The number of neurons in each hidden layer.
+  init: tuple
+      A tuple for initialization of the parameters for the underlying
+      distributions. (shape, scale).
+  dist: str
+      Choice of the underlying survival distributions.
+      One of 'Weibull', 'LogNormal'.
+      Default is 'Weibull'.
+  temp: float
+      The logits for the gate are rescaled with this value.
+      Default is 1000.
+  discount: float
+      a float in [0,1] that determines how to discount the tail bias
+      from the uncensored instances.
+      Default is 1.
+  """
+
+  def __init__(self, inputdim, k, typ='LSTM', layers=1,
+               hidden=None, dist='Weibull',
+               temp=1000., discount=1.0, optimizer='Adam'):
+    super(DeepSurvivalMachinesTorch, self).__init__()
+
+    self.k = k
+    self.dist = dist
+    self.temp = float(temp)
+    self.discount = float(discount)
+    self.optimizer = optimizer
+    self.hidden = hidden
+    self.layers = layers
+    self.typ = typ
+
+    if self.dist == 'Weibull':
+      self.act = nn.SELU()
+      self.scale = nn.Parameter(-torch.ones(k))
+      self.shape = nn.Parameter(-torch.ones(k))
+    elif self.dist == 'LogNormal':
+      self.act = nn.Tanh()
+      self.scale = nn.Parameter(torch.ones(k))
+      self.shape = nn.Parameter(torch.ones(k))
+    else:
+      raise NotImplementedError('Distribution: '+self.dist+' not implemented'+
+                                ' yet.')
+
+    self.gate = nn.Sequential(nn.Linear(hidden, k, bias=False))
+    self.scaleg = nn.Sequential(nn.Linear(hidden, k, bias=True))
+    self.shapeg = nn.Sequential(nn.Linear(hidden, k, bias=True))
+
+    if self.typ == 'LSTM':
+      self.embedding = nn.LSTM(inputdim, hidden, layers,
+                               bias=False, batch_first=True)
+    if self.typ == 'RNN':
+      self.embedding = nn.RNN(inputdim, hidden, layers,
+                              bias=False, batch_first=True)
+
+  def forward(self, x):
+    """The forward function that is called when data is passed through DSM.
+
+    Note: As compared to DSM, the input data for DRSM is a tensor. The forward
+    function involves unpacking the tensor in-order to directly use the
+    DSM loss functions.
+
+    Args:
+      x:
+        a torch.tensor of the input features.
+    """
+    x = x.detach().clone()
+    inputmask = ~torch.isnan(x[:, :, 0]).reshape(-1)
+    x[torch.isnan(x)] = 0
+    xrep, _ = self.embedding(x)
+    xrep = xrep.contiguous().view(-1, self.hidden)
+    xrep = xrep[inputmask]
+    return(self.act(self.shapeg(xrep))+self.shape.expand(xrep.shape[0], -1),
+           self.act(self.scaleg(xrep))+self.scale.expand(xrep.shape[0], -1),
+           self.gate(xrep)/self.temp)
+
   def get_shape_scale(self):
     return(self.shape,
            self.scale)
@@ -214,8 +323,8 @@ Functions
 Helper function to generate the representation function for DSM.

-Deep Survival Machines learns a representation \Phi(X) for the input data.
-This representation is parameterized using a Non Linear Multilayer
+Deep Survival Machines learns a representation \( \Phi(X) \) for the input
+data. This representation is parameterized using a Non Linear Multilayer
 Perceptron (torch.nn.Module). This is a helper function designed to
 instantiate the representation for Deep Survival Machines.
@@ -240,8 +349,8 @@ Returns
def create_representation(inputdim, layers, activation):
   """Helper function to generate the representation function for DSM.
 
-  Deep Survival Machines learns a representation $$\Phi(X)$$ for the input data.
-  This representation is parameterized using a Non Linear Multilayer
+  Deep Survival Machines learns a representation \( \Phi(X) \) for the input
+  data. This representation is parameterized using a Non Linear Multilayer
   Perceptron (`torch.nn.Module`). This is a helper function designed to
   instantiate the representation for Deep Survival Machines.
 
@@ -285,9 +394,234 @@ Classes
+class DeepRecurrentSurvivalMachinesTorch
+(inputdim, k, typ='LSTM', layers=1, hidden=None, dist='Weibull', temp=1000.0, discount=1.0, optimizer='Adam')
+
+A Torch implementation of Deep Recurrent Survival Machines model.
+
+This is an implementation of Deep Recurrent Survival Machines model
+in torch. It inherits from DeepSurvivalMachinesTorch and replaces the
+input representation learning MLP with an LSTM or RNN, the parameters of the
+underlying distributions and the forward function which is called whenever
+data is passed to the module. Each of the parameters are nn.Parameters and
+torch automatically keeps track and computes gradients for them.
+
+Warning
+
+Not designed to be used directly.
+Please use the API interface DeepRecurrentSurvivalMachines!!
+
+Parameters
+
+inputdim : int
+    Dimensionality of the input features.
+k : int
+    The number of underlying parametric distributions.
+layers : int
+    The number of hidden layers in the LSTM or RNN cell.
+hidden : int
+    The number of neurons in each hidden layer.
+init : tuple
+    A tuple for initialization of the parameters for the underlying
+    distributions. (shape, scale).
+dist : str
+    Choice of the underlying survival distributions.
+    One of 'Weibull', 'LogNormal'.
+    Default is 'Weibull'.
+temp : float
+    The logits for the gate are rescaled with this value.
+    Default is 1000.
+discount : float
+    a float in [0,1] that determines how to discount the tail bias
+    from the uncensored instances.
+    Default is 1.
+
+Initializes internal Module state, shared by both nn.Module and ScriptModule.
class DeepRecurrentSurvivalMachinesTorch(DeepSurvivalMachinesTorch):
+  """A Torch implementation of Deep Recurrent Survival Machines model.
+
+  This is an implementation of Deep Recurrent Survival Machines model
+  in torch. It inherits from `DeepSurvivalMachinesTorch` and replaces the
+  input representation learning MLP with an LSTM or RNN, the parameters of the
+  underlying distributions and the forward function which is called whenever
+  data is passed to the module. Each of the parameters are nn.Parameters and
+  torch automatically keeps track and computes gradients for them.
+
+  .. warning::
+    Not designed to be used directly.
+    Please use the API interface `dsm.dsm_api.DeepRecurrentSurvivalMachines`!!
+
+  Parameters
+  ----------
+  inputdim: int
+      Dimensionality of the input features.
+  k: int
+      The number of underlying parametric distributions.
+  layers: int
+      The number of hidden layers in the LSTM or RNN cell.
+  hidden: int
+      The number of neurons in each hidden layer.
+  init: tuple
+      A tuple for initialization of the parameters for the underlying
+      distributions. (shape, scale).
+  dist: str
+      Choice of the underlying survival distributions.
+      One of 'Weibull', 'LogNormal'.
+      Default is 'Weibull'.
+  temp: float
+      The logits for the gate are rescaled with this value.
+      Default is 1000.
+  discount: float
+      a float in [0,1] that determines how to discount the tail bias
+      from the uncensored instances.
+      Default is 1.
+  """
+
+  def __init__(self, inputdim, k, typ='LSTM', layers=1,
+               hidden=None, dist='Weibull',
+               temp=1000., discount=1.0, optimizer='Adam'):
+    super(DeepSurvivalMachinesTorch, self).__init__()
+
+    self.k = k
+    self.dist = dist
+    self.temp = float(temp)
+    self.discount = float(discount)
+    self.optimizer = optimizer
+    self.hidden = hidden
+    self.layers = layers
+    self.typ = typ
+
+    if self.dist == 'Weibull':
+      self.act = nn.SELU()
+      self.scale = nn.Parameter(-torch.ones(k))
+      self.shape = nn.Parameter(-torch.ones(k))
+    elif self.dist == 'LogNormal':
+      self.act = nn.Tanh()
+      self.scale = nn.Parameter(torch.ones(k))
+      self.shape = nn.Parameter(torch.ones(k))
+    else:
+      raise NotImplementedError('Distribution: '+self.dist+' not implemented'+
+                                ' yet.')
+
+    self.gate = nn.Sequential(nn.Linear(hidden, k, bias=False))
+    self.scaleg = nn.Sequential(nn.Linear(hidden, k, bias=True))
+    self.shapeg = nn.Sequential(nn.Linear(hidden, k, bias=True))
+
+    if self.typ == 'LSTM':
+      self.embedding = nn.LSTM(inputdim, hidden, layers,
+                               bias=False, batch_first=True)
+    if self.typ == 'RNN':
+      self.embedding = nn.RNN(inputdim, hidden, layers,
+                              bias=False, batch_first=True)
+
+    #self.embedding = nn.ReLU6(self.embedding)
+
+
+  def forward(self, x):
+    """The forward function that is called when data is passed through DSM.
+
+    Note: As compared to DSM, the input data for DRSM is a tensor. The forward
+    function involves unpacking the tensor in-order to directly use the
+    DSM loss functions.
+
+    Args:
+      x:
+        a torch.tensor of the input features.
+    """
+    x = x.detach().clone()
+    inputmask = ~torch.isnan(x[:, :, 0]).reshape(-1)
+    x[torch.isnan(x)] = 0
+    xrep, _ = self.embedding(x)
+    xrep = xrep.contiguous().view(-1, self.hidden)
+    xrep = xrep[inputmask]
+    #xrep = nn.ReLU6()(xrep)
+    return(self.act(self.shapeg(xrep))+self.shape.expand(xrep.shape[0], -1),
+           self.act(self.scaleg(xrep))+self.scale.expand(xrep.shape[0], -1),
+           self.gate(xrep)/self.temp)
+
+  def get_shape_scale(self):
+    return(self.shape,
+           self.scale)
+
+Ancestors
+
+DeepSurvivalMachinesTorch
+torch.nn.modules.module.Module
+
+Class variables
+
+var dump_patches : bool
+var training : bool
+
+Methods
+
+def forward(self, x) ‑> Callable[..., Any]
+
+The forward function that is called when data is passed through DSM.
+
+Note: As compared to DSM, the input data for DRSM is a tensor. The forward
+function involves unpacking the tensor in-order to directly use the
+DSM loss functions.
+
+Args
+
+x:
+    a torch.tensor of the input features.
+
def forward(self, x):
+  """The forward function that is called when data is passed through DSM.
+
+  Note: As compared to DSM, the input data for DRSM is a tensor. The forward
+  function involves unpacking the tensor in-order to directly use the
+  DSM loss functions.
+
+  Args:
+    x:
+      a torch.tensor of the input features.
+  """
+  x = x.detach().clone()
+  inputmask = ~torch.isnan(x[:, :, 0]).reshape(-1)
+  x[torch.isnan(x)] = 0
+  xrep, _ = self.embedding(x)
+  xrep = xrep.contiguous().view(-1, self.hidden)
+  xrep = xrep[inputmask]
+  #xrep = nn.ReLU6()(xrep)
+  return(self.act(self.shapeg(xrep))+self.shape.expand(xrep.shape[0], -1),
+         self.act(self.scaleg(xrep))+self.scale.expand(xrep.shape[0], -1),
+         self.gate(xrep)/self.temp)
+
+def get_shape_scale(self)
+
+def get_shape_scale(self):
+  return(self.shape,
+         self.scale)
+
 class DeepSurvivalMachinesTorch
-(inputdim, k, layers=None, init=False, dist='Weibull', temp=1000.0, discount=1.0, optimizer='Adam')
+(inputdim, k, layers=None, dist='Weibull', temp=1000.0, discount=1.0, optimizer='Adam')

A Torch implementation of Deep Survival Machines model.

@@ -378,7 +712,7 @@ Parameters
     Default is 1.
 """

-def __init__(self, inputdim, k, layers=None, init=False, dist='Weibull',
+def __init__(self, inputdim, k, layers=None, dist='Weibull',
              temp=1000., discount=1.0, optimizer='Adam'):
   super(DeepSurvivalMachinesTorch, self).__init__()
@@ -396,11 +730,13 @@ Parameters
     self.act = nn.SELU()
     self.scale = nn.Parameter(-torch.ones(k))
     self.shape = nn.Parameter(-torch.ones(k))
-
   elif self.dist == 'LogNormal':
     self.act = nn.Tanh()
     self.scale = nn.Parameter(torch.ones(k))
     self.shape = nn.Parameter(torch.ones(k))
+  else:
+    raise NotImplementedError('Distribution: '+self.dist+' not implemented'+
+                              ' yet.')

   self.embedding = create_representation(inputdim, layers, 'ReLU6')
@@ -414,10 +750,6 @@ Parameters
   self.scaleg = nn.Sequential(nn.Linear(layers[-1], k, bias=True))
   self.shapeg = nn.Sequential(nn.Linear(layers[-1], k, bias=True))

-  if init is not False:
-    self.shape.data.fill_(init[0])
-    self.scale.data.fill_(init[1])
-
   def forward(self, x):
     """The forward function that is called when data is passed through DSM.
@@ -438,6 +770,10 @@ Ancestors
  • torch.nn.modules.module.Module
+
+Subclasses
+
+DeepRecurrentSurvivalMachinesTorch

Class variables

var dump_patches : bool
@@ -514,6 +850,15 @@ Index
   • Classes
+    • DeepRecurrentSurvivalMachinesTorch
+      • forward
+      • get_shape_scale
     • DeepSurvivalMachinesTorch
       • dump_patches
diff --git a/docs/index.html b/docs/index.html
index 0abbc9e..f460ff4 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -99,24 +99,46 @@ License

 Contributing

 License

 Copyright 2020 Chirag Nagpal, Auton Lab.

-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+Deep Survival Machines is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+Deep Survival Machines is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Deep Survival Machines.
+If not, see https://www.gnu.org/licenses/.

-"""
+# coding=utf-8
        +# Copyright 2020 Chirag Nagpal
        +#
        +# This file is part of Deep Survival Machines.
        +
        +# Deep Survival Machines is free software: you can redistribute it and/or modify
        +# it under the terms of the GNU General Public License as published by
        +# the Free Software Foundation, either version 3 of the License, or
        +# (at your option) any later version.
        +
        +# Deep Survival Machines is distributed in the hope that it will be useful,
        +# but WITHOUT ANY WARRANTY; without even the implied warranty of
        +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        +# GNU General Public License for more details.
        +
        +# You should have received a copy of the GNU General Public License
        +# along with Deep Survival Machines.
        +# If not, see <https://www.gnu.org/licenses/>.
        +
        +"""
         Python package `dsm` provides an API to train the Deep Survival Machines
         and associated models for problems in survival analysis. The underlying model
         is implemented in `pytorch`.
@@ -144,9 +166,9 @@ License
 This is the caption of the figure (a simple paragraph).
-**Deep Survival Machines (DSM)** is a fully parametric approach to model
-Time-to-Event outcomes in the presence of Censoring first introduced in
-[\[1\]](https://arxiv.org/abs/2003.01176). 
+**Deep Survival Machines (DSM)** is a fully parametric approach to model
+Time-to-Event outcomes in the presence of Censoring first introduced in
+[\[1\]](https://arxiv.org/abs/2003.01176).
 In the context of Healthcare ML and Biostatistics,
 this is known as 'Survival Analysis'. The key idea behind
 Deep Survival Machines is to model the underlying event outcome
 distribution as a mixture of some fixed \( k \)
@@ -187,7 +209,7 @@ License
 Please cite the following papers if you are using the `dsm` package.

-[1] [Deep Survival Machines: 
+[1] [Deep Survival Machines:
 Fully Parametric Survival Regression and
 Representation Learning for Censored Data with Competing Risks."
 arXiv preprint arXiv:2003.01176 (2020)](https://arxiv.org/abs/2003.01176)</a>
@@ -220,22 +242,23 @@ License
 Copyright 2020 [Chirag Nagpal](http://cs.cmu.edu/~chiragn),
 [Auton Lab](http://www.autonlab.org).

-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+Deep Survival Machines is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.

-http://www.apache.org/licenses/LICENSE-2.0
+Deep Survival Machines is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.

-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+You should have received a copy of the GNU General Public License
+along with Deep Survival Machines. If not, see <https://www.gnu.org/licenses/>.

 <img style="float: right;" width ="200px" src="https://www.cmu.edu/brand/downloads/assets/images/wordmarks-600x600-min.jpg">
-<img style="float: right;padding-top:50px" src="https://www.autonlab.org/user/themes/auton/images/AutonLogo.png"> 
+<img style="float: right;padding-top:50px" src="https://www.autonlab.org/user/themes/auton/images/AutonLogo.png">

 <br><br><br><br><br>
 <br><br><br><br><br>
diff --git a/docs/losses.html b/docs/losses.html
index 494b3e7..5df6074 100644
--- a/docs/losses.html
+++ b/docs/losses.html
@@ -40,19 +40,23 @@ Module dsm.losses
        # coding=utf-8
        -# Copyright 2020 Chirag Nagpal, Auton Lab.
        +# Copyright 2020 Chirag Nagpal
         #
        -# Licensed under the Apache License, Version 2.0 (the "License");
        -# you may not use this file except in compliance with the License.
        -# You may obtain a copy of the License at
        -#
        -#     http://www.apache.org/licenses/LICENSE-2.0
        -#
        -# Unless required by applicable law or agreed to in writing, software
        -# distributed under the License is distributed on an "AS IS" BASIS,
        -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
        -# See the License for the specific language governing permissions and
        -# limitations under the License.
        +# This file is part of Deep Survival Machines.
        +
        +# Deep Survival Machines is free software: you can redistribute it and/or modify
        +# it under the terms of the GNU General Public License as published by
        +# the Free Software Foundation, either version 3 of the License, or
        +# (at your option) any later version.
        +
        +# Deep Survival Machines is distributed in the hope that it will be useful,
        +# but WITHOUT ANY WARRANTY; without even the implied warranty of
        +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        +# GNU General Public License for more details.
        +
        +# You should have received a copy of the GNU General Public License
        +# along with Deep Survival Machines.
        +# If not, see <https://www.gnu.org/licenses/>.
         
         """Loss function definitions for the Deep Survival Machines model
         
@@ -93,7 +97,6 @@ Module dsm.losses
   uncens = np.where(e == 1)[0]
   cens = np.where(e == 0)[0]
-
   ll += f[uncens].sum() + s[cens].sum()

   return -ll.mean()
@@ -179,9 +182,9 @@ Module dsm.losses
   uncens = np.where(e.cpu().data.numpy() == 1)[0]
   cens = np.where(e.cpu().data.numpy() == 0)[0]
-
   ll = lossf[uncens].sum() + alpha*losss[cens].sum()
-  return -ll/x.shape[0]
+
+  return -ll.mean()

 def _conditional_weibull_loss(model, x, t, e, elbo=True):
@@ -189,6 +192,8 @@ Module dsm.losses
   alpha = model.discount
   shape, scale, logits = model.forward(x)

+  #print (shape, scale, logits)
+
   k_ = shape
   b_ = scale
@@ -228,9 +233,9 @@ Module dsm.losses
   uncens = np.where(e.cpu().data.numpy() == 1)[0]
   cens = np.where(e.cpu().data.numpy() == 0)[0]
-
   ll = lossf[uncens].sum() + alpha*losss[cens].sum()
-  return -ll/x.shape[0]
+
+  return -ll.mean()

 def conditional_loss(model, x, t, e, elbo=True):
@@ -269,9 +274,7 @@ Module dsm.losses
     lcdfs.append(s)

   lcdfs = torch.stack(lcdfs, dim=1)
-
   lcdfs = lcdfs+logits
-
   lcdfs = torch.logsumexp(lcdfs, dim=1)
   cdfs.append(lcdfs.detach().numpy())
@@ -309,7 +312,6 @@ Module dsm.losses
     lcdfs.append(s)

   lcdfs = torch.stack(lcdfs, dim=1)
-
   lcdfs = lcdfs+logits
   lcdfs = torch.logsumexp(lcdfs, dim=1)
   cdfs.append(lcdfs.detach().numpy())
@@ -321,7 +323,6 @@ Module dsm.losses
   torch.no_grad()
   if model.dist == 'Weibull':
     return _weibull_cdf(model, x, t_horizon)
-
   if model.dist == 'LogNormal':
     return _lognormal_cdf(model, x, t_horizon)
@@ -364,7 +365,6 @@ Functions
   torch.no_grad()
   if model.dist == 'Weibull':
     return _weibull_cdf(model, x, t_horizon)
-
   if model.dist == 'LogNormal':
     return _lognormal_cdf(model, x, t_horizon)
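The loss hunks above keep the usual censored-likelihood split: uncensored rows contribute the log density, censored rows the log survival. A self-contained toy illustration of that split for a single Weibull (illustrative only, not the library's mixture loss):

    import torch

    def toy_weibull_loglik(t, e, shape, scale):
        # Weibull: S(t) = exp(-(t/b)^k), f(t) = (k/b) (t/b)^(k-1) S(t).
        logS = -(t / scale) ** shape
        logf = (torch.log(shape) - torch.log(scale)
                + (shape - 1) * (torch.log(t) - torch.log(scale)) + logS)
        # Uncensored (e == 1) rows use log f(t); censored rows use log S(t).
        ll = torch.where(e == 1, logf, logS)
        return -ll.mean()  # the reduction the PR switches to

    t = torch.tensor([2.0, 5.0, 3.5])
    e = torch.tensor([1, 0, 1])
    print(toy_weibull_loglik(t, e, torch.tensor(1.5), torch.tensor(4.0)))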
diff --git a/docs/utilities.html b/docs/utilities.html
index 7a86166..aa9dece 100644
--- a/docs/utilities.html
+++ b/docs/utilities.html
@@ -29,22 +29,27 @@ Module dsm.utilities
    # coding=utf-8
    -# Copyright 2020 Chirag Nagpal, Auton Lab.
    +# Copyright 2020 Chirag Nagpal
     #
    -# Licensed under the Apache License, Version 2.0 (the "License");
    -# you may not use this file except in compliance with the License.
    -# You may obtain a copy of the License at
    -#
    -#     http://www.apache.org/licenses/LICENSE-2.0
    -#
    -# Unless required by applicable law or agreed to in writing, software
    -# distributed under the License is distributed on an "AS IS" BASIS,
    -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    -# See the License for the specific language governing permissions and
    -# limitations under the License.
    +# This file is part of Deep Survival Machines.
    +
    +# Deep Survival Machines is free software: you can redistribute it and/or modify
    +# it under the terms of the GNU General Public License as published by
    +# the Free Software Foundation, either version 3 of the License, or
    +# (at your option) any later version.
    +
    +# Deep Survival Machines is distributed in the hope that it will be useful,
    +# but WITHOUT ANY WARRANTY; without even the implied warranty of
    +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    +# GNU General Public License for more details.
    +
    +# You should have received a copy of the GNU General Public License
    +# along with Deep Survival Machines.
    +# If not, see <https://www.gnu.org/licenses/>.
     
     """Utility functions to train the Deep Survival Machines models"""
     
    +from dsm.dsm_torch import DeepSurvivalMachinesTorch
     from dsm.losses import unconditional_loss, conditional_loss
     
     from tqdm import tqdm
@@ -55,7 +60,6 @@ Module dsm.utilities
 import gc

-from dsm.dsm_torch import DeepSurvivalMachinesTorch

 def get_optimizer(model, lr):
@@ -66,18 +70,18 @@ Module dsm.utilities
   elif model.optimizer == 'RMSProp':
     return torch.optim.RMSprop(model.parameters(), lr=lr)
   else:
-    raise NotImplementedError("Optimizer "+model.optimizer+
-                              " is not implemented")
-
+    raise NotImplementedError('Optimizer '+model.optimizer+
+                              ' is not implemented')
+
 def pretrain_dsm(model, t_train, e_train, t_valid, e_valid,
                  n_iter=10000, lr=1e-2, thres=1e-4):

   premodel = DeepSurvivalMachinesTorch(1, 1,
-                                       init=False,
-                                       dist=model.dist)
+                                       dist=model.dist)
   premodel.double()
   optimizer = torch.optim.Adam(premodel.parameters(), lr=lr)

-  oldcost = -float('inf')
+  oldcost = float('inf')
   patience = 0
   costs = []
@@ -102,6 +106,10 @@ Module dsm.utilities
   return premodel

+def _reshape_tensor_with_nans(data):
+  """Helper function to unroll padded RNN inputs"""
+  data = data.reshape(-1)
+  return data[~torch.isnan(data)]

 def train_dsm(model,
               x_train, t_train, e_train,
@@ -111,24 +119,27 @@ Module dsm.utilities
   print('Pretraining the Underlying Distributions...')

+  print(t_train.shape, e_train.shape)
+
+  t_train_ = _reshape_tensor_with_nans(t_train)
+  e_train_ = _reshape_tensor_with_nans(e_train)
+  t_valid_ = _reshape_tensor_with_nans(t_valid)
+  e_valid_ = _reshape_tensor_with_nans(e_valid)
+
+  print(t_train_.shape, e_train_.shape)
+
   premodel = pretrain_dsm(model,
-                          t_train,
-                          e_train,
-                          t_valid,
-                          e_valid,
+                          t_train_,
+                          e_train_,
+                          t_valid_,
+                          e_valid_,
                           n_iter=10000,
                           lr=1e-2,
                           thres=1e-4)
   model.shape.data.fill_(float(premodel.shape))
   model.scale.data.fill_(float(premodel.scale))

-  # print(premodel.shape, premodel.scale)
-  # print(model.shape, model.scale)
-
-  # init=(float(premodel.shape[0]),
-  #       float(premodel.scale[0])),
-  # print(torch.exp(-premodel.scale).cpu().data.numpy()[0],
-  #       torch.exp(premodel.shape).cpu().data.numpy()[0])
+  print(float(premodel.shape), float(premodel.scale))

   model.double()
   optimizer = torch.optim.Adam(model.parameters(), lr=lr)
@@ -144,19 +155,24 @@ Module dsm.utilities
   for i in tqdm(range(n_iter)):
     for j in range(nbatches):

+      xb = x_train[j*bs:(j+1)*bs]
+      tb = t_train[j*bs:(j+1)*bs]
+      eb = e_train[j*bs:(j+1)*bs]
+
       optimizer.zero_grad()
       loss = conditional_loss(model,
-                              x_train[j*bs:(j+1)*bs],
-                              t_train[j*bs:(j+1)*bs],
-                              e_train[j*bs:(j+1)*bs],
+                              xb,
+                              _reshape_tensor_with_nans(tb),
+                              _reshape_tensor_with_nans(eb),
                               elbo=elbo)
+      #print ("Train Loss:", float(loss))
       loss.backward()
       optimizer.step()

     valid_loss = conditional_loss(model,
                                   x_valid,
-                                  t_valid,
-                                  e_valid,
+                                  t_valid_,
+                                  e_valid_,
                                   elbo=False)

     valid_loss = valid_loss.detach().cpu().numpy()
@@ -206,8 +222,8 @@ Functions
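To make the padding convention concrete, here is a small standalone check of what the new `_reshape_tensor_with_nans` helper does (the helper body is taken verbatim from the hunk above; the input values are illustrative):

    import torch

    def _reshape_tensor_with_nans(data):
        """Helper function to unroll padded RNN inputs."""
        data = data.reshape(-1)
        return data[~torch.isnan(data)]

    # Padded event times for two individuals (3 slots each, NaN = padding).
    t = torch.tensor([[2.0, 5.0, float('nan')],
                      [1.0, float('nan'), float('nan')]])
    print(_reshape_tensor_with_nans(t))  # tensor([2., 5., 1.])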

 elif model.optimizer == 'RMSProp':
   return torch.optim.RMSprop(model.parameters(), lr=lr)
 else:
-  raise NotImplementedError("Optimizer "+model.optimizer+
-                            " is not implemented")
+  raise NotImplementedError('Optimizer '+model.optimizer+
+                            ' is not implemented')
@@ -223,11 +239,11 @@ Functions

 n_iter=10000, lr=1e-2, thres=1e-4):

 premodel = DeepSurvivalMachinesTorch(1, 1,
-                                     init=False,
-                                     dist=model.dist)
+                                     dist=model.dist)
 premodel.double()
 optimizer = torch.optim.Adam(premodel.parameters(), lr=lr)

-oldcost = -float('inf')
+oldcost = float('inf')
 patience = 0
 costs = []
@@ -270,24 +286,27 @@ Functions
 print('Pretraining the Underlying Distributions...')

+print(t_train.shape, e_train.shape)
+
+t_train_ = _reshape_tensor_with_nans(t_train)
+e_train_ = _reshape_tensor_with_nans(e_train)
+t_valid_ = _reshape_tensor_with_nans(t_valid)
+e_valid_ = _reshape_tensor_with_nans(e_valid)
+
+print(t_train_.shape, e_train_.shape)
+
 premodel = pretrain_dsm(model,
-                        t_train,
-                        e_train,
-                        t_valid,
-                        e_valid,
+                        t_train_,
+                        e_train_,
+                        t_valid_,
+                        e_valid_,
                         n_iter=10000,
                         lr=1e-2,
                         thres=1e-4)
 model.shape.data.fill_(float(premodel.shape))
 model.scale.data.fill_(float(premodel.scale))

-# print(premodel.shape, premodel.scale)
-# print(model.shape, model.scale)
-
-# init=(float(premodel.shape[0]),
-#       float(premodel.scale[0])),
-# print(torch.exp(-premodel.scale).cpu().data.numpy()[0],
-#       torch.exp(premodel.shape).cpu().data.numpy()[0])
+print(float(premodel.shape), float(premodel.scale))

 model.double()
 optimizer = torch.optim.Adam(model.parameters(), lr=lr)
@@ -303,19 +322,24 @@ Functions
 for i in tqdm(range(n_iter)):
   for j in range(nbatches):

+    xb = x_train[j*bs:(j+1)*bs]
+    tb = t_train[j*bs:(j+1)*bs]
+    eb = e_train[j*bs:(j+1)*bs]
+
     optimizer.zero_grad()
     loss = conditional_loss(model,
-                            x_train[j*bs:(j+1)*bs],
-                            t_train[j*bs:(j+1)*bs],
-                            e_train[j*bs:(j+1)*bs],
+                            xb,
+                            _reshape_tensor_with_nans(tb),
+                            _reshape_tensor_with_nans(eb),
                             elbo=elbo)
+    #print ("Train Loss:", float(loss))
     loss.backward()
     optimizer.step()

   valid_loss = conditional_loss(model,
                                 x_valid,
-                                t_valid,
-                                e_valid,
+                                t_valid_,
+                                e_valid_,
                                 elbo=False)

   valid_loss = valid_loss.detach().cpu().numpy()
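One last note on the `oldcost = -float('inf')` to `float('inf')` fix that appears twice above: with a cost that is being minimized, the previous best must start at positive infinity, otherwise the comparison against the improvement threshold misbehaves from the first step. A self-contained sketch of that early-stopping pattern; the threshold, patience limit, and loop structure are assumed for illustration, since `pretrain_dsm`'s internals are not fully shown in this diff:

    # Hypothetical early-stopping loop (values illustrative).
    costs = [5.0, 4.2, 4.19995, 4.19991, 4.19990]  # toy, flattening curve
    oldcost, patience, thres = float('inf'), 0, 1e-4

    for cost in costs:
        if abs(oldcost - cost) < thres:  # no meaningful improvement
            patience += 1
            if patience == 3:
                break
        else:
            patience = 0
        oldcost = cost
    print('stopped with cost', cost)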