diff --git a/METABRIC.ipynb b/METABRIC.ipynb
index f451bd4..e1a2693 100644
--- a/METABRIC.ipynb
+++ b/METABRIC.ipynb
@@ -35,26 +35,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 43,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[43.68333435058594, 86.86666870117188, 146.33333587646484, 283.5426806640625]"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "\n",
-    "\n",
    "dat1 = df[['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']]\n",
    "times = (df['duration'].values+1)\n",
    "events = df['event'].values\n",
    "data = dat1.to_numpy()\n",
-    "folds = np.array([1]*191 + [2]*191 + [3]*191 + [4]*191 + [5]*190 + [6]*190 + [7]*190 + [8]*190 + [9]*190 + [10]*190 )\n",
-    "np.random.seed(100)\n",
+    "folds = np.array([1]*381 + [2]*381 + [3]*381 + [4]*381 + [5]*380 )\n",
+    "np.random.seed(0)\n",
    "np.random.shuffle(folds)\n",
-    "quantiles = np.quantile(times[events==1], [0.25, .5, .75, .99]).tolist()"
+    "np.quantile(times[events==1], [0.25, .5, .75, .99]).tolist()"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -65,7 +75,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -74,7 +84,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
-     "execution_count": 32,
+     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
@@ -98,252 +108,88 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": null,
   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "  0%|          | 0/10000 [00:00
[... remaining removed output lines elided: truncated tqdm progress strings and an ANSI-escaped KeyboardInterrupt traceback, interrupted in loss.backward() inside dsm_utilites.trainDSM ...]
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
-    }
-   ],
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "39716c9dc58548e5afb3f314a08febe3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3.5295747584993196 (0.7436527098215582, 0.6811342904030737, 0.5970621901813846, 0.611855947999837)\n",
+      "3.515131386970598 (0.7386963678694428, 0.6716928435166729, 0.5849140201392863, 0.62534764825785)\n",
+      "3.5158226769628036 (0.7318455746593371, 0.6699128823791801, 0.5911467483533817, 0.6331405037220301)\n",
+      "3.515479804334142 (0.7297725821056623, 0.672073403791732, 0.5936461114676398, 0.6369502882527434)\n",
+      "3.5152090747964597 (0.7299993252087804, 0.6723811713623161, 0.5948084129361593, 0.6397806508689314)\n",
+      "3.515229667328146 (0.727796010086639, 0.6730755262613353, 0.5972531310617756, 0.6420898707648605)\n",
+      "3.515459661315446 (0.7282370222892353, 0.6703104281959925, 0.5979750789683245, 0.6429366444808753)\n",
+      "3.5152011552733837 (0.7284817501550502, 0.6689068376100378, 0.5977576124523662, 0.6441363144872003)\n",
+      "3.5156224070684705 (0.7298338654512917, 0.6682243802957398, 0.5976506274847705, 0.645576700159252)\n",
+      "3.516007568690427 (0.7302225119493331, 0.6672430764040999, 0.5983333250469652, 0.6398588975503352)\n",
+      "3.516402141571075 (0.7294949009807973, 0.6672938918473681, 0.5990394217700785, 0.6353057345001489)\n"
+     ]
+    }
+   ],
   "source": [
    "#set parameter grid\n",
-    "params = [{'G':4, 'mlptyp':1,'HIDDEN':[], 'n_iter':int(500), 'lr':1e-3, 'ELBO':True, 'mean':False, \\\n",
-    " 'lambd':0, 'alpha':1,'thres':1e-3, 'bs':int(25)}] \n",
+    "params = [{'G':6, 'mlptyp':2,'HIDDEN':[100], 'n_iter':int(1000), 'lr':1e-3, 'ELBO':True, 'mean':False, \\\n",
+    " 'lambd':0, 'alpha':1,'thres':1e-3, 'bs':int(25)}]\n",
+    "\n",
    "\n",
    "#set val data size\n",
    "vsize = int(0.15*1523)\n",
@@ -417,7 +263,7 @@
    " model = dsm.DeepSurvivalMachines(D, K, mlptyp, HIDDEN, dist='Weibull')\n",
    " model.double()\n",
    " \n",
-    " dsm_utilites.trainDSM(model,quantiles,x_train, t_train, e_train, x_valid, t_valid, e_valid,lr=lr,bs=bs,alpha=alpha )\n",
+    " model, i = dsm_utilites.trainDSM(model,quantiles,x_train, t_train, e_train, x_valid, t_valid, e_valid,lr=lr,bs=bs,alpha=alpha )\n",
    " \n",
    " \n",
    " print (\"TEST PERFORMANCE\")\n",
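The notebook hunk above replaces the old ten-fold assignment (191/190-sized folds) with five folds of 381/381/381/381/380 and reseeds the shuffle, while the evaluation horizons remain the 25th, 50th, 75th and 99th percentiles of the uncensored event times. A minimal sketch of the same assignment pattern, assuming a cohort size of n = 1904 as implied by the fold sizes (4*381 + 380); this is an illustration of the pattern, not the notebook's exact cell:

import numpy as np

n, k = 1904, 5                                              # cohort size implied by the fold sizes
sizes = [len(s) for s in np.array_split(np.arange(n), k)]   # [381, 381, 381, 381, 380]
folds = np.concatenate([np.full(sz, i + 1) for i, sz in enumerate(sizes)])
rng = np.random.RandomState(0)                              # seed 0, matching the new cell
rng.shuffle(folds)                                          # random fold membership, fixed sizes

Note that with five folds the per-fold training set has 1904 - 381 = 1523 subjects, which is where the notebook's later "vsize = int(0.15*1523)" validation split comes from.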
diff --git a/dsm_utilites.py b/dsm_utilites.py
index 1c03337..e1b8970 100644
--- a/dsm_utilites.py
+++ b/dsm_utilites.py
@@ -54,9 +54,10 @@ def increaseCensoring(e, t, p):
 
 
 def pretrainDSM(model, x_train, t_train, e_train, x_valid, t_valid, e_valid, \
-                n_iter=10000, lr=1e-3, thres=1e-4):
+                n_iter=1000, lr=1e-2, thres=1e-4):
 
-    from tqdm import tqdm
+
+    from tqdm import tqdm_notebook as tqdm
     from dsm_loss import unConditionalLoss
     from dsm import DeepSurvivalMachines
     import torch
@@ -69,7 +70,7 @@ def pretrainDSM(model, x_train, t_train, e_train, x_valid, t_valid, e_valid, \
 
     torch.manual_seed(0)
 
-    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    optimizer = torch.optim.Adam(premodel.parameters(), lr=lr)
 
     oldcost = -float('inf')
@@ -79,6 +80,7 @@ def pretrainDSM(model, x_train, t_train, e_train, x_valid, t_valid, e_valid, \
 
     for i in tqdm(range(n_iter)):
 
+        optimizer.zero_grad()
         loss = unConditionalLoss(premodel, x_train, t_train, e_train) # not conditioned on X
@@ -92,8 +94,9 @@ def pretrainDSM(model, x_train, t_train, e_train, x_valid, t_valid, e_valid, \
         valid_loss = valid_loss.detach().cpu().numpy()
         costs.append(valid_loss)
-
-
+
+        # print(valid_loss)
+
         if np.abs(costs[-1] - oldcost) < thres:
             patience += 1
@@ -104,7 +107,7 @@ def pretrainDSM(model, x_train, t_train, e_train, x_valid, t_valid, e_valid, \
 
     oldcost = costs[-1]
 
-    return model
+    return premodel
 
 
 def trainDSM(model,quantiles , x_train, t_train, e_train, x_valid, t_valid, e_valid, \
@@ -114,7 +117,7 @@ def trainDSM(model,quantiles , x_train, t_train, e_train, x_valid, t_valid, e_valid, \
     import torch
     import numpy as np
-    from tqdm import tqdm
+    from tqdm import tqdm_notebook as tqdm
     from dsm import DeepSurvivalMachines
     from dsm_loss import conditionalLoss
     from copy import deepcopy
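Read together, the pretrainDSM hunks above fix a real training bug rather than just retuning defaults: the loss is computed on premodel, but the optimizer was built over model.parameters() and gradients were never zeroed, so premodel was never actually updated, and the untrained model was returned. A condensed sketch of the corrected loop, with premodel and loss_fn standing in for the repo's objects (an illustration, not the file's full code, which also tracks validation cost and patience):

import torch

def pretrain(premodel, loss_fn, x, t, e, n_iter=1000, lr=1e-2):
    # The optimizer must own the parameters the loss actually touches.
    optimizer = torch.optim.Adam(premodel.parameters(), lr=lr)
    for _ in range(n_iter):
        optimizer.zero_grad()              # clear gradients accumulated by the previous step
        loss = loss_fn(premodel, x, t, e)  # unconditional (marginal) survival likelihood
        loss.backward()
        optimizer.step()
    return premodel                        # return the object that was trained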
@@ -127,9 +130,14 @@ def trainDSM(model,quantiles , x_train, t_train, e_train, x_valid, t_valid, e_valid, \
 
     print ("Pretraining the Underlying Distributions...")
     premodel = pretrainDSM(model, x_train, t_train, e_train, x_valid, t_valid, e_valid, \
-                           n_iter=10000, lr=1e-3, thres=1e-4)
+                           n_iter=1000, lr=1e-2, thres=1e-4)
+
+
+    print(torch.exp(-premodel.scale).cpu().data.numpy()[0], \
+          torch.exp(premodel.shape).cpu().data.numpy()[0])
+
     model = DeepSurvivalMachines(x_train.shape[1], G, mlptyp=mlptyp, HIDDEN=HIDDEN, \
                                  init=(float(premodel.shape[0]), float(premodel.scale[0]) ))
@@ -169,6 +177,8 @@ def trainDSM(model,quantiles , x_train, t_train, e_train, x_valid, t_valid, e_valid, \
 
         out = computeCIScores(model,quantiles, x_valid, t_valid, e_valid, t_train, e_train)
 
+        #print(valid_loss, out)
+
         valid_loss = np.mean(out)
         costs.append(valid_loss)
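The print added in the @@ -127 hunk exposes the pretrained parameters as exp(-premodel.scale) and exp(premodel.shape) before they seed the conditional model through init=(premodel.shape[0], premodel.scale[0]). A worked sketch under the reading suggested by that print, which is an assumption, not confirmed by the patch: the shape and scale of a standard two-parameter Weibull are stored on a log scale.

import torch

# Hypothetical stand-ins for premodel.shape / premodel.scale after pretraining.
log_shape = torch.tensor([0.3])
neg_log_scale = torch.tensor([-4.5])

k = torch.exp(log_shape)         # Weibull shape, the second value printed
lam = torch.exp(-neg_log_scale)  # Weibull scale, the first value printed

# Survival and density of the fitted marginal distribution at a time horizon t:
t = torch.tensor([100.0])
survival = torch.exp(-(t / lam) ** k)                   # S(t) = exp(-(t/lam)^k)
density = (k / lam) * (t / lam) ** (k - 1) * survival   # f(t) = (k/lam) (t/lam)^(k-1) S(t)

Warm-starting the mixture components from this marginal fit gives every component a plausible time scale before the conditional (covariate-dependent) training begins, which is consistent with the init=(...) call in the hunk above.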