From ddedb91e680e48fcd3054c1089e13d4bea61412a Mon Sep 17 00:00:00 2001
From: Chirag Nagpal
Date: Wed, 16 Feb 2022 06:48:35 -0500
Subject: [PATCH] modified:   auton_survival/__init__.py
 modified:   auton_survival/datasets.py
 modified:   auton_survival/metrics.py

---
 auton_survival/__init__.py |  35 +++++--
 auton_survival/datasets.py |   5 +-
 auton_survival/metrics.py  | 197 +++++++++++++++++++------------------
 3 files changed, 128 insertions(+), 109 deletions(-)

diff --git a/auton_survival/__init__.py b/auton_survival/__init__.py
index 3adb247..65c1a0c 100644
--- a/auton_survival/__init__.py
+++ b/auton_survival/__init__.py
@@ -1,4 +1,4 @@
-'''
+r'''
 [![Build Status](https://travis-ci.org/autonlab/DeepSurvivalMachines.svg?branch=master)](https://travis-ci.org/autonlab/DeepSurvivalMachines)
 &nbsp;&nbsp;&nbsp;
@@ -6,7 +6,7 @@
 &nbsp;&nbsp;&nbsp;
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 &nbsp;&nbsp;&nbsp;
-[![GitHub Repo stars](https://img.shields.io/github/stars/autonlab/DeepSurvivalMachines?style=social)](https://github.com/autonlab/DeepSurvivalMachines)
+[![GitHub Repo stars](https://img.shields.io/github/stars/autonlab/auton-survival?style=social)](https://github.com/autonlab/auton-survival)
 
 Python package `auton_survival` provides a flexible API for various problems
@@ -14,7 +14,7 @@
 and phenotyping.
 
 What is Survival Analysis?
--------------------------
+--------------------------
 
 **Survival Analysis** involves estimating when an event of interest, \( T \)
 would take places given some features or covariates \( X \). In statistics
@@ -27,11 +27,13 @@
 * There is presence of censoring ie. a large number of instances of data are
   lost to follow up.
 
-# Auton Survival
+Auton Survival
+----------------
 
 Repository of reusable code utilities for Survival Analysis projects.
 
-## `auton_survival.datasets`
+Dataset Loading and Preprocessing
+---------------------------------
 
 Helper functions to load various trial data like `TOPCAT`, `BARI2D` and `ALLHAT`.
 
@@ -41,7 +43,7 @@
 features, outcomes = datasets.load_topcat()
 ```
 
-## `auton_survival.preprocessing`
+### `auton_survival.preprocessing`
 This module provides a flexible API to perform imputation and data
 normalization for downstream machine learning models. The module has 3
 distinct classes, `Scaler`, `Imputer` and `Preprocessor`. The `Preprocessor`
@@ -58,9 +60,14 @@ class is a composite transform that does both Imputing ***and*** Scaling.
                                   num_feats=['height', 'weight'])
 
 # The `cat_feats` and `num_feats` lists would contain all the categorical and numerical features in the dataset.
+
 ```
 
-## `auton_survival.estimators`
+
+Survival Regression
+-------------------
+
+### `auton_survival.estimators`
 
 This module provids a wrapper to model BioLINNC datasets with standard
 survival (time-to-event) analysis methods.
@@ -87,7 +94,7 @@ class is a composite transform that does both Imputing ***and*** Scaling.
 ```
 
-## `auton_survival.experiments`
+### `auton_survival.experiments`
 
 Modules to perform standard survival analysis experiments. This module
 provides a top-level interface to run `auton_survival` Style experiments
@@ -118,7 +125,17 @@ class is a composite transform that does both Imputing ***and*** Scaling.
 print(scores)
 ```
 
-## `auton_survival.reporting`
+
+Phenotyping and Knowledge Discovery
+-----------------------------------
+
+### `auton_survival.phenotyping`
+
+
+Reporting
+----------
+
+### `auton_survival.reporting`
 
 Helper functions to generate standard reports for popular Survival Analysis
 problems.
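For orientation, here is a minimal sketch of the imputation-and-scaling flow that the reorganized docstring above describes. It is illustrative only: `load_topcat` and `num_feats=['height', 'weight']` come from the docstring's own example, while the `Preprocessor().fit_transform` call and the categorical column names are assumptions about the package API rather than part of this patch.

```python
# Sketch: load a study dataset, then impute and scale it with the composite
# Preprocessor transform described in the docstring above.
from auton_survival import datasets
from auton_survival.preprocessing import Preprocessor

features, outcomes = datasets.load_topcat()

# 'GENDER' and 'RACE' are hypothetical categorical columns; 'height' and
# 'weight' are the numeric features used in the docstring example.
features = Preprocessor().fit_transform(features,
                                        cat_feats=['GENDER', 'RACE'],
                                        num_feats=['height', 'weight'])
```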
diff --git a/auton_survival/datasets.py b/auton_survival/datasets.py
index 68566db..e39b5e0 100644
--- a/auton_survival/datasets.py
+++ b/auton_survival/datasets.py
@@ -296,8 +296,9 @@ def load_dataset(dataset='SUPPORT', **kwargs):
   Returns
   ----------
   tuple: (np.ndarray, np.ndarray, np.ndarray)
-      A tuple of the form of (x, t, e) where x, t, e are the input covariates,
-      event times and the censoring indicators respectively.
+      A tuple of the form of \( (x, t, e) \) where \( x \)
+      are the input covariates, \( t \) the event times and
+      \( e \) the censoring indicators.
 
   """
   sequential = kwargs.get('sequential', False)

diff --git a/auton_survival/metrics.py b/auton_survival/metrics.py
index 321c76a..afae64e 100644
--- a/auton_survival/metrics.py
+++ b/auton_survival/metrics.py
@@ -8,6 +8,105 @@
 from tqdm import tqdm
 
 
+def survival_diff_metric(metric, outcomes, treatment_indicator,
+                         weights=None, horizon=None, interpolate=True,
+                         weights_clip=1e-2,
+                         n_bootstrap=None, size_bootstrap=1.0, random_seed=0):
+
+  r"""Metrics for comparing population level survival outcomes across treatment arms.
+
+  Parameters
+  ----------
+  metric : str
+      The metric to evaluate. One of:
+
+      - **`hazard_ratio`**
+      - **`restricted_mean`**
+      - **`survival_at`**
+  outcomes : pd.DataFrame
+      The outcomes to compare. A pd.DataFrame with columns 'time' and 'event'.
+  treatment_indicator : np.array
+      Boolean numpy array of treatment indicators. True means the individual
+      was assigned treatment.
+  weights : pd.Series
+      Treatment assignment propensity scores, \( \widehat{\mathbb{P}}(A|X=x) \).
+      If None, all weights are set to 0.5. Default is None.
+  horizon : float
+      The time horizon at which to compare the survival curves.
+      Must be specified for the metrics 'restricted_mean' and 'survival_at'.
+      For 'hazard_ratio' this is ignored.
+  interpolate : bool
+      Whether to interpolate the survival curves. Default is True.
+  weights_clip : float
+      Weights below this value are clamped. This is to ensure IPTW estimation
+      is numerically stable. Large weights can result in estimators with high
+      variance.
+  n_bootstrap : int
+      The number of bootstrap samples to use. Default is None.
+      If None, no bootstrapping is performed.
+  size_bootstrap : float
+      The fraction of the population to sample for each bootstrap sample.
+      Default is 1.0.
+  random_seed : int
+      The random seed to use for bootstrapping. Default is 0.
+
+  Returns:
+      float or list: The metric value(s) for the specified metric.
+  """
+
+  assert metric in ['median', 'hazard_ratio', 'restricted_mean',
+                    'survival_at', 'time_to']
+
+  if metric in ['restricted_mean', 'survival_at', 'time_to']:
+    assert horizon is not None, "Please specify Event Horizon"
+
+  if metric == 'hazard_ratio':
+    raise Warning("WARNING: You are computing Hazard Ratios.\n Make sure you have tested the PH Assumptions.")
+  if (n_bootstrap is None) and (weights is not None):
+    raise Warning("Treatment Propensity weights would be ignored, since no bootstrapping is performed. "+
+                  "In order to incorporate IPTW weights please specify the number of bootstrap iterations n_bootstrap>=1")
+
+  # Bootstrapping ...
+  if n_bootstrap is not None:
+    assert isinstance(n_bootstrap, int), '`bootstrap` must be None or int'
+
+  if isinstance(n_bootstrap, int):
+    print('Bootstrapping... ', n_bootstrap,
+          ' number of times. This may take a while. Please be Patient...')
+
+  is_treated = treatment_indicator.astype(float)
+  if weights is None:
+    weights = 0.5*np.ones(len(outcomes))
+
+  weights[weights>(1-weights_clip)] = 1-weights_clip
+  weights[weights<weights_clip] = weights_clip
 ...
-                  ... n_bootstrap>=1")
-  # Bootstrapping ...
-  if n_bootstrap is not None:
-    assert isinstance(n_bootstrap, int), '`bootstrap` must be None or int'
-
-  if isinstance(n_bootstrap, int):
-    print('Bootstrapping... ', n_bootstrap,
-          ' number of times. Please be Patient...')
-
-  is_treated = treatment_indicator.astype(float)
-  if weights is None:
-    weights = 0.5*np.ones(len(outcomes))
-
-  weights[weights>weights_clip] = 1-weights_clip
-  weights[weights<weights_clip] = weights_clip
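To make the new metrics API concrete, here is a minimal usage sketch of `survival_diff_metric` combined with the `(x, t, e)` return format documented for `load_dataset` above. It is illustrative only: SUPPORT carries no treatment assignment, so the treatment indicator below is drawn at random, and the horizon and bootstrap settings are arbitrary choices, not values taken from the patch.

```python
# Sketch: compare restricted mean survival time across two (randomly
# assigned, purely illustrative) arms with the new survival_diff_metric.
import numpy as np
import pandas as pd

from auton_survival import datasets
from auton_survival.metrics import survival_diff_metric

# load_dataset returns (x, t, e): covariates, event times, censoring indicators.
x, t, e = datasets.load_dataset('SUPPORT')

# survival_diff_metric expects outcomes as a DataFrame with 'time' and 'event' columns.
outcomes = pd.DataFrame({'time': t, 'event': e})

# SUPPORT has no treatment arm, so draw a placeholder indicator at random.
treatment_indicator = np.random.RandomState(0).binomial(1, 0.5, len(outcomes)).astype(bool)

# Restricted mean survival time comparison at a (hypothetical) 1-year horizon,
# bootstrapped 50 times; propensity weights default to 0.5 when not supplied.
effect = survival_diff_metric(metric='restricted_mean',
                              outcomes=outcomes,
                              treatment_indicator=treatment_indicator,
                              horizon=365,
                              n_bootstrap=50)
print(effect)
```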