From e926c5de8ca55e0ea4f29155e769ad5d2a9ddb94 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 26 Sep 2016 16:28:16 -0400 Subject: [PATCH] Review comments: spacing and results doc --- cognoml/analysis.py | 10 +- hippo-output-schema.json | 345 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 hippo-output-schema.json diff --git a/cognoml/analysis.py b/cognoml/analysis.py index 06c88cc..6bf403f 100644 --- a/cognoml/analysis.py +++ b/cognoml/analysis.py @@ -31,6 +31,12 @@ def classify(sample_id, mutation_status, **kwargs): Sample IDs of the observations. mutation_status : list Mutation status (0 or 1) of each sample. + + Returns + ------- + results : dict + A JSON-serializable object. OrderedDicts are used for dictionaries to + enable reproducible export. """ results = collections.OrderedDict() @@ -80,8 +86,8 @@ def classify(sample_id, mutation_status, **kwargs): performance = collections.OrderedDict() for part, df in ('training', obs_train_df), ('testing', obs_test_df): - y_true=df.status - y_pred=df.predicted_status + y_true = df.status + y_pred = df.predicted_status metrics = utils.class_metrics(y_true, y_pred) metrics.update(utils.threshold_metrics(y_true, y_pred)) performance[part] = utils.value_map(metrics, round, ndigits=5) diff --git a/hippo-output-schema.json b/hippo-output-schema.json new file mode 100644 index 0000000..0f7283f --- /dev/null +++ b/hippo-output-schema.json @@ -0,0 +1,345 @@ +{ + "required": [ + "dimensions", + "grid_search", + "model", + "observations", + "performance" + ], + "type": "object", + "properties": { + "performance": { + "required": [ + "cv", + "testing", + "training" + ], + "type": "object", + "properties": { + "cv": { + "required": [ + "auroc" + ], + "type": "object", + "properties": { + "auroc": { + "type": "number" + } + } + }, + "training": { + "required": [ + "accuracy", + "auprc", + "auroc", + "balanced_accuracy", + "f1", + "precision", + "recall" + ], 
+ "type": "object", + "properties": { + "accuracy": { + "type": "number" + }, + "recall": { + "type": "number" + }, + "auroc": { + "type": "number" + }, + "balanced_accuracy": { + "type": "number" + }, + "precision": { + "type": "number" + }, + "auprc": { + "type": "number" + }, + "f1": { + "type": "number" + } + } + }, + "testing": { + "required": [ + "accuracy", + "auprc", + "auroc", + "balanced_accuracy", + "f1", + "precision", + "recall" + ], + "type": "object", + "properties": { + "accuracy": { + "type": "number" + }, + "recall": { + "type": "number" + }, + "auroc": { + "type": "number" + }, + "balanced_accuracy": { + "type": "number" + }, + "precision": { + "type": "number" + }, + "auprc": { + "type": "number" + }, + "f1": { + "type": "number" + } + } + } + } + }, + "grid_search": { + "required": [ + "fold_scores", + "mean_scores" + ], + "type": "object", + "properties": { + "mean_scores": { + "required": [ + "columns", + "data" + ], + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "columns": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "fold_scores": { + "required": [ + "columns", + "data" + ], + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "columns": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + }, + "model": { + "required": [ + "class", + "features", + "module", + "parameters" + ], + "type": "object", + "properties": { + "class": { + "type": "string" + }, + "parameters": { + "required": [ + "alpha", + "average", + "class_weight", + "epsilon", + "eta0", + "fit_intercept", + "l1_ratio", + "learning_rate", + "loss", + "n_iter", + "n_jobs", + "penalty", + "power_t", + "random_state", + "shuffle", + "verbose", + "warm_start" + ], + "type": "object", + "properties": { + "penalty": { + "type": "string" + }, + 
"shuffle": { + "type": "boolean" + }, + "n_iter": { + "type": "integer" + }, + "fit_intercept": { + "type": "boolean" + }, + "eta0": { + "type": "number" + }, + "epsilon": { + "type": "number" + }, + "average": { + "type": "boolean" + }, + "random_state": { + "type": "integer" + }, + "alpha": { + "type": "number" + }, + "power_t": { + "type": "number" + }, + "n_jobs": { + "type": "integer" + }, + "verbose": { + "type": "integer" + }, + "l1_ratio": { + "type": "number" + }, + "loss": { + "type": "string" + }, + "learning_rate": { + "type": "string" + }, + "warm_start": { + "type": "boolean" + }, + "class_weight": { + "type": "string" + } + } + }, + "features": { + "required": [ + "columns", + "data" + ], + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": [ + "number", + "string" + ] + } + } + }, + "columns": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "module": { + "type": "string" + } + } + }, + "observations": { + "required": [ + "columns", + "data" + ], + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": [ + "number", + "string" + ] + } + } + }, + "columns": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "dimensions": { + "required": [ + "features", + "negatives", + "observations_selected", + "observations_unselected", + "positive_prevalence", + "positives", + "testing_observations", + "training_observations" + ], + "type": "object", + "properties": { + "testing_observations": { + "type": "integer" + }, + "training_observations": { + "type": "integer" + }, + "positives": { + "type": "integer" + }, + "positive_prevalence": { + "type": "number" + }, + "negatives": { + "type": "integer" + }, + "features": { + "type": "integer" + }, + "observations_unselected": { + "type": "integer" + }, + "observations_selected": { + "type": "integer" + } + } + } + } +}