Skip to content

Commit

Permalink
Figures for revision
Browse files Browse the repository at this point in the history
  • Loading branch information
JulianKlug committed Apr 3, 2024
1 parent 004b0ce commit 29ebf34
Show file tree
Hide file tree
Showing 18 changed files with 2,207 additions and 702 deletions.
124 changes: 124 additions & 0 deletions meta_data/imaging_meta_data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-03T14:25:43.481497Z",
"start_time": "2024-04-03T14:25:43.479279Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3dbb8e89f490b071",
"metadata": {},
"outputs": [],
"source": [
"# Default is the original author's local copy; set IMAGING_DATA_PATH to point at the spreadsheet on another machine\n",
"imaging_data_path = os.environ.get(\n",
"    'IMAGING_DATA_PATH',\n",
"    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/stroke_research/geneva_stroke_unit_dataset/data/perfusion_imaging_data/random_subset_for_imaging_extraction.xlsx',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8fec3bfc2ca0804",
"metadata": {},
"outputs": [],
"source": [
"imaging_data_df = pd.read_excel(imaging_data_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6f22df3a61f7151",
"metadata": {},
"outputs": [],
"source": [
"imaging_data_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c17bc33aacb6df67",
"metadata": {},
"outputs": [],
"source": [
"# Every row of the spreadsheet counts as one selected patient\n",
"n_patients_selected = len(imaging_data_df)\n",
"print(f'Number of patients selected: {n_patients_selected}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48f6992787c67e97",
"metadata": {},
"outputs": [],
"source": [
"# Counts and shares of selected patients with / without perfusion imaging.\n",
"# NOTE(review): assumes CTP_present is a 0/1 or boolean flag with no missing values - confirm\n",
"n_patients_with_perfusion_imaging = imaging_data_df['CTP_present'].sum()\n",
"n_missing_perfusion_imaging = n_patients_selected - n_patients_with_perfusion_imaging\n",
"\n",
"# Both shares are printed at the same precision so the two complementary percentages add up to 100\n",
"pct_with_perfusion_imaging = n_patients_with_perfusion_imaging / n_patients_selected * 100\n",
"pct_missing_perfusion_imaging = n_missing_perfusion_imaging / n_patients_selected * 100\n",
"print(f'Number of patients with perfusion imaging: {n_patients_with_perfusion_imaging} ({pct_with_perfusion_imaging:.1f}%)')\n",
"print(f'Number of patients without perfusion imaging: {n_missing_perfusion_imaging} ({pct_missing_perfusion_imaging:.1f}%)')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77d2d74fc785bfb5",
"metadata": {},
"outputs": [],
"source": [
"imaging_params = ['T10', 'T8', 'T6', 'T4', 'CBF']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad704555a5f54afa",
"metadata": {},
"outputs": [],
"source": [
"# column-wise maximum of every imaging parameter\n",
"max_values = imaging_data_df.loc[:, imaging_params].max(axis=0)\n",
"print(max_values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce7210d5ccdb9b83",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
126 changes: 108 additions & 18 deletions meta_data/number_of_datapoints.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T11:54:40.131356Z",
"start_time": "2023-08-17T11:54:38.631454Z"
"end_time": "2024-03-04T15:26:23.988812Z",
"start_time": "2024-03-04T15:26:23.979340Z"
}
},
"outputs": [],
Expand All @@ -21,13 +21,14 @@
"id": "f45adda9b4cf666c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T11:55:11.489080Z",
"start_time": "2023-08-17T11:55:11.481973Z"
"end_time": "2024-03-04T15:27:26.491841Z",
"start_time": "2024-03-04T15:27:26.484353Z"
}
},
"outputs": [],
"source": [
"missingness_data_path = '/Users/jk1/temp/opsum_prepro_output/gsu_prepro_01012023_233050/logs_01012023_233050/missingness.csv'"
"missingness_data_path = '/Users/jk1/temp/opsum_prepro_output/gsu_prepro_01012023_233050/logs_01012023_233050/missingness.csv'\n",
"features_data_path = '/Users/jk1/temp/opsum_prepro_output/gsu_prepro_01012023_233050/preprocessed_features_01012023_233050.csv'"
]
},
{
Expand All @@ -36,13 +37,24 @@
"id": "58b321fddfdee71",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T11:55:14.150679Z",
"start_time": "2023-08-17T11:55:14.129991Z"
"end_time": "2024-03-04T15:28:19.009425Z",
"start_time": "2024-03-04T15:28:04.474567Z"
}
},
"outputs": [],
"source": [
"missingness_df = pd.read_csv(missingness_data_path)"
"missingness_df = pd.read_csv(missingness_data_path)\n",
"features_df = pd.read_csv(features_data_path)"
]
},
{
"cell_type": "markdown",
"id": "453a14adfba59798",
"metadata": {
"collapsed": false
},
"source": [
"Compute datapoints from missingness logs"
]
},
{
Expand All @@ -51,8 +63,8 @@
"id": "f218389f748b52c8",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T12:01:40.205640Z",
"start_time": "2023-08-17T12:01:40.199785Z"
"end_time": "2024-03-04T15:26:24.023796Z",
"start_time": "2024-03-04T15:26:24.012096Z"
}
},
"outputs": [],
Expand All @@ -67,8 +79,8 @@
"id": "d5a26108dcdfb251",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T12:01:40.409068Z",
"start_time": "2023-08-17T12:01:40.400954Z"
"end_time": "2024-03-04T15:26:24.025205Z",
"start_time": "2024-03-04T15:26:24.018568Z"
}
},
"outputs": [],
Expand All @@ -82,8 +94,8 @@
"id": "56c144032cd549d8",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T12:01:40.835104Z",
"start_time": "2023-08-17T12:01:40.830606Z"
"end_time": "2024-03-04T15:26:24.050675Z",
"start_time": "2024-03-04T15:26:24.022837Z"
}
},
"outputs": [],
Expand All @@ -99,8 +111,8 @@
"id": "8d2f5add049b3471",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T12:01:44.750147Z",
"start_time": "2023-08-17T12:01:44.737256Z"
"end_time": "2024-03-04T15:26:24.116486Z",
"start_time": "2024-03-04T15:26:24.108130Z"
}
},
"outputs": [],
Expand All @@ -114,8 +126,8 @@
"id": "476eb17f4397c782",
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-17T12:03:59.049302Z",
"start_time": "2023-08-17T12:03:59.036585Z"
"end_time": "2024-03-04T15:26:24.127301Z",
"start_time": "2024-03-04T15:26:24.115644Z"
}
},
"outputs": [],
Expand All @@ -124,10 +136,88 @@
"n_total_datapoints / n_total"
]
},
{
"cell_type": "markdown",
"id": "4153a35d1c82f905",
"metadata": {
"collapsed": false
},
"source": [
"Compute datapoints from features DF"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b33df4482e2802d9",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-04T15:29:01.677360Z",
"start_time": "2024-03-04T15:29:01.660229Z"
}
},
"outputs": [],
"source": [
"features_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5126acbc577b71f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-04T18:59:56.926176Z",
"start_time": "2024-03-04T18:59:55.571365Z"
}
},
"outputs": [],
"source": [
"# Keep only the rows whose source is the EHR or the stroke registry\n",
"observed_sources = ['EHR', 'stroke_registry']\n",
"not_imputed_features_df = features_df.loc[features_df['source'].isin(observed_sources)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "537cda365366611f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-04T19:04:42.573140Z",
"start_time": "2024-03-04T19:04:42.074521Z"
}
},
"outputs": [],
"source": [
"# number of rows per case_admission_id (grouped once and reused for every statistic)\n",
"n_obs_per_cid = not_imputed_features_df.groupby('case_admission_id').size()\n",
"\n",
"mean_n_obs_per_cid = n_obs_per_cid.mean()\n",
"median_n_obs_per_cid = n_obs_per_cid.median()\n",
"p5_n_obs_per_cid = n_obs_per_cid.quantile(0.05)\n",
"p95_n_obs_per_cid = n_obs_per_cid.quantile(0.95)\n",
"\n",
"print(f'mean_n_obs_per_cid: {mean_n_obs_per_cid}')\n",
"print(f'median_n_obs_per_cid: {median_n_obs_per_cid} (p5: {p5_n_obs_per_cid}, p95: {p95_n_obs_per_cid})')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "293df2d10cd310e5",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-04T19:05:02.622751Z",
"start_time": "2024-03-04T19:05:02.615617Z"
}
},
"outputs": [],
"source": [
"n_possible_obs_per_cid = 72 * 84\n",
"n_possible_obs_per_cid"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "971c6091123b5b68",
"metadata": {},
"outputs": [],
"source": []
Expand Down
Loading

0 comments on commit 29ebf34

Please sign in to comment.