Skip to content

Commit

Permalink
add comment about hardcoded cytominer path
Browse files Browse the repository at this point in the history
  • Loading branch information
gwaybio committed May 14, 2020
1 parent 4abbbf5 commit 4a2edbc
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 142 deletions.
256 changes: 131 additions & 125 deletions comparison/0.get-cytominer-tool-differences.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -137,51 +137,6 @@
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 3;\n",
" var nbb_unformatted_code = \"# Set batch name\\nbatch = \\\"2016_04_01_a549_48hr_batch1\\\"\\n\\n# Pycytominer plates are saved with 5 floating point decimals\\nround_decimals = 5\\n\\n# Create the output directory\\noutput_dir = pathlib.Path(\\\"results\\\", batch)\\noutput_dir.mkdir(parents=True, exist_ok=True)\";\n",
" var nbb_formatted_code = \"# Set batch name\\nbatch = \\\"2016_04_01_a549_48hr_batch1\\\"\\n\\n# Pycytominer plates are saved with 5 floating point decimals\\nround_decimals = 5\\n\\n# Create the output directory\\noutput_dir = pathlib.Path(\\\"results\\\", batch)\\noutput_dir.mkdir(parents=True, exist_ok=True)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Set batch name\n",
"batch = \"2016_04_01_a549_48hr_batch1\"\n",
"\n",
"# Pycytominer plates are saved with 5 floating point decimals\n",
"round_decimals = 5\n",
"\n",
"# Create the output directory\n",
"output_dir = pathlib.Path(\"results\", batch)\n",
"output_dir.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 4;\n",
" var nbb_unformatted_code = \"def get_metrics(pycyto_df, cyto_df, features):\\n # Align features\\n pycyto_df = pycyto_df.reindex(features, axis=\\\"columns\\\")\\n cyto_df = cyto_df.reindex(features, axis=\\\"columns\\\")\\n\\n # Assess difference\\n abs_diff = pycyto_df.subtract(cyto_df).abs()\\n mean_diff = abs_diff.mean()\\n median_diff = abs_diff.median()\\n sum_diff = abs_diff.sum()\\n\\n complete_mean_diff = mean_diff.replace([np.inf, -np.inf], np.nan).dropna().mean()\\n complete_median_diff = (\\n median_diff.replace([np.inf, -np.inf], np.nan).dropna().mean()\\n )\\n complete_sum_diff = sum_diff.replace([np.inf, -np.inf], np.nan).dropna().sum()\\n\\n return (\\n mean_diff,\\n complete_mean_diff,\\n median_diff,\\n complete_median_diff,\\n sum_diff,\\n complete_sum_diff,\\n )\\n\\n\\ndef find_feature_diff(pycyto_df, cyto_df, plate, all_features):\\n all_features_df = pd.DataFrame(\\n [\\\"missing\\\"] * len(all_features), index=all_features, columns=[plate]\\n )\\n pycyto_features = set(pycyto_df.columns.tolist())\\n cyto_features = set(cyto_df.columns.tolist())\\n present_both = pycyto_features.intersection(cyto_features)\\n\\n all_features_df.loc[\\n all_features_df.index.isin(pycyto_features), plate\\n ] = \\\"only_pycytominer\\\"\\n all_features_df.loc[\\n all_features_df.index.isin(cyto_features), plate\\n ] = \\\"only_cytominer\\\"\\n all_features_df.loc[\\n all_features_df.index.isin(present_both), plate\\n ] = \\\"present_both\\\"\\n\\n return all_features_df\";\n",
" var nbb_formatted_code = \"def get_metrics(pycyto_df, cyto_df, features):\\n # Align features\\n pycyto_df = pycyto_df.reindex(features, axis=\\\"columns\\\")\\n cyto_df = cyto_df.reindex(features, axis=\\\"columns\\\")\\n\\n # Assess difference\\n abs_diff = pycyto_df.subtract(cyto_df).abs()\\n mean_diff = abs_diff.mean()\\n median_diff = abs_diff.median()\\n sum_diff = abs_diff.sum()\\n\\n complete_mean_diff = mean_diff.replace([np.inf, -np.inf], np.nan).dropna().mean()\\n complete_median_diff = (\\n median_diff.replace([np.inf, -np.inf], np.nan).dropna().mean()\\n )\\n complete_sum_diff = sum_diff.replace([np.inf, -np.inf], np.nan).dropna().sum()\\n\\n return (\\n mean_diff,\\n complete_mean_diff,\\n median_diff,\\n complete_median_diff,\\n sum_diff,\\n complete_sum_diff,\\n )\\n\\n\\ndef find_feature_diff(pycyto_df, cyto_df, plate, all_features):\\n all_features_df = pd.DataFrame(\\n [\\\"missing\\\"] * len(all_features), index=all_features, columns=[plate]\\n )\\n pycyto_features = set(pycyto_df.columns.tolist())\\n cyto_features = set(cyto_df.columns.tolist())\\n present_both = pycyto_features.intersection(cyto_features)\\n\\n all_features_df.loc[\\n all_features_df.index.isin(pycyto_features), plate\\n ] = \\\"only_pycytominer\\\"\\n all_features_df.loc[\\n all_features_df.index.isin(cyto_features), plate\\n ] = \\\"only_cytominer\\\"\\n all_features_df.loc[\\n all_features_df.index.isin(present_both), plate\\n ] = \\\"present_both\\\"\\n\\n return all_features_df\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
Expand Down Expand Up @@ -253,6 +208,52 @@
" return all_features_df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 4;\n",
" var nbb_unformatted_code = \"# Set batch name\\nproject = \\\"2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad\\\"\\nbatch = \\\"2016_04_01_a549_48hr_batch1\\\"\\n\\n# Pycytominer plates are saved with 5 floating point decimals\\nround_decimals = 5\\n\\n# Create the output directory\\noutput_dir = pathlib.Path(\\\"results\\\", batch)\\noutput_dir.mkdir(parents=True, exist_ok=True)\";\n",
" var nbb_formatted_code = \"# Set batch name\\nproject = \\\"2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad\\\"\\nbatch = \\\"2016_04_01_a549_48hr_batch1\\\"\\n\\n# Pycytominer plates are saved with 5 floating point decimals\\nround_decimals = 5\\n\\n# Create the output directory\\noutput_dir = pathlib.Path(\\\"results\\\", batch)\\noutput_dir.mkdir(parents=True, exist_ok=True)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Set batch name\n",
"project = \"2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad\"\n",
"batch = \"2016_04_01_a549_48hr_batch1\"\n",
"\n",
"# Pycytominer plates are saved with 5 floating point decimals\n",
"round_decimals = 5\n",
"\n",
"# Create the output directory\n",
"output_dir = pathlib.Path(\"results\", batch)\n",
"output_dir.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
Expand All @@ -264,8 +265,8 @@
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 5;\n",
" var nbb_unformatted_code = \"pycytominer_dir = pathlib.Path(\\\"../profiles/backend/\\\", batch)\\ncytominer_dir = pathlib.Path(\\n \\\"/Users/gway/work/projects/\\\"\\n + \\\"2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad\\\"\\n + f\\\"/workspace/backend/{batch}/\\\"\\n)\";\n",
" var nbb_formatted_code = \"pycytominer_dir = pathlib.Path(\\\"../profiles/backend/\\\", batch)\\ncytominer_dir = pathlib.Path(\\n \\\"/Users/gway/work/projects/\\\"\\n + \\\"2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad\\\"\\n + f\\\"/workspace/backend/{batch}/\\\"\\n)\";\n",
" var nbb_unformatted_code = \"# Set input directories\\n# Note, pycytominer profiles are processed and exist in this repository\\npycytominer_dir = pathlib.Path(\\\"../profiles/backend/\\\", batch)\\n\\n# Note, cytominer profiles were processed separately and exist in many different locations.\\n# This location represents the exact files that were previously profiled using cytominer.\\n# The files were deposited on the Imaging Platform AWS S3 Bucket and downloaded locally.\\n# To reproduce the analysis, update the appropriate cytominer path.\\ncytominer_dir = pathlib.Path(\\n f\\\"/Users/gway/work/projects/{project}/workspace/backend/{batch}/\\\"\\n)\";\n",
" var nbb_formatted_code = \"# Set input directories\\n# Note, pycytominer profiles are processed and exist in this repository\\npycytominer_dir = pathlib.Path(\\\"../profiles/backend/\\\", batch)\\n\\n# Note, cytominer profiles were processed separately and exist in many different locations.\\n# This location represents the exact files that were previously profiled using cytominer.\\n# The files were deposited on the Imaging Platform AWS S3 Bucket and downloaded locally.\\n# To reproduce the analysis, update the appropriate cytominer path.\\ncytominer_dir = pathlib.Path(\\n f\\\"/Users/gway/work/projects/{project}/workspace/backend/{batch}/\\\"\\n)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
Expand All @@ -287,11 +288,16 @@
}
],
"source": [
"# Set input directories\n",
"# Note, pycytominer profiles are processed and exist in this repository\n",
"pycytominer_dir = pathlib.Path(\"../profiles/backend/\", batch)\n",
"\n",
"# Note, cytominer profiles were processed separately and exist in many different locations.\n",
"# This location represents the exact files that were previously profiled using cytominer.\n",
"# The files were deposited on the Imaging Platform AWS S3 Bucket and downloaded locally.\n",
"# To reproduce the analysis, update the appropriate cytominer path.\n",
"cytominer_dir = pathlib.Path(\n",
" \"/Users/gway/work/projects/\"\n",
" + \"2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad\"\n",
" + f\"/workspace/backend/{batch}/\"\n",
" f\"/Users/gway/work/projects/{project}/workspace/backend/{batch}/\"\n",
")"
]
},
Expand Down Expand Up @@ -1065,104 +1071,104 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>SQ00014816</th>\n",
" <td>0.001737</td>\n",
" <td>0.000500</td>\n",
" <td>1188.99632</td>\n",
" <td>0.008944</td>\n",
" <td>0.001521</td>\n",
" <td>5725.56706</td>\n",
" <td>0.002675</td>\n",
" <td>0.001683</td>\n",
" <td>338.91369</td>\n",
" <th>SQ00015196</th>\n",
" <td>0.001272</td>\n",
" <td>0.000513</td>\n",
" <td>870.57825</td>\n",
" <td>0.004736</td>\n",
" <td>0.000990</td>\n",
" <td>3036.90706</td>\n",
" <td>0.002064</td>\n",
" <td>0.001253</td>\n",
" <td>270.23095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SQ00014819</th>\n",
" <td>0.001230</td>\n",
" <td>0.000507</td>\n",
" <td>842.46790</td>\n",
" <td>0.010358</td>\n",
" <td>0.001685</td>\n",
" <td>6630.55941</td>\n",
" <td>0.002171</td>\n",
" <td>0.001222</td>\n",
" <td>279.21883</td>\n",
" <th>SQ00014820</th>\n",
" <td>0.001546</td>\n",
" <td>0.000514</td>\n",
" <td>1058.23707</td>\n",
" <td>0.021597</td>\n",
" <td>0.001396</td>\n",
" <td>13882.97519</td>\n",
" <td>0.003080</td>\n",
" <td>0.001687</td>\n",
" <td>404.54706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SQ00015206</th>\n",
" <td>0.001691</td>\n",
" <td>0.000514</td>\n",
" <td>1158.06582</td>\n",
" <td>0.004321</td>\n",
" <td>0.001692</td>\n",
" <td>2766.17526</td>\n",
" <td>0.004534</td>\n",
" <td>0.002043</td>\n",
" <td>583.21178</td>\n",
" <th>SQ00015058</th>\n",
" <td>0.001069</td>\n",
" <td>0.000491</td>\n",
" <td>732.00186</td>\n",
" <td>0.006276</td>\n",
" <td>0.001059</td>\n",
" <td>4034.42844</td>\n",
" <td>0.001824</td>\n",
" <td>0.001007</td>\n",
" <td>234.65259</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SQ00015130</th>\n",
" <td>0.002783</td>\n",
" <td>0.000534</td>\n",
" <td>1905.44884</td>\n",
" <td>0.008791</td>\n",
" <td>0.002357</td>\n",
" <td>5637.74964</td>\n",
" <td>0.004945</td>\n",
" <td>0.002238</td>\n",
" <td>620.90409</td>\n",
" <th>SQ00015046</th>\n",
" <td>0.001878</td>\n",
" <td>0.000500</td>\n",
" <td>1285.96016</td>\n",
" <td>0.015458</td>\n",
" <td>0.001509</td>\n",
" <td>9912.96052</td>\n",
" <td>0.002283</td>\n",
" <td>0.001328</td>\n",
" <td>311.25483</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SQ00015133</th>\n",
" <td>0.001486</td>\n",
" <td>0.000509</td>\n",
" <td>1017.17515</td>\n",
" <td>0.004028</td>\n",
" <td>0.001618</td>\n",
" <td>2575.43054</td>\n",
" <td>0.002996</td>\n",
" <td>0.001604</td>\n",
" <td>384.28666</td>\n",
" <th>SQ00015210</th>\n",
" <td>0.002196</td>\n",
" <td>0.000530</td>\n",
" <td>1503.62634</td>\n",
" <td>0.003769</td>\n",
" <td>0.001419</td>\n",
" <td>2413.93925</td>\n",
" <td>0.003181</td>\n",
" <td>0.001719</td>\n",
" <td>414.09969</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" level_3_complete_mean_diff level_3_complete_median_diff \\\n",
"SQ00014816 0.001737 0.000500 \n",
"SQ00014819 0.001230 0.000507 \n",
"SQ00015206 0.001691 0.000514 \n",
"SQ00015130 0.002783 0.000534 \n",
"SQ00015133 0.001486 0.000509 \n",
"SQ00015196 0.001272 0.000513 \n",
"SQ00014820 0.001546 0.000514 \n",
"SQ00015058 0.001069 0.000491 \n",
"SQ00015046 0.001878 0.000500 \n",
"SQ00015210 0.002196 0.000530 \n",
"\n",
" level_3_complete_sum_diff level_4a_complete_mean_diff \\\n",
"SQ00014816 1188.99632 0.008944 \n",
"SQ00014819 842.46790 0.010358 \n",
"SQ00015206 1158.06582 0.004321 \n",
"SQ00015130 1905.44884 0.008791 \n",
"SQ00015133 1017.17515 0.004028 \n",
"SQ00015196 870.57825 0.004736 \n",
"SQ00014820 1058.23707 0.021597 \n",
"SQ00015058 732.00186 0.006276 \n",
"SQ00015046 1285.96016 0.015458 \n",
"SQ00015210 1503.62634 0.003769 \n",
"\n",
" level_4a_complete_median_diff level_4a_complete_sum_diff \\\n",
"SQ00014816 0.001521 5725.56706 \n",
"SQ00014819 0.001685 6630.55941 \n",
"SQ00015206 0.001692 2766.17526 \n",
"SQ00015130 0.002357 5637.74964 \n",
"SQ00015133 0.001618 2575.43054 \n",
"SQ00015196 0.000990 3036.90706 \n",
"SQ00014820 0.001396 13882.97519 \n",
"SQ00015058 0.001059 4034.42844 \n",
"SQ00015046 0.001509 9912.96052 \n",
"SQ00015210 0.001419 2413.93925 \n",
"\n",
" level_4b_complete_mean_diff level_4b_complete_median_diff \\\n",
"SQ00014816 0.002675 0.001683 \n",
"SQ00014819 0.002171 0.001222 \n",
"SQ00015206 0.004534 0.002043 \n",
"SQ00015130 0.004945 0.002238 \n",
"SQ00015133 0.002996 0.001604 \n",
"SQ00015196 0.002064 0.001253 \n",
"SQ00014820 0.003080 0.001687 \n",
"SQ00015058 0.001824 0.001007 \n",
"SQ00015046 0.002283 0.001328 \n",
"SQ00015210 0.003181 0.001719 \n",
"\n",
" level_4b_complete_sum_diff \n",
"SQ00014816 338.91369 \n",
"SQ00014819 279.21883 \n",
"SQ00015206 583.21178 \n",
"SQ00015130 620.90409 \n",
"SQ00015133 384.28666 "
"SQ00015196 270.23095 \n",
"SQ00014820 404.54706 \n",
"SQ00015058 234.65259 \n",
"SQ00015046 311.25483 \n",
"SQ00015210 414.09969 "
]
},
"execution_count": 17,
Expand Down
Loading

0 comments on commit 4a2edbc

Please sign in to comment.