WayScience · jenna-tomkinson · May 22, 2024 · May 21, 2024 · May 21, 2024 · May 22, 2024
diff --git a/4.figures/figures/all_genotypes_montage.png b/4.figures/figures/all_genotypes_montage.png
diff --git a/4.figures/figures/main_figure_1_workflow.png b/4.figures/figures/main_figure_1_workflow.png
diff --git a/4.figures/figures/workflow.png b/4.figures/figures/workflow.png
diff --git a/4.figures/main_figure_1/1.find_sc_crops.ipynb b/4.figures/main_figure_1/1.find_sc_crops.ipynb
@@ -0,0 +1,303 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generate random single-cell crops of cells per genotype from Plate 5 for main figure"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pathlib\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import cv2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set paths and variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Images are accessible in the nf1_schwanncell_data repo\n",
+    "path_to_images_dir = pathlib.Path(\n",
+    "    \"../../../nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_5\"\n",
+    ")  # Focus on plate 5\n",
+    "\n",
+    "# Path to wear single-cell crops are saved\n",
+    "path_to_sc_dir = pathlib.Path(\"./sc_crops\")\n",
+    "path_to_sc_dir.mkdir(exist_ok=True)\n",
+    "\n",
+    "# URL path to annotated parquet file from Plate 5 (versioned)\n",
+    "url = \"https://github.com/WayScience/nf1_cellpainting_data/raw/main/3.processing_features/data/single_cell_profiles/Plate_5_sc_annotated.parquet\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load in annotated data frame and only include metadata \n",
+    "\n",
+    "NOTE: We normally use random seed = 0 but we have changed it here to find best random cells for viewing that are not cells going through mitosis or cell death."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(2, 7)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Metadata_Well</th>\n",
+       "      <th>Metadata_Site</th>\n",
+       "      <th>Metadata_genotype</th>\n",
+       "      <th>Metadata_Nuclei_Location_Center_X</th>\n",
+       "      <th>Metadata_Nuclei_Location_Center_Y</th>\n",
+       "      <th>Metadata_Cells_Location_Center_X</th>\n",
+       "      <th>Metadata_Cells_Location_Center_Y</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>F10</td>\n",
+       "      <td>12</td>\n",
+       "      <td>Null</td>\n",
+       "      <td>602.916622</td>\n",
+       "      <td>232.647782</td>\n",
+       "      <td>602.217532</td>\n",
+       "      <td>186.650247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>E2</td>\n",
+       "      <td>13</td>\n",
+       "      <td>WT</td>\n",
+       "      <td>536.981504</td>\n",
+       "      <td>146.001233</td>\n",
+       "      <td>529.927170</td>\n",
+       "      <td>145.713534</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Metadata_Well Metadata_Site Metadata_genotype  \\\n",
+       "0           F10            12              Null   \n",
+       "1            E2            13                WT   \n",
+       "\n",
+       "   Metadata_Nuclei_Location_Center_X  Metadata_Nuclei_Location_Center_Y  \\\n",
+       "0                         602.916622                         232.647782   \n",
+       "1                         536.981504                         146.001233   \n",
+       "\n",
+       "   Metadata_Cells_Location_Center_X  Metadata_Cells_Location_Center_Y  \n",
+       "0                        602.217532                        186.650247  \n",
+       "1                        529.927170                        145.713534  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# This random seed value does not follow the conventions of the lab, but yields the best visualizations of single-cells\n",
+    "random_seed_value = 58\n",
+    "# Set a seed for reproducibility\n",
+    "np.random.seed(random_seed_value)\n",
+    "\n",
+    "# Load in plate 5 data frame\n",
+    "plate5_df = pd.read_parquet(\n",
+    "    url,\n",
+    "    columns=[\n",
+    "        \"Metadata_Well\",\n",
+    "        \"Metadata_Site\",\n",
+    "        \"Metadata_genotype\",\n",
+    "        \"Metadata_Nuclei_Location_Center_X\",\n",
+    "        \"Metadata_Nuclei_Location_Center_Y\",\n",
+    "        \"Metadata_Cells_Location_Center_X\",\n",
+    "        \"Metadata_Cells_Location_Center_Y\",\n",
+    "    ],\n",
+    ")\n",
+    "\n",
+    "# Exclude rows where \"Metadata_genotype\" is \"HET\" due to not using during the training of the model\n",
+    "plate5_df = plate5_df[plate5_df[\"Metadata_genotype\"] != \"HET\"]\n",
+    "\n",
+    "# Select one random row per \"Metadata_genotype\"\n",
+    "plate5_df = (\n",
+    "    plate5_df.groupby(\"Metadata_genotype\")\n",
+    "    .apply(lambda x: x.sample(1, random_state=random_seed_value))\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "\n",
+    "print(plate5_df.shape)\n",
+    "plate5_df.head(5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set up dictionary to hold info to find random single-cells per genotype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'Null_genotype': {'well': 'F10', 'site': '12', 'location_center_x': 602.9166221272047, 'location_center_y': 232.64778193479424}, 'WT_genotype': {'well': 'E2', 'site': '13', 'location_center_x': 536.9815043156597, 'location_center_y': 146.00123304562268}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# B1_01_1_1_DAPI_001_illumcorrect.tiff\n",
+    "\n",
+    "# Create dictionary to run through each single-cell to find crop\n",
+    "random_sc_dict = {}\n",
+    "for _, row in plate5_df.head().iterrows():\n",
+    "    genotype_key = f\"{row['Metadata_genotype']}_genotype\"\n",
+    "    random_sc_dict[genotype_key] = {\n",
+    "        \"well\": row[\"Metadata_Well\"],\n",
+    "        \"site\": row[\"Metadata_Site\"],\n",
+    "        \"location_center_x\": row[\"Metadata_Nuclei_Location_Center_X\"],\n",
+    "        \"location_center_y\": row[\"Metadata_Nuclei_Location_Center_Y\"],\n",
+    "    }\n",
+    "\n",
+    "# Check the created dictionary\n",
+    "print(random_sc_dict)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate single-cell crops"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define a mapping for the suffixes\n",
+    "channel_mapping = {1: \"DAPI\", 2: \"GFP\", 3: \"CY5\", 4: \"RFP\"}\n",
+    "\n",
+    "for genotype, info in random_sc_dict.items():\n",
+    "    # Initialize a list to store file paths\n",
+    "    file_paths = []\n",
+    "\n",
+    "    # Create file paths with well, site, and channel\n",
+    "    for i in range(1, 5):  # Update the range to start from 1\n",
+    "        channel = channel_mapping[i]\n",
+    "        filename = f\"{path_to_images_dir}/{info['well']}_01_{i}_{info['site']}_{channel}_001_illumcorrect.tiff\"\n",
+    "        file_paths.append(filename)\n",
+    "\n",
+    "        # Read the image\n",
+    "        channel_image = cv2.imread(filename, cv2.IMREAD_UNCHANGED)\n",
+    "\n",
+    "        # Use the location_center_x and location_center_y to create a crop\n",
+    "        center_x = info.get(\"location_center_x\")\n",
+    "        center_y = info.get(\"location_center_y\")\n",
+    "\n",
+    "        # Crop dimensions\n",
+    "        crop_size = 250\n",
+    "        half_crop = crop_size // 2\n",
+    "\n",
+    "        # Ensure the center coordinates are valid\n",
+    "        if center_x is not None and center_y is not None:\n",
+    "            # Calculate crop boundaries\n",
+    "            top_left_x = max(int(center_x - half_crop), 0)\n",
+    "            top_left_y = max(int(center_y - half_crop), 0)\n",
+    "            bottom_right_x = min(int(center_x + half_crop), channel_image.shape[1])\n",
+    "            bottom_right_y = min(int(center_y + half_crop), channel_image.shape[0])\n",
+    "\n",
+    "            # Perform cropping\n",
+    "            cropped_channel = channel_image[\n",
+    "                top_left_y:bottom_right_y, top_left_x:bottom_right_x\n",
+    "            ]\n",
+    "\n",
+    "            # Ensure the cropped image is of size 250x250\n",
+    "            cropped_channel = cv2.resize(cropped_channel, (crop_size, crop_size))\n",
+    "\n",
+    "            # Save the cropped image with single_cell and channel information\n",
+    "            output_filename = f\"{path_to_sc_dir}/{genotype}_{channel}_cropped.png\"\n",
+    "            cv2.imwrite(output_filename, cropped_channel)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python_analysis_cfret",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}