angelolab · alex-l-kong · Feb 27, 2024 · Jan 22, 2024 · Jan 24, 2024 · Jan 24, 2024
diff --git a/src/ark/utils/data_utils.py b/src/ark/utils/data_utils.py
@@ -446,6 +446,9 @@ def generate_and_save_cell_cluster_masks(
     cluster_map = cmd.mapping.filter([cmd.cluster_column, cmd.cluster_id_column])
     cluster_map = cluster_map.drop_duplicates()
 
+    # drop the cluster_id column from gui_map if it already exists, otherwise do nothing
+    gui_map = gui_map.drop(columns="cluster_id", errors="ignore")
+
     # add a cluster_id column corresponding to the new mask integers
     updated_cluster_map = gui_map.merge(cluster_map, on=[cmd.cluster_column], how="left")
     updated_cluster_map.to_csv(cluster_id_to_name_path, index=False)

diff --git a/tests/utils/data_utils_test.py b/tests/utils/data_utils_test.py
@@ -359,33 +359,35 @@ def test_generate_and_save_cell_cluster_masks(tmp_path: pathlib.Path, sub_dir, n
     cluster_mapping.to_csv(os.path.join(tmp_path, 'cluster_mapping.csv'), index=False)
 
     # test various batch_sizes, no sub_dir, name_suffix = ''.
-    data_utils.generate_and_save_cell_cluster_masks(
-        fovs=fovs,
-        save_dir=os.path.join(tmp_path, 'cell_masks'),
-        seg_dir=tmp_path,
-        cell_data=consensus_data_som,
-        cluster_id_to_name_path=mapping_file_path,
-        fov_col=settings.FOV_ID,
-        label_col=settings.CELL_LABEL,
-        cell_cluster_col='cell_som_cluster',
-        seg_suffix='_whole_cell.tiff',
-        sub_dir=sub_dir,
-        name_suffix=name_suffix
-    )
+    # NOTE: run twice to ensure results are the same when a cluster_id column already exists
+    for i in np.arange(2):
+        data_utils.generate_and_save_cell_cluster_masks(
+            fovs=fovs,
+            save_dir=os.path.join(tmp_path, 'cell_masks'),
+            seg_dir=tmp_path,
+            cell_data=consensus_data_som,
+            cluster_id_to_name_path=mapping_file_path,
+            fov_col=settings.FOV_ID,
+            label_col=settings.CELL_LABEL,
+            cell_cluster_col='cell_som_cluster',
+            seg_suffix='_whole_cell.tiff',
+            sub_dir=sub_dir,
+            name_suffix=name_suffix
+        )
 
-    # open each cell mask and make sure the shape and values are valid
-    if sub_dir is None:
-        sub_dir = ''
+        # open each cell mask and make sure the shape and values are valid
+        if sub_dir is None:
+            sub_dir = ''
 
-    for i, fov in enumerate(fovs):
-        fov_name = fov + name_suffix + ".tiff"
-        cell_mask = io.imread(os.path.join(tmp_path, 'cell_masks', sub_dir, fov_name))
-        actual_img_dims = (40, 40) if i < fov_size_split else (20, 20)
-        assert cell_mask.shape == actual_img_dims
-        assert np.all(cell_mask <= 5)
+        for i, fov in enumerate(fovs):
+            fov_name = fov + name_suffix + ".tiff"
+            cell_mask = io.imread(os.path.join(tmp_path, 'cell_masks', sub_dir, fov_name))
+            actual_img_dims = (40, 40) if i < fov_size_split else (20, 20)
+            assert cell_mask.shape == actual_img_dims
+            assert np.all(cell_mask <= 5)
 
-    new_cluster_mapping = pd.read_csv(mapping_file_path)
-    assert "cluster_id" in new_cluster_mapping.columns
+        new_cluster_mapping = pd.read_csv(mapping_file_path)
+        assert "cluster_id" in new_cluster_mapping.columns
 
 
 def test_generate_pixel_cluster_mask():