SBC-Utrecht · McHaillet · May 8, 2024 · Feb 19, 2024 · Feb 19, 2024 · Feb 21, 2024
diff --git a/src/pytom_tm/entry_points.py b/src/pytom_tm/entry_points.py
@@ -503,14 +503,16 @@ def extract_candidates(argv=None):
     )
     parser.add_argument(
         "--number-of-false-positives",
-        type=int,
+        type=float,
         required=False,
         action=LargerThanZero,
+        default=1.,
         help="Number of false positives to determine the false alarm rate. Here one "
-        "can increase the recall of the particle of interest at the expense of more "
-        "false positives. The default value of 1 is recommended for particles that can "
-        "be distinguished well from the background (high specificity).",
-        default=1,
+             "can increase the recall of the particle of interest at the expense "
+             "of more false positives. The default value of 1 is recommended for "
+             "particles that can be distinguished well from the background (high "
+             "specificity). The value can also be set between 0 and 1 to make "
+             "the cut-off more restrictive.",
     )
     parser.add_argument(
         "-r",
@@ -539,6 +541,17 @@ def extract_candidates(argv=None):
         required=False,
         help="Attempt to filter only sharp correlation peaks with a tophat transform",
     )
+    parser.add_argument(
+        '--tophat-connectivity', 
+        type=int, 
+        required=False, 
+        default=1,
+        action=LargerThanZero,
+        help="Set kernel connectivity for ndimage binary structure used for the "
+             "tophat transform. Integer value in range 1-3. 1 is the most "
+             "restrictive, 3 the least restrictive. Generally recommended to "
+             "leave at 1."
+    )
     parser.add_argument(
         "--log",
         type=str,
@@ -560,6 +573,7 @@ def extract_candidates(argv=None):
         n_false_positives=args.number_of_false_positives,
         tomogram_mask_path=args.tomogram_mask,
         tophat_filter=args.tophat_filter,
+        tophat_connectivity=args.tophat_connectivity,
     )
 
     # write out as a RELION type starfile

diff --git a/src/pytom_tm/extract.py b/src/pytom_tm/extract.py
@@ -28,8 +28,9 @@
 def predict_tophat_mask(
         score_volume: npt.NDArray[float],
         output_path: Optional[pathlib.Path] = None,
-        n_false_positives: int = 1,
-        create_plot: bool = True
+        n_false_positives: float = 1.,
+        create_plot: bool = True,
+        tophat_connectivity: int = 1
 ) -> npt.NDArray[bool]:
     """This function gets as input a score map and returns a peak mask as determined with a tophat transform.
 
@@ -49,10 +50,12 @@ def predict_tophat_mask(
         template matching score map
     output_path: Optional[pathlib.Path], default None
         if provided (and plotting is available), write a figure of the fit to the output folder
-    n_false_positives: int, default 1
+    n_false_positives: float, default 1.0
         number of false positive for error function cutoff calculation
     create_plot: bool, default True
         whether to plot the gaussian fit and cut-off estimation
+    tophat_connectivity: int, default 1
+        connectivity (1-3) of the binary structure used as kernel for the tophat transform
 
     Returns
     -------
@@ -63,7 +66,7 @@ def predict_tophat_mask(
         score_volume,
         structure=ndimage.generate_binary_structure(
             rank=3,
-            connectivity=1
+            connectivity=tophat_connectivity
         )
     )
     y, bins = np.histogram(tophat.flatten(), bins=50)
@@ -127,10 +130,11 @@ def extract_particles(
         particle_radius_px: int,
         n_particles: int,
         cut_off: Optional[float] = None,
-        n_false_positives: int = 1,
+        n_false_positives: float = 1.,
         tomogram_mask_path: Optional[pathlib.Path] = None,
         tophat_filter: bool = False,
-        create_plot: bool = True
+        create_plot: bool = True,
+        tophat_connectivity: int = 1,
 ) -> tuple[pd.DataFrame, list[float, ...]]:
     """
     Parameters
@@ -143,15 +147,17 @@ def extract_particles(
         maximum number of particles to extract
     cut_off: Optional[float]
         manually override the automated score cut-off estimation, value between 0 and 1
-    n_false_positives: int
+    n_false_positives: float, default 1.0
         tune the number of false positives to be included for automated error function cut-off estimation:
-        should be an integer >= 1
+        should be a float > 0
     tomogram_mask_path: Optional[pathlib.Path]
         path to a tomographic binary mask for extraction
     tophat_filter: bool
         attempt to only select sharp peaks with the tophat filter
     create_plot: bool, default True
         flag for creating extraction plots
+    tophat_connectivity: int, default 1
+        connectivity of kernel for tophat transform
 
     Returns
     -------
@@ -171,7 +177,8 @@ def extract_particles(
             score_volume,
             output_path=job.output_dir.joinpath(f'{job.tomo_id}_tophat_filter.svg'),
             n_false_positives=n_false_positives,
-            create_plot=create_plot
+            create_plot=create_plot,
+            tophat_connectivity=tophat_connectivity,
         )
         score_volume *= predicted_peaks  # multiply with predicted peaks to keep only those