Merge pull request #679 from cmurray-astro/log_binning

Adding log binning option for source density
BEAST-Fitting · Dec 10, 2020 · b1950c6 · b1950c6
2 parents a7d4787 + 7e809e7
commit b1950c6
Show file tree

Hide file tree

Showing 7 changed files with 105 additions and 41 deletions.
diff --git a/beast/observationmodel/ast/make_ast_input_list.py b/beast/observationmodel/ast/make_ast_input_list.py
@@ -211,8 +211,6 @@ def pick_models_toothpick_style(
                     counter * chunksize, successes, successes / counter / chunksize
                 )
             )
-            print("Bin array:")
-            print(bin_count)
 
     # Gather the selected model seds in a table
     sedsMags = Table(sedsMags[chosen_idxs, :], names=filters)

diff --git a/beast/observationmodel/ast/make_ast_xy_list.py b/beast/observationmodel/ast/make_ast_xy_list.py
@@ -16,6 +16,7 @@ def pick_positions_from_map(
     catalog,
     chosen_seds,
     input_map,
+    bin_mode,
     N_bins,
     bin_width,
     Npermodel,
@@ -56,6 +57,14 @@ def pick_positions_from_map(
     input_map: str
         Path to a hd5 file containing the file written by a DensityMap
 
+    bin_mode: str
+        The convention for generating bins of source density. The options
+        are "linear" (for linear binning) and "log" (for log binning). If "log",
+        the number of bins (N_bins) must be set. If "linear", either N_bins
+        or the bin width (bin_width), or neither (resulting in
+        default integer binning by sources/arcsec^2) can be set.
+        Default: "linear"
+
     N_bins: int
         The number of bins for the range of background density values.
         The bins will be picked on a linear grid, ranging from the
@@ -253,7 +262,7 @@ def pick_positions_from_map(
     print(Npermodel, " repeats of each model in each map bin")
 
     bdm = density_map.BinnedDensityMap.create(
-        input_map, N_bins=N_bins, bin_width=bin_width
+        input_map, bin_mode=bin_mode, N_bins=N_bins, bin_width=bin_width
     )
     tile_vals = bdm.tile_vals()
     max_val = np.amax(tile_vals)

diff --git a/beast/tools/cut_catalogs.py b/beast/tools/cut_catalogs.py
@@ -248,7 +248,9 @@ def convexhull_path(x_coord, y_coord):
     # commandline parser
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "input_phot_file", type=str, help="file name of the input photometry catalog",
+        "input_phot_file",
+        type=str,
+        help="file name of the input photometry catalog",
     )
     parser.add_argument(
         "output_phot_file",

diff --git a/beast/tools/density_map.py b/beast/tools/density_map.py
@@ -136,7 +136,7 @@ def __init__(self, tile_data, bins=None):
 
         self.bin_indices_used = np.sort(np.unique(bins))
 
-    def create(density_map, N_bins=None, bin_width=None):
+    def create(density_map, bin_mode="linear", N_bins=None, bin_width=None):
         """
         Creates a binned density map from a DensityMap file, or from an
         astropy table loaded from it. The tiles are grouped into
@@ -147,38 +147,82 @@ def create(density_map, N_bins=None, bin_width=None):
         # be file or table object)
         binned_density_map = DensityMap(density_map)
 
-        # Create the extra column here
-        if (N_bins is None) and (bin_width is None):
-            bins = np.array(range(len(binned_density_map.tile_data)))
-
-        elif N_bins is not None:
-            # Create the density bins
-            # [min, ., ., ., max]
-            tile_densities = binned_density_map.tile_data[input_column]
-            min_density = np.amin(tile_densities)
-            max_density = np.amax(tile_densities)
-            bin_edges = np.linspace(
-                min_density - 0.01 * abs(min_density),
-                max_density + 0.01 * abs(max_density),
-                N_bins + 1,
-            )
-
-            # Find which bin each tile belongs to
-            # e.g. one of these numbers: 0 [1, 2, 3, 4, 5] 6
-            # We have purposely chosen our bin boundaries so that no points fall
-            # outside (or on the edge) of the [1,5] range
-            bins = np.digitize(binned_density_map.tile_data[input_column], bin_edges)
-
-        elif bin_width is not None:
-            tile_densities = binned_density_map.tile_data[input_column]
-            min_density = np.amin(tile_densities)
-            max_density = np.amax(tile_densities)
-            tot_bins = np.ceil((max_density - min_density) / bin_width)
-            bin_edges = min_density + np.arange(tot_bins + 1) * bin_width
-            print("bin edges: ", bin_edges)
-
-            # Find which bin each tile belongs to
-            bins = np.digitize(binned_density_map.tile_data[input_column], bin_edges)
+        if bin_mode == "linear":
+
+            # Create the extra column here
+            if (N_bins is None) and (bin_width is None):
+                bins = np.array(range(len(binned_density_map.tile_data)))
+
+            if (N_bins is not None) and (bin_width is not None):
+                raise Exception(
+                    "Both sd_Nbins and sd_binwidth are set in the beast_settings file. Please set only one!"
+                )
+
+            if N_bins is not None:
+                # Create the density bins
+                # [min, ., ., ., max]
+                tile_densities = binned_density_map.tile_data[input_column]
+                min_density = np.amin(tile_densities)
+                max_density = np.amax(tile_densities)
+                bin_edges = np.linspace(
+                    min_density - 0.01 * abs(min_density),
+                    max_density + 0.01 * abs(max_density),
+                    N_bins + 1,
+                )
+
+                # Find which bin each tile belongs to
+                # e.g. one of these numbers: 0 [1, 2, 3, 4, 5] 6
+                # We have purposely chosen our bin boundaries so that no points fall
+                # outside (or on the edge) of the [1,5] range
+                bins = np.digitize(
+                    binned_density_map.tile_data[input_column], bin_edges
+                )
+
+            if bin_width is not None:
+                tile_densities = binned_density_map.tile_data[input_column]
+                min_density = np.amin(tile_densities)
+                max_density = np.amax(tile_densities)
+                tot_bins = np.ceil((max_density - min_density) / bin_width)
+                bin_edges = min_density + np.arange(tot_bins + 1) * bin_width
+                print("bin edges: ", bin_edges)
+
+                # Find which bin each tile belongs to
+                bins = np.digitize(
+                    binned_density_map.tile_data[input_column], bin_edges
+                )
+
+        if bin_mode == "log":
+            # Create the extra column here
+            if N_bins is None:
+                raise Exception(
+                    "Please make sure the number of source density bins (sd_Nbins) "
+                    "is properly set in the beast_settings file for log binning "
+                    "to proceed."
+                )
+
+            elif N_bins is not None:
+                # Create the density bins
+                # [min, ., ., ., max]
+                tile_densities = binned_density_map.tile_data[input_column]
+                # print(tile_densities)
+
+                min_density = np.amin(tile_densities[tile_densities > 0.0])
+                max_density = np.amax(tile_densities)
+
+                bin_edges = np.logspace(
+                    np.log10(min_density - 0.01 * abs(min_density)),
+                    np.log10(max_density + 0.01 * abs(max_density)),
+                    N_bins + 1,
+                )
+                print("bin edges: ", bin_edges)
+
+                # Find which bin each tile belongs to
+                # e.g. one of these numbers: 0 [1, 2, 3, 4, 5] 6
+                # We have purposely chosen our bin boundaries so that no points fall
+                # outside (or on the edge) of the [1,5] range
+                bins = np.digitize(
+                    binned_density_map.tile_data[input_column], bin_edges
+                )
 
         # Upgrade to this subclass, and return
         return BinnedDensityMap(binned_density_map.tile_data, bins)

diff --git a/beast/tools/run/make_ast_inputs.py b/beast/tools/run/make_ast_inputs.py
@@ -165,6 +165,7 @@ def make_ast_inputs(beast_settings_info, pick_method="flux_bin_method"):
                 obsdata,
                 chosen_seds,
                 settings.ast_density_table,
+                settings.sd_binmode,
                 settings.sd_Nbins,
                 settings.sd_binwidth,
                 settings.ast_realization_per_model,

diff --git a/beast/tools/split_catalog_using_map.py b/beast/tools/split_catalog_using_map.py
@@ -80,7 +80,10 @@ def split_main(
         )
 
     bdm = BinnedDensityMap.create(
-        mapfile, N_bins=settings.sd_Nbins, bin_width=settings.sd_binwidth
+        mapfile,
+        bin_mode=settings.sd_binmode,
+        N_bins=settings.sd_Nbins,
+        bin_width=settings.sd_binwidth,
     )
 
     print("Splitting catalog")

diff --git a/docs/workflow.rst b/docs/workflow.rst
@@ -164,8 +164,12 @@ How the sources are placed in the image is determined by the ast_source_density_
 variable in `beast_settings.txt`
 
 1. ast_source_density_table is set to `filebase_sourceden_map.hd5`:
-   For each source density or background bin, randomly place the SEDs
-   within pixels of that bin.  Repeat for each of the bins.
+   The source density or background image is split into bins, and for each bin,
+   all selected SEDs are randomly replicated within pixels of that bin. These bins
+   are determined by the beast_settings parameters, and can have linear (default)
+   or log spacing, where the user can determine the number or width of the bins
+   (set using sd_binmode, sd_binwidth and sd_Nbins in beast_settings). This same
+   binning scheme is used later to split the catalogs (next step).
 
 2. ast_source_density_table = None:
    Randomly choose a star from the photometry catalog, and place the
@@ -211,7 +215,10 @@ that don't have full imaging coverage, and to create ds9 region files:
 
 
 The observed catalog should be split into separate files for each source
-density.  In addition, each source density catalog can be split into a set of
+density bin. These bins are determined by the beast_settings parameters,
+and can have linear (default) or log spacing, where the user can determine
+the number or width of the bins (set using sd_binmode, sd_binwidth and sd_Nbins
+in beast_settings). In addition, each source density catalog can be split into a set of
 sub-files to have at most 'n_per_file' sources (or, if there are very few stars
 in a source density bin, at least 'min_n_subfile' sub-files).  The sources are
 sorted by the 'sort_col' flux before splitting to put sources with similar