diff --git a/src/GAMMA/README.md b/src/GAMMA/README.md
index 505ff12..8c15b6c 100644
--- a/src/GAMMA/README.md
+++ b/src/GAMMA/README.md
@@ -13,7 +13,7 @@ In the basic usage, Gamma will create a map space assuming full flexibility in t
 * l2_size: L2 size (Number of elements)
 * slevel_min: The minimum number of parallelism
 * slevel_max: The maximum number of parallelism. The number of parallelism will be in the range [slevel_min, slevel_max]
-* hwconfig: Read in HW configuration from file. An example of hwconfig can be found [here](data/HWconfigs/hw_config.m). An example of using it can be found [here](../run_gamma_with_hwconfig.sh)
+* hwconfig: Read in HW configuration from file. An example of hwconfig can be found [here](../../data/HWconfigs/hw_config.m). An example of using it can be found [here](../../run_gamma_with_hwconfig.sh)
 * epochs: Number of generation for the optimization
 * outdir: The output result directory
diff --git a/src/GAMMA/main.py b/src/GAMMA/main.py
index 9600ede..bd7f0ea 100644
--- a/src/GAMMA/main.py
+++ b/src/GAMMA/main.py
@@ -15,7 +15,7 @@
     parser.add_argument('--offchipBW', type=int, default=-1, help='Off-chip BW')
     parser.add_argument('--hwconfig', type=str, default=None, help='HW configuration file')
     parser.add_argument('--model', type=str, default="resnet18", help='Model to run')
-    parser.add_argument('--num_layer', type=int, default=2, help='Number of layers to optimize')
+    parser.add_argument('--num_layer', type=int, default=0, help='Number of layers to optimize')
     parser.add_argument('--singlelayer', type=int, default=0, help='The layer index to optimize')
     parser.add_argument('--slevel_min', type=int, default=2, help='Minimum number of parallelization level')
     parser.add_argument('--slevel_max', type=int, default=2, help='Maximum number of parallelization level')
diff --git a/src/GAMMA/train.py b/src/GAMMA/train.py
index 284496a..f03106c 100644
--- a/src/GAMMA/train.py
+++ b/src/GAMMA/train.py
@@ -64,6 +64,7 @@ def train_model(model_defs, input_arg, map_cstr=None, chkpt_file='./chkpt'):
                           l2_size=opt.l2_size, NocBW=opt.NocBW, offchipBW=opt.offchipBW, slevel_min=opt.slevel_min, slevel_max=opt.slevel_max,
                           fixedCluster=opt.fixedCluster, log_level=opt.log_level, map_cstr=map_cstr)
     constraints = {"area":opt.area_budget* 1e6}
+    chkpt_list = []
     for dimension in model_defs:
         env.reset_dimension(fitness=fitness, constraints=constraints, dimension=dimension)
         env.reset_hw_parm(num_pe=opt.num_pe, l1_size=opt.l1_size, l2_size=opt.l2_size, pe_limit=opt.pe_limit,area_pebuf_only=False, external_area_model=True)
@@ -87,12 +88,18 @@
             "L1_size": best_l1_size,
             "L2_size": best_l2_size
         }
-        columns = ["runtime", "area", "pe_area_ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area","best_sol"]
-        np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)
-        df = pd.DataFrame(np_array, columns=columns)
-        df.to_csv(chkpt_file[:-4]+".csv")
-        with open(chkpt_file, "wb") as fd:
-            pickle.dump(chkpt, fd)
+        chkpt_list.append(chkpt)
+    columns = ["runtime", "area", "pe_area_ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area","best_sol"]
+    np_array = None
+    for chkpt in chkpt_list:
+        if np_array is None:
+            np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)
+        else:
+            np_array = np.vstack([np_array, np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)])
+    df = pd.DataFrame(np_array, columns=columns)
+    df.to_csv(chkpt_file[:-4]+".csv")
+    with open(chkpt_file, "wb") as fd:
+        pickle.dump(chkpt_list, fd)
 
 def get_cstr_name(mapping_cstr):
     if mapping_cstr:
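
After this change, `train_model` collects one checkpoint dict per layer in `chkpt_list`, writes all of them as rows of a single CSV, and pickles the whole list instead of only the last layer's dict. Below is a minimal sketch of reading the new output format, assuming the default `chkpt_file='./chkpt'` from the function signature and the column names used in the diff; the actual checkpoint path and file naming depend on the caller:

```python
import pickle
import pandas as pd

chkpt_file = "./chkpt"  # default from train_model's signature; callers may pass another path

# The pickle now holds a list with one checkpoint dict per optimized layer.
with open(chkpt_file, "rb") as fd:
    chkpt_list = pickle.load(fd)

for i, chkpt in enumerate(chkpt_list):
    # Keys mirror the dict built in train_model (see the diff above).
    print(f"layer {i}: runtime={chkpt['runtime']}, area={chkpt['area']}, PE={chkpt['PE']}")

# The companion CSV (chkpt_file[:-4] + ".csv" in train_model) has the same rows,
# one per layer, with "best_sol" stored as a string.
df = pd.read_csv(chkpt_file[:-4] + ".csv", index_col=0)
print(df[["runtime", "area", "PE", "L1_size", "L2_size"]])
```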