Minor changes to get full experimental results in burger (#1722)
* should be ready for actual experiments!

* ready for next run of parallel experiments

* final set of experiments to run in parallel

* fixes to prevent space from being eaten up

* relaunch experiment that died due to space issues

* try again on vila

* try gemini

* fatter_burger vila-fewshot

* some changes to get interpret working

* fix train tasks

* fix changes

* should be ready to go!

* fixes
NishanthJKumar authored Oct 29, 2024
1 parent c28ad10 commit 90971f6
Showing 5 changed files with 64 additions and 17 deletions.
7 changes: 6 additions & 1 deletion predicators/datasets/generate_atom_trajs_with_vlm.py
@@ -335,12 +335,16 @@ def _parse_unique_atom_proposals_from_list(
            for og in other_groundings:
                all_atom_groundings.add(og)
            logging.debug(f"Proposed atom: {atom} is valid: {atom_is_valid}")
    logging.info("VISUAL PREDICATES PROPOSED")
    for unique_pred in unique_predicates:
        logging.info(unique_pred)
    logging.info(f"VLM proposed a total of {num_atoms_considered} atoms.")
    logging.info(f"Of these, {len(atoms_strs_set)} were valid and unique.")
    logging.info(
        f"For the {len(unique_predicates)} predicates, there were " \
        f"{len(all_atom_groundings)} unique groundings."
    )
    logging.info("END VISUAL PREDICATES PROPOSALS")
    return all_atom_groundings
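For reference, the "unique groundings" count logged above is the number of distinct object substitutions across the proposed predicates. A minimal, self-contained sketch of that count, assuming predicates are just (name, arity) pairs and objects are strings; all names below are hypothetical, not from this file:

from itertools import permutations

def count_groundings(predicate_arities, objects):
    """Count distinct groundings of each predicate over the given objects."""
    groundings = set()
    for name, arity in predicate_arities.items():
        # Every ordered tuple of distinct objects yields one grounding.
        for args in permutations(objects, arity):
            groundings.add((name,) + args)
    return len(groundings)

# Two unary predicates and one binary predicate over three objects.
print(count_groundings({"Cooked": 1, "IsSliced": 1, "On": 2},
                       ["patty", "tomato", "grill"]))  # 3 + 3 + 6 = 12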


@@ -946,7 +950,8 @@ def _parse_predicate_proposals(
            exec(code_str, context)
            # pylint: enable=exec-used
            utils.abstract(tasks[0].init, [context[pred_name]])
-        except (TypeError, AttributeError, ValueError) as e:
+        except (TypeError, AttributeError, ValueError, IndentationError,
+                NameError) as e:
            # Was using Exception but pylint was complaining, so I'm
            # adding specific exceptions to this tuple as we encounter them.
            error_trace = traceback.format_exc()
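The widened except tuple above guards the exec of model-proposed predicate code, so a syntactically or semantically broken proposal is logged and skipped instead of crashing invention. A standalone sketch of that guard pattern, under the assumption that a proposal is a string defining one classifier function; the names and dummy state below are illustrative, not the repository's API:

import traceback

def proposal_compiles(code_str: str, fn_name: str) -> bool:
    """Exec a proposed classifier and smoke-test it on a dummy state."""
    context: dict = {}
    try:
        exec(code_str, context)  # pylint: disable=exec-used
        context[fn_name]({"x": 0.0})  # smoke test on a dummy state
    except (TypeError, AttributeError, ValueError, IndentationError,
            NameError):
        # Reject malformed proposals rather than letting them propagate.
        print(traceback.format_exc())
        return False
    return True

# A proposal referencing an undefined name is rejected, not fatal.
print(proposal_compiles("def _Holding(s):\n    return undefined > 0", "_Holding"))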
24 changes: 13 additions & 11 deletions predicators/main.py
@@ -416,17 +416,19 @@ def _run_testing(env: BaseEnv, cogman: CogMan) -> Metrics:
            total_low_level_action_cost += (
                len(traj[1]) *
                CFG.refinement_data_low_level_execution_cost)
-            # Save the successful trajectory, e.g., for playback on a robot.
-            traj_file = f"{save_prefix}__task{test_task_idx+1}.traj"
-            traj_file_path = Path(CFG.eval_trajectories_dir) / traj_file
-            # Include the original task too so we know the goal.
-            traj_data = {
-                "task": env_task,
-                "trajectory": traj,
-                "pybullet_robot": CFG.pybullet_robot
-            }
-            with open(traj_file_path, "wb") as f:
-                pkl.dump(traj_data, f)
+            if CFG.save_eval_trajs:
+                # Save the successful trajectory, e.g., for playback on a
+                # robot.
+                traj_file = f"{save_prefix}__task{test_task_idx+1}.traj"
+                traj_file_path = Path(CFG.eval_trajectories_dir) / traj_file
+                # Include the original task too so we know the goal.
+                traj_data = {
+                    "task": env_task,
+                    "trajectory": traj,
+                    "pybullet_robot": CFG.pybullet_robot
+                }
+                with open(traj_file_path, "wb") as f:
+                    pkl.dump(traj_data, f)
        except utils.EnvironmentFailure as e:
            log_message = f"Environment failed with error: {e}"
            caught_exception = True
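With trajectory saving now gated on CFG.save_eval_trajs, playback tooling only has to reload the pickle written above. A minimal sketch of that, assuming a file produced by this block; the directory and file name are hypothetical, and the (states, actions) structure of the trajectory is an assumption rather than something this diff shows:

import pickle as pkl
from pathlib import Path

# Hypothetical output of the save block above.
traj_path = Path("eval_trajectories") / "myapproach__task1.traj"
with open(traj_path, "rb") as f:
    traj_data = pkl.load(f)

task = traj_data["task"]  # original task, so the goal is recoverable
states, actions = traj_data["trajectory"]  # assumed (states, actions) pair
print(traj_data["pybullet_robot"], len(actions))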
2 changes: 2 additions & 0 deletions predicators/settings.py
@@ -712,6 +712,8 @@ class GlobalSettings:
    # At test-time, we will use the below number of states
    # as part of labelling the current state's VLM atoms.
    vlm_test_time_atom_label_prompt_type = "per_scene_naive"
    # Whether or not to save eval trajectories
    save_eval_trajs = True

    @classmethod
    def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]:
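One practical note on a boolean default like save_eval_trajs: overrides arriving from a config file or command line are often strings, and "False" is a non-empty (truthy) string, so an override has to be coerced against the default's type. A small sketch of that coercion, independent of how this repository actually parses its flags:

def coerce_to_default_type(raw: str, default):
    """Coerce a string override to the type of the existing default."""
    if isinstance(default, bool):
        return raw.strip().lower() in ("true", "1", "yes")
    return type(default)(raw)

assert coerce_to_default_type("False", True) is False
assert coerce_to_default_type("10", 50) == 10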
13 changes: 8 additions & 5 deletions predicators/utils.py
@@ -2489,11 +2489,14 @@ def query_vlm_for_atom_vals(
        return set()
    for i, (atom_query, curr_vlm_output_line) in enumerate(
            zip(atom_queries_list, all_vlm_responses)):
-        assert atom_query + ":" in curr_vlm_output_line
-        assert "." in curr_vlm_output_line
-        value = curr_vlm_output_line.split(': ')[-1].strip('.').lower()
-        if value == "true":
-            true_atoms.add(vlm_atoms[i])
+        try:
+            assert atom_query + ":" in curr_vlm_output_line
+            assert "." in curr_vlm_output_line
+            value = curr_vlm_output_line.split(': ')[-1].strip('.').lower()
+            if value == "true":
+                true_atoms.add(vlm_atoms[i])
+        except AssertionError:  # pragma: no cover
+            continue
    return true_atoms


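The change above wraps the per-line parsing in a try/except so one malformed VLM response line no longer aborts the whole labeling call. A standalone sketch of the same parsing pattern; the queries and response lines below are made up for illustration:

def parse_true_atom_indices(atom_queries, response_lines):
    """Return indices of atoms the VLM labeled True, skipping malformed lines."""
    true_idxs = set()
    for i, (query, line) in enumerate(zip(atom_queries, response_lines)):
        try:
            assert query + ":" in line
            assert "." in line
            value = line.split(': ')[-1].strip('.').lower()
            if value == "true":
                true_idxs.add(i)
        except AssertionError:
            continue  # malformed line: leave this atom unlabeled
    return true_idxs

queries = ["Holding(robot, patty)", "Cooked(patty)", "OnGrill(patty)"]
lines = ["Holding(robot, patty): True.", "Cooked(patty): False.", "garbled output"]
print(parse_true_atom_indices(queries, lines))  # {0}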
35 changes: 35 additions & 0 deletions scripts/configs/pred_invention_vlm.yaml
@@ -78,6 +78,7 @@ ENVS:
      num_train_tasks: 12
      precondition_soft_intersection_threshold_percent: 0.8
      grammar_search_early_termination_heuristic_thresh: 2000
      vlm_double_check_output: True
  burger_no_move_fatter_burger:
    NAME: "burger_no_move"
    FLAGS:
@@ -109,12 +110,46 @@ ENVS:
      num_train_tasks: 12
      precondition_soft_intersection_threshold_percent: 0.8
      grammar_search_early_termination_heuristic_thresh: 2000
      vlm_double_check_output: True
  burger_no_move_combo_burger:
    NAME: "burger_no_move"
    FLAGS:
      burger_no_move_task_type: "combo_burger"
      bilevel_plan_without_sim: True
      segmenter: option_changes
      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
      grammar_search_task_planning_timeout: 10.0
      sesame_max_skeletons_optimized: 200
      disable_harmlessness_check: True
      sesame_task_planner: fdopt
      excluded_predicates: all
      option_model_terminate_on_repeat: False
      grammar_search_vlm_atom_proposal_use_debug: False
      allow_exclude_goal_predicates: True
      grammar_search_prune_redundant_preds: True
      grammar_search_predicate_cost_upper_bound: 13
      allow_state_allclose_comparison_despite_simulator_state: True
      grammar_search_max_predicates: 100
      grammar_search_parallelize_vlm_labeling: True
      grammar_search_use_handcoded_debug_grammar: False
      grammar_search_select_all_debug: False
      cluster_and_intersect_soft_intersection_for_preconditions: True
      vlm_include_cropped_images: True
      timeout: 80
      grammar_search_grammar_includes_givens: False
      cluster_and_intersect_prune_low_data_pnads: True
      cluster_and_intersect_min_datastore_fraction: 0.05
      num_train_tasks: 12
      precondition_soft_intersection_threshold_percent: 0.8
      grammar_search_early_termination_heuristic_thresh: 2000
      vlm_double_check_output: True


ARGS: []
FLAGS:
  vlm_model_name: gpt-4o
  num_test_tasks: 10
  save_eval_trajs: False
START_SEED: 0
NUM_SEEDS: 5
...
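For orientation, configs like this are consumed by the launch scripts, which expand them into one run per environment and seed with the FLAGS appended to each command. A rough sketch of that expansion; the command format and flag merging shown here are assumptions about the launcher, not taken from it:

def expand_config(cfg):
    """Yield one hypothetical command line per (environment, seed) pair."""
    for env_spec in cfg["ENVS"].values():
        # Per-environment flags override the global FLAGS section.
        flags = {**cfg.get("FLAGS", {}), **env_spec.get("FLAGS", {})}
        flag_str = " ".join(f"--{k} {v}" for k, v in flags.items())
        for seed in range(cfg["START_SEED"],
                          cfg["START_SEED"] + cfg["NUM_SEEDS"]):
            yield (f"python predicators/main.py --env {env_spec['NAME']} "
                   f"--seed {seed} {flag_str}")

cfg = {
    "ENVS": {"burger_no_move_combo_burger": {
        "NAME": "burger_no_move",
        "FLAGS": {"num_train_tasks": 12}}},
    "FLAGS": {"vlm_model_name": "gpt-4o", "save_eval_trajs": False},
    "START_SEED": 0,
    "NUM_SEEDS": 5,
}
for cmd in expand_config(cfg):
    print(cmd)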
