From 622c2846ec7e53e6f38817001f26f8e56332dd38 Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Mon, 4 Nov 2024 14:38:31 -0500
Subject: [PATCH] try combo_burger now

---
 .../vlm_planning_prompts/few_shot.txt         |   2 +-
 .../vlm_planning_prompts/no_few_shot.txt      |   2 +-
 predicators/envs/burger.py                    |  15 +-
 scripts/configs/pred_invention_vlm.yaml       | 210 +++++++++---------
 4 files changed, 120 insertions(+), 109 deletions(-)
diff --git a/predicators/approaches/vlm_planning_prompts/few_shot.txt b/predicators/approaches/vlm_planning_prompts/few_shot.txt
index f15c94624..dff8a97a2 100644
--- a/predicators/approaches/vlm_planning_prompts/few_shot.txt
+++ b/predicators/approaches/vlm_planning_prompts/few_shot.txt
@@ -31,7 +31,7 @@ Please return a plan that achieves the provided goal from an initial state depic
 Please provide your output in the following format (excluding the angle brackets and ellipsis, which are just for illustration purposes).
 Be sure to include the parens '(' and ')', as well as square brackets '[' and ']' even if there are no objects/continuous parameters.
 Do not bold or italicize or otherwise apply any extra formaating to the plan text. Do not provide any numbers for steps in the plan, or
-any reasoning for each step below the 'Plan:' heading:
+any reasoning for each step below the 'Plan:' heading in your response. Follow the formatting below exactly:
 <Explanation of scene + your reasoning>
 Plan:
 <skill 1 name>(<obj1_name>:<obj1_type_name>, <obj2_name>:<obj2_type_name>, ...)[<continuous_param1_value>, <continuous_param2_value>, ...]
diff --git a/predicators/approaches/vlm_planning_prompts/no_few_shot.txt b/predicators/approaches/vlm_planning_prompts/no_few_shot.txt
index 41101ff34..915bb8fb5 100644
--- a/predicators/approaches/vlm_planning_prompts/no_few_shot.txt
+++ b/predicators/approaches/vlm_planning_prompts/no_few_shot.txt
@@ -21,7 +21,7 @@ Please return a plan that achieves the provided goal from an initial state depic
 Please provide your output in the following format (excluding the angle brackets and ellipsis, which are just for illustration purposes).
 Be sure to include the parens '(' and ')', as well as square brackets '[' and ']' even if there are no objects/continuous parameters.
 Do not bold or italicize or otherwise apply any extra formaating to the plan text. Do not provide any numbers for steps in the plan, or
-any reasoning for each step below the 'Plan:' heading:
+any reasoning for each step below the 'Plan:' heading in your response. Follow the formatting below exactly:
 <Explanation of scene + your reasoning>
 Plan:
 <skill 1 name>(<obj1_name>:<obj1_type_name>, <obj2_name>:<obj2_type_name>, ...)[<continuous_param1_value>, <continuous_param2_value>, ...]
diff --git a/predicators/envs/burger.py b/predicators/envs/burger.py
index 8d172605d..6112bad87 100644
--- a/predicators/envs/burger.py
+++ b/predicators/envs/burger.py
@@ -1272,17 +1272,28 @@ def create_tasks_for_type_one(
                 patty = d["patty1"]
                 tomato = d["lettuce1"]
                 top_bun = d["top_bun1"]
+                r, c = shuffled_spots[7]  # next empty cell
+                patty2 = Object("patty2", self._patty_type)
+                state_dict[patty2] = {"row": r, "col": c, "z": 0}
+                hidden_state[patty2] = {"is_cooked": 0.0, "is_held": 0.0}
+                r, c = shuffled_spots[8]  # next empty cell
+                lettuce2 = Object("lettuce2", self._tomato_type)
+                state_dict[lettuce2] = {"row": r, "col": c, "z": 0}
+                hidden_state[lettuce2] = {"is_sliced": 1.0, "is_held": 0.0}
                 test_goal = {
                     GroundAtom(self._IsCooked, [patty]),
                     GroundAtom(self._IsSliced, [tomato]),
                     GroundAtom(self._On, [patty, bottom_bun]),
                     GroundAtom(self._On, [tomato, patty]),
-                    GroundAtom(self._On, [top_bun, tomato])
+                    GroundAtom(self._On, [top_bun, tomato]),
+                    GroundAtom(self._IsCooked, [patty2]),
+                    GroundAtom(self._IsSliced, [lettuce2]),
                 }
                 alt_test_goal = {
                     GroundAtom(self._GoalHack2, [bottom_bun, patty]),
                     GroundAtom(self._GoalHack4, [patty, tomato]),
-                    GroundAtom(self._On, [top_bun, tomato])
+                    GroundAtom(self._On, [top_bun, tomato]),
+                    GroundAtom(self._GoalHack4, [patty2, lettuce2]),
                 }
                 test_task = create_task(state_dict, hidden_state, test_goal,
                                         alt_test_goal)
diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index 8a2d5aabe..747d9106a 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -1,116 +1,116 @@
 # Experiments to test predicate invention with VLMs
 ---
 APPROACHES:
-  ours:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
-      offline_data_method: geo_and_demo_with_vlm_imgs
-      grammar_search_invent_geo_predicates_only: False
-  ours-vlm-subselection:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_specific
-      offline_data_method: geo_and_demo_with_vlm_imgs
-      grammar_search_invent_geo_predicates_only: True
-  ours-no-subselection:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
-      offline_data_method: geo_and_demo_with_vlm_imgs
-      grammar_search_pred_selection_approach: no_select
-      grammar_search_invent_geo_predicates_only: False
-  ours-no-invent:
-    NAME: "nsrt_learning"
-    FLAGS: {}
-  ours-no-visual:
-    NAME: "grammar_search_invention"
-    FLAGS: 
-      offline_data_method: demo
-  ours-no-geo:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: demo_with_vlm_imgs
-      grammar_search_invent_geo_predicates_only: False
-  interpret:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      offline_data_method: demo_with_vlm_imgs
-      vlm_predicate_vision_api_generate_ground_atoms: True
+  # ours:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
+  #     offline_data_method: geo_and_demo_with_vlm_imgs
+  #     grammar_search_invent_geo_predicates_only: False
+  # ours-vlm-subselection:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_specific
+  #     offline_data_method: geo_and_demo_with_vlm_imgs
+  #     grammar_search_invent_geo_predicates_only: True
+  # ours-no-subselection:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
+  #     offline_data_method: geo_and_demo_with_vlm_imgs
+  #     grammar_search_pred_selection_approach: no_select
+  #     grammar_search_invent_geo_predicates_only: False
+  # ours-no-invent:
+  #   NAME: "nsrt_learning"
+  #   FLAGS: {}
+  # ours-no-visual:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS: 
+  #     offline_data_method: demo
+  # ours-no-geo:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: demo_with_vlm_imgs
+  #     grammar_search_invent_geo_predicates_only: False
+  # interpret:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     offline_data_method: demo_with_vlm_imgs
+  #     vlm_predicate_vision_api_generate_ground_atoms: True
   vila-with-fewshot:
     NAME: "vlm_open_loop"
     FLAGS:
       vlm_open_loop_use_training_demos: True
-  vila-pure:
-    NAME: "vlm_open_loop"
-    FLAGS:
-      vlm_open_loop_use_training_demos: False
+  # vila-pure:
+  #   NAME: "vlm_open_loop"
+  #   FLAGS:
+  #     vlm_open_loop_use_training_demos: False
 
 ENVS:
-  burger_no_move_more_stacks:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "more_stacks"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
-  burger_no_move_fatter_burger:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "fatter_burger"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
+  # burger_no_move_more_stacks:
+  #   NAME: "burger_no_move"
+  #   FLAGS:
+  #     burger_no_move_task_type: "more_stacks"
+  #     bilevel_plan_without_sim: True
+  #     segmenter: option_changes
+  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+  #     grammar_search_task_planning_timeout: 10.0
+  #     sesame_max_skeletons_optimized: 200
+  #     disable_harmlessness_check: True
+  #     sesame_task_planner: fdopt
+  #     excluded_predicates: all
+  #     option_model_terminate_on_repeat: False
+  #     grammar_search_vlm_atom_proposal_use_debug: False
+  #     allow_exclude_goal_predicates: True
+  #     grammar_search_prune_redundant_preds: True
+  #     grammar_search_predicate_cost_upper_bound: 13
+  #     allow_state_allclose_comparison_despite_simulator_state: True
+  #     grammar_search_max_predicates: 100
+  #     grammar_search_parallelize_vlm_labeling: True
+  #     grammar_search_use_handcoded_debug_grammar: False
+  #     grammar_search_select_all_debug: False
+  #     cluster_and_intersect_soft_intersection_for_preconditions: True
+  #     vlm_include_cropped_images: True
+  #     timeout: 80
+  #     grammar_search_grammar_includes_givens: False
+  #     cluster_and_intersect_prune_low_data_pnads: True
+  #     cluster_and_intersect_min_datastore_fraction: 0.05
+  #     num_train_tasks: 12
+  #     precondition_soft_intersection_threshold_percent: 0.8
+  #     grammar_search_early_termination_heuristic_thresh: 2000
+  #     vlm_double_check_output: True
+  # burger_no_move_fatter_burger:
+  #   NAME: "burger_no_move"
+  #   FLAGS:
+  #     burger_no_move_task_type: "fatter_burger"
+  #     bilevel_plan_without_sim: True
+  #     segmenter: option_changes
+  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+  #     grammar_search_task_planning_timeout: 10.0
+  #     sesame_max_skeletons_optimized: 200
+  #     disable_harmlessness_check: True
+  #     sesame_task_planner: fdopt
+  #     excluded_predicates: all
+  #     option_model_terminate_on_repeat: False
+  #     grammar_search_vlm_atom_proposal_use_debug: False
+  #     allow_exclude_goal_predicates: True
+  #     grammar_search_prune_redundant_preds: True
+  #     grammar_search_predicate_cost_upper_bound: 13
+  #     allow_state_allclose_comparison_despite_simulator_state: True
+  #     grammar_search_max_predicates: 100
+  #     grammar_search_parallelize_vlm_labeling: True
+  #     grammar_search_use_handcoded_debug_grammar: False
+  #     grammar_search_select_all_debug: False
+  #     cluster_and_intersect_soft_intersection_for_preconditions: True
+  #     vlm_include_cropped_images: True
+  #     timeout: 80
+  #     grammar_search_grammar_includes_givens: False
+  #     cluster_and_intersect_prune_low_data_pnads: True
+  #     cluster_and_intersect_min_datastore_fraction: 0.05
+  #     num_train_tasks: 12
+  #     precondition_soft_intersection_threshold_percent: 0.8
+  #     grammar_search_early_termination_heuristic_thresh: 2000
+  #     vlm_double_check_output: True
   burger_no_move_combo_burger:
     NAME: "burger_no_move"
     FLAGS:
@@ -139,7 +139,7 @@ ENVS:
       grammar_search_grammar_includes_givens: False
       cluster_and_intersect_prune_low_data_pnads: True
       cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
+      num_train_tasks: 9
       precondition_soft_intersection_threshold_percent: 0.8
       grammar_search_early_termination_heuristic_thresh: 2000
       vlm_double_check_output: True