Skip to content

Commit

Permalink
fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
ollmer committed Jan 30, 2025
1 parent a280e6b commit eacbc30
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion conf/gaia_demo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ defaults:
- llm: gpt4o
- _self_

exp_name: gpt4o_demo2
exp_name: gpt4o_demo_hotel2

exp_path: outputs/gaia/runs/${exp_name}
split: validation
Expand Down
2 changes: 1 addition & 1 deletion conf/llm/gpt4o.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ model_name: chatgpt-4o-latest
use_cache: false
context_size: 128000
parameters:
temperature: 0.0
temperature: 0.2
2 changes: 1 addition & 1 deletion examples/gaia_agent/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
Check if the action lead to desired outcome or not. Then explain its effect on the task and the plan.
After that propose the best next step to do, according to the plan. Do not forget to mention the reasoning behind the next step.
If you see the cookie consent form, accept it first.
Quote the relevant part of the observation if possible.
Quote the relevant part of the observation if the action depends on it, for example when interacting with the page.
{FORMAT}"""

VERIFY = f"""
Expand Down
2 changes: 2 additions & 0 deletions examples/gaia_agent/scripts/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ def render_step(step: Step) -> str:
msg = f"No answer found:\n{step.overview}"
elif step.kind in ["python_code_action", "search_action", "watch_video_action"]:
msg = to_pretty_str(step.llm_dict())
elif step and step.kind == "scroll_action":
msg = "Scrolling..."
elif isinstance(step, Action):
msg = "Interacting with the browser..."
elif step and step.kind == "search_results_observation":
Expand Down
16 changes: 10 additions & 6 deletions tapeagents/tools/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ class Browser(Multitool):
observations: tuple[type[Observation], ...] = (PageObservation,)
tab_actions: list[type[Action]] = [CloseTabAction, NewTabAction, TabFocusAction]
axtree: bool = True
viewport_size: int = 64000
viewport_chars: int = 64000
vieport_height: int = 768
viewport_width: int = 1024
timeout_ms: int = 30000
headless: bool = True
save_video: bool = False
Expand Down Expand Up @@ -226,7 +228,7 @@ def model_post_init(self, __context: Any):
record_video_dir=self._record_video_dir if self.save_video else None,
action_mapping=HighLevelActionSet(demo_mode="default").to_python_code,
timeout=self.timeout_ms,
viewport={"width": 1024, "height": 768},
viewport={"width": self.viewport_width, "height": self.vieport_height},
task_kwargs={"start_url": "about:blank"},
**self.gym_kwargs,
) # type: ignore
Expand Down Expand Up @@ -325,10 +327,12 @@ def format_error(self, err: str) -> str:
def scroll(self, direction: str) -> PageObservation:
if direction == "down" and self._current_viewport < self._n_viewports:
self._current_viewport += 1
self.run_browser_action(f"scroll(0, {self.vieport_height})")
elif direction == "up" and self._current_viewport > 1:
self._current_viewport -= 1
self.run_browser_action(f"scroll(0, -{self.vieport_height})")
page = self._current_page[
self.viewport_size * (self._current_viewport - 1) : self.viewport_size * self._current_viewport
self.viewport_chars * (self._current_viewport - 1) : self.viewport_chars * self._current_viewport
]
return PageObservation(text=page, current_page=self._current_viewport, total_pages=self._n_viewports)

Expand Down Expand Up @@ -394,10 +398,10 @@ def next_page(self) -> PageObservation:

def get_viewport(self, content: str) -> str:
self._current_page = content
self._n_viewports = len(self._current_page) // self.viewport_size + 1
self._n_viewports = len(self._current_page) // self.viewport_chars + 1
self._current_viewport = 1
return self._current_page[
self.viewport_size * (self._current_viewport - 1) : self.viewport_size * self._current_viewport
self.viewport_chars * (self._current_viewport - 1) : self.viewport_chars * self._current_viewport
]


Expand All @@ -415,7 +419,7 @@ def flatten_axtree(
coord_decimals: int = 0,
ignored_properties=IGNORED_AXTREE_PROPERTIES,
ignore_navigation: bool = False,
hide_bid_if_invisible: bool = False,
hide_bid_if_invisible: bool = True,
hide_all_children: bool = False,
nodes_with_bid: list[str] = NODES_WITH_BID,
) -> str:
Expand Down

0 comments on commit eacbc30

Please sign in to comment.