-
Notifications
You must be signed in to change notification settings - Fork 77
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Consolidating AgentState metadata #814
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,10 +6,7 @@ | |
|
||
from typing import List, Dict, Optional, Any, TYPE_CHECKING | ||
from mephisto.abstractions.blueprint import AgentState | ||
import os | ||
import json | ||
import time | ||
import weakref | ||
import os.path | ||
|
||
if TYPE_CHECKING: | ||
from mephisto.data_model.agent import Agent | ||
|
@@ -31,30 +28,11 @@ def _get_empty_state(self) -> Dict[str, Optional[Dict[str, Any]]]: | |
return { | ||
"inputs": None, | ||
"outputs": None, | ||
"times": {"task_start": 0, "task_end": 0}, | ||
} | ||
|
||
def __init__(self, agent: "Agent"): | ||
""" | ||
Static agent states should store | ||
input dict -> output dict pairs to disc | ||
""" | ||
self.agent = weakref.proxy(agent) | ||
self.state: Dict[str, Optional[Dict[str, Any]]] = self._get_empty_state() | ||
self.load_data() | ||
|
||
def set_init_state(self, data: Any) -> bool: | ||
def _set_init_state(self, data: Any): | ||
"""Set the initial state for this agent""" | ||
if self.get_init_state() is not None: | ||
# Initial state is already set | ||
return False | ||
else: | ||
self.state["inputs"] = data | ||
times_dict = self.state["times"] | ||
assert isinstance(times_dict, dict) | ||
times_dict["task_start"] = time.time() | ||
self.save_data() | ||
return True | ||
self.state["inputs"] = data | ||
|
||
def get_init_state(self) -> Optional[Dict[str, Any]]: | ||
""" | ||
|
@@ -65,27 +43,29 @@ def get_init_state(self) -> Optional[Dict[str, Any]]: | |
return None | ||
return self.state["inputs"].copy() | ||
|
||
def load_data(self) -> None: | ||
def _load_data(self) -> None: | ||
"""Load data for this agent from disk""" | ||
data_dir = self.agent.get_data_dir() | ||
data_path = os.path.join(data_dir, DATA_FILE) | ||
if os.path.exists(data_path): | ||
with open(data_path, "r") as data_file: | ||
self.state = json.load(data_file) | ||
if self.agent.db.key_exists(data_path): | ||
self.state = self.agent.db.read_dict(data_path) | ||
# Old compatibility with saved times | ||
if "times" in self.state: | ||
assert isinstance(self.state["times"], dict) | ||
self.metadata.task_start = self.state["times"]["task_start"] | ||
self.metadata.task_end = self.state["times"]["task_end"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tbh I'm happy keeping the times within the "times" top level field, as library managed metadata... and the new metadata field for plugins and extensions to use. it might make some of the compatibility stuff here easier to manage though i see you've already handled it. I don't have a strong opinion on this though There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the cost of standardization here for these simple fields is overall worth it. I likely should have done this before. |
||
else: | ||
self.state = self._get_empty_state() | ||
|
||
def get_data(self) -> Dict[str, Any]: | ||
"""Return dict of this agent's state""" | ||
return self.state.copy() | ||
|
||
def save_data(self) -> None: | ||
def _save_data(self) -> None: | ||
"""Save static agent data to disk""" | ||
data_dir = self.agent.get_data_dir() | ||
os.makedirs(data_dir, exist_ok=True) | ||
out_filename = os.path.join(data_dir, DATA_FILE) | ||
with open(out_filename, "w+") as data_file: | ||
json.dump(self.state, data_file) | ||
self.agent.db.write_dict(out_filename, self.state) | ||
logger.info(f"SAVED_DATA_TO_DISC at {out_filename}") | ||
|
||
def update_data(self, live_update: Dict[str, Any]) -> None: | ||
|
@@ -94,7 +74,7 @@ def update_data(self, live_update: Dict[str, Any]) -> None: | |
""" | ||
raise Exception("Static tasks should only have final act, but got live update") | ||
|
||
def update_submit(self, submission_data: Dict[str, Any]) -> None: | ||
def _update_submit(self, submission_data: Dict[str, Any]) -> None: | ||
"""Move the submitted output to the local dict""" | ||
outputs: Dict[str, Any] | ||
assert isinstance(submission_data, dict), ( | ||
|
@@ -105,23 +85,3 @@ def update_submit(self, submission_data: Dict[str, Any]) -> None: | |
if output_files is not None: | ||
submission_data["files"] = [f["filename"] for f in submission_data["files"]] | ||
self.state["outputs"] = submission_data | ||
times_dict = self.state["times"] | ||
assert isinstance(times_dict, dict) | ||
times_dict["task_end"] = time.time() | ||
self.save_data() | ||
|
||
def get_task_start(self) -> Optional[float]: | ||
""" | ||
Extract out and return the start time recorded for this task. | ||
""" | ||
stored_times = self.state["times"] | ||
assert stored_times is not None | ||
return stored_times["task_start"] | ||
|
||
def get_task_end(self) -> Optional[float]: | ||
""" | ||
Extract out and return the end time recorded for this task. | ||
""" | ||
stored_times = self.state["times"] | ||
assert stored_times is not None | ||
return stored_times["task_end"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was imagining that adding the tips plugin would be as simple as doing
yarn add mephisto-worker-addons
and using it without having to modify the back-end code much. Would this require end users to update the python code to use different subclasses for AgentStateMetadata?Also what if they wanted to compose different plugins? For example, let's say in the future we introduce a plugin for gamification or leaderboards. It would be nice if adding those plugins just worked with the metadata storage, without having to define and compose the dataclasses.
Not sure if this is the right solution, but just wanted to articulate the use case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Valid concern, but I'm wary of projecting too far here. Additional plugins could piggyback on metadata by adding directly to this data class on functionality we feel is standard or general enough. Anything additional or in development should probably just store on an AgentState+Blueprint if the functionality is too complex to simply be in this metadata dataclass.