diff --git a/docs/architecture_overview.md b/docs/architecture_overview.md
index d03c933d3..66add2403 100644
--- a/docs/architecture_overview.md
+++ b/docs/architecture_overview.md
@@ -78,7 +78,7 @@ The classes are all expected to provide an `Args` class, which is a dataclass th
There's also the abstraction of the `MephistoDB`, which defines the data operations that Mephisto requires to operate properly. If a specific setup requires specialized data handling, any class implementing this interface could stand in for it.
## Blueprints
-The [blueprints](https://github.com/facebookresearch/Mephisto/tree/master/mephisto/server/blueprints) contain all of the related code required to set up a task run. Blueprints may follow a hierarchical structure, in that general functionality can be written into abstract blueprints (which are powerful but perhaps hard to configure) and then downstream blueprints may have more configuration control but less breadth. Much of this can actually require significant overhead, so we've created additional abstract classes that a blueprint must link to an implementation for. These are listed below:
+The [blueprints](https://github.com/facebookresearch/Mephisto/tree/master/mephisto/abstractions/blueprints) contain all of the related code required to set up a task run. Blueprints may follow a hierarchical structure, in that general functionality can be written into abstract blueprints (which are powerful but perhaps hard to configure) and then downstream blueprints may have more configuration control but less breadth. Much of this can actually require significant overhead, so we've created additional abstract classes that a blueprint must link to an implementation for. These are listed below:
### BlueprintArgs
These define the specific arguments for configuring a blueprint. Some base arguments relevant to all blueprints are provided already, and classes that override `BlueprintArgs` can add anything else they want.
### SharedTaskState
diff --git a/docs/hydra_migration.md b/docs/hydra_migration.md
index d44549f16..2cee70af3 100644
--- a/docs/hydra_migration.md
+++ b/docs/hydra_migration.md
@@ -9,10 +9,10 @@ This document shows the transition steps from moving from the old format to the
import os
import time
import shlex
-from mephisto.core.operator import Operator
-from mephisto.core.utils import get_root_dir
-from mephisto.server.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE
-from mephisto.utils.scripts import MephistoRunScriptParser, str2bool
+from mephisto.operations.operator import Operator
+from mephisto.operations.utils import get_root_dir
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE
+from mephisto.tools.scripts import MephistoRunScriptParser, str2bool
parser = MephistoRunScriptParser()
parser.add_argument(
@@ -214,7 +214,7 @@ defaults = [
{"conf": "example"},
]
-from mephisto.core.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
@dataclass
class TestScriptConfig(RunScriptConfig):
@@ -321,10 +321,10 @@ operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30)
```python
import os
import shlex # shlex is no longer required, as we're not using arg strings
-from mephisto.core.operator import Operator
-from mephisto.core.utils import get_root_dir
-from mephisto.server.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE
-from mephisto.utils.scripts import MephistoRunScriptParser, str2bool # RunScriptParser has been deprecated.
+from mephisto.operations.operator import Operator
+from mephisto.operations.utils import get_root_dir
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE
+from mephisto.tools.scripts import MephistoRunScriptParser, str2bool # RunScriptParser has been deprecated.
```
We remove unnecessary or deprecated imports.
@@ -334,9 +334,9 @@ We'll need to add a few things. First, `load_db_and_process_config` covers the o
Mephisto now defines run scripts and configurations using Hydra and dataclasses, as such you'll need some imports from `dataclasses`, `hydra`, and `omegaconf` (which is the configuration library that powers hydra).
```python
import os
-from mephisto.core.operator import Operator
-from mephisto.utils.scripts import load_db_and_process_config
-from mephisto.server.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE, SharedParlAITaskState
+from mephisto.operations.operator import Operator
+from mephisto.tools.scripts import load_db_and_process_config
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE, SharedParlAITaskState
import hydra
from omegaconf import DictConfig
@@ -349,9 +349,9 @@ from typing import List, Any
```python
# parlai_test_script.py
import os
-from mephisto.core.operator import Operator
-from mephisto.utils.scripts import load_db_and_process_config
-from mephisto.server.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE, SharedParlAITaskState
+from mephisto.operations.operator import Operator
+from mephisto.tools.scripts import load_db_and_process_config
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import BLUEPRINT_TYPE, SharedParlAITaskState
import hydra
from omegaconf import DictConfig
@@ -368,7 +368,7 @@ defaults = [
{"conf": "example"},
]
-from mephisto.core.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
@dataclass
class TestScriptConfig(RunScriptConfig):
diff --git a/docs/quickstart.md b/docs/quickstart.md
index c6f45db56..df56882b2 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -103,7 +103,7 @@ $ cd examples/simple_static_task
$ python static_test_script.py mephisto/architect=heroku mephisto.provider.requester_name=my_mturk_user_sandbox
Locating heroku...
INFO - Creating a task run under task name: html-static-task-example
-[mephisto.core.operator][INFO] - Creating a task run under task name: html-static-task-example
+[mephisto.operations.operator][INFO] - Creating a task run under task name: html-static-task-example
Building server files...
...
diff --git a/examples/parlai_chat_task_demo/parlai_test_script.py b/examples/parlai_chat_task_demo/parlai_test_script.py
index 20be32d91..aa7d628ac 100644
--- a/examples/parlai_chat_task_demo/parlai_test_script.py
+++ b/examples/parlai_chat_task_demo/parlai_test_script.py
@@ -6,9 +6,9 @@
import os
-from mephisto.core.operator import Operator
-from mephisto.utils.scripts import load_db_and_process_config
-from mephisto.server.blueprints.parlai_chat.parlai_chat_blueprint import (
+from mephisto.operations.operator import Operator
+from mephisto.tools.scripts import load_db_and_process_config
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import (
BLUEPRINT_TYPE,
SharedParlAITaskState,
)
@@ -28,7 +28,7 @@
{"conf": "example"},
]
-from mephisto.core.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
@dataclass
diff --git a/examples/simple_static_task/examine_results.py b/examples/simple_static_task/examine_results.py
index 1d8b00275..b85820184 100644
--- a/examples/simple_static_task/examine_results.py
+++ b/examples/simple_static_task/examine_results.py
@@ -4,8 +4,8 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.core.data_browser import DataBrowser as MephistoDataBrowser
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.tools.data_browser import DataBrowser as MephistoDataBrowser
from mephisto.data_model.worker import Worker
from mephisto.data_model.assignment import Unit
diff --git a/examples/simple_static_task/static_run_with_onboarding.py b/examples/simple_static_task/static_run_with_onboarding.py
index 1c43fbb77..664411956 100644
--- a/examples/simple_static_task/static_run_with_onboarding.py
+++ b/examples/simple_static_task/static_run_with_onboarding.py
@@ -5,11 +5,13 @@
# LICENSE file in the root directory of this source tree.
import os
-from mephisto.core.operator import Operator
-from mephisto.core.utils import get_root_dir
-from mephisto.utils.scripts import load_db_and_process_config
-from mephisto.server.blueprints.static_task.static_html_blueprint import BLUEPRINT_TYPE
-from mephisto.server.blueprints.abstract.static_task.static_blueprint import (
+from mephisto.operations.operator import Operator
+from mephisto.operations.utils import get_root_dir
+from mephisto.tools.scripts import load_db_and_process_config
+from mephisto.abstractions.blueprints.static_html_task.static_html_blueprint import (
+ BLUEPRINT_TYPE,
+)
+from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
SharedStaticTaskState,
)
@@ -28,7 +30,7 @@
{"conf": "onboarding_example"},
]
-from mephisto.core.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
@dataclass
diff --git a/examples/simple_static_task/static_test_script.py b/examples/simple_static_task/static_test_script.py
index 5a98517e6..ecbeb2bc5 100644
--- a/examples/simple_static_task/static_test_script.py
+++ b/examples/simple_static_task/static_test_script.py
@@ -5,10 +5,12 @@
# LICENSE file in the root directory of this source tree.
import os
-from mephisto.core.operator import Operator
-from mephisto.core.utils import get_root_dir
-from mephisto.server.blueprints.static_task.static_html_blueprint import BLUEPRINT_TYPE
-from mephisto.utils.scripts import load_db_and_process_config
+from mephisto.operations.operator import Operator
+from mephisto.operations.utils import get_root_dir
+from mephisto.abstractions.blueprints.static_html_task.static_html_blueprint import (
+ BLUEPRINT_TYPE,
+)
+from mephisto.tools.scripts import load_db_and_process_config
import hydra
from omegaconf import DictConfig
@@ -25,7 +27,7 @@
{"conf": "example"},
]
-from mephisto.core.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
@dataclass
diff --git a/examples/static_react_task/run_task.py b/examples/static_react_task/run_task.py
index 482729d1d..85e7b1704 100644
--- a/examples/static_react_task/run_task.py
+++ b/examples/static_react_task/run_task.py
@@ -7,13 +7,13 @@
import os
import shutil
import subprocess
-from mephisto.core.operator import Operator
-from mephisto.core.utils import get_root_dir
-from mephisto.utils.scripts import load_db_and_process_config
-from mephisto.server.blueprints.static_react_task.static_react_blueprint import (
+from mephisto.operations.operator import Operator
+from mephisto.operations.utils import get_root_dir
+from mephisto.tools.scripts import load_db_and_process_config
+from mephisto.abstractions.blueprints.static_react_task.static_react_blueprint import (
BLUEPRINT_TYPE,
)
-from mephisto.server.blueprints.abstract.static_task.static_blueprint import (
+from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
SharedStaticTaskState,
)
@@ -31,7 +31,7 @@
{"conf": "example"},
]
-from mephisto.core.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
@dataclass
diff --git a/mephisto/__init__.py b/mephisto/__init__.py
index 1a894d18e..24b1850bd 100644
--- a/mephisto/__init__.py
+++ b/mephisto/__init__.py
@@ -3,9 +3,9 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.core.registry import fill_registries
-from mephisto.core.config_handler import init_config
-from mephisto.core.hydra_config import initialize_named_configs
+from mephisto.operations.registry import fill_registries
+from mephisto.operations.config_handler import init_config
+from mephisto.operations.hydra_config import initialize_named_configs
__version__ = "0.1.0"
diff --git a/mephisto/providers/mturk/utils/__init__.py b/mephisto/abstractions/__init__.py
similarity index 100%
rename from mephisto/providers/mturk/utils/__init__.py
rename to mephisto/abstractions/__init__.py
diff --git a/mephisto/abstractions/architect.py b/mephisto/abstractions/architect.py
new file mode 100644
index 000000000..935e1edc2
--- /dev/null
+++ b/mephisto/abstractions/architect.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+from typing import Dict, List, Any, ClassVar, Type, TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    from mephisto.abstractions.channel import Channel
+    from mephisto.data_model.packet import Packet
+    from mephisto.data_model.task_run import TaskRun
+    from mephisto.abstractions.database import MephistoDB
+    from mephisto.abstractions.blueprint import SharedTaskState
+    from argparse import _ArgumentGroup as ArgumentGroup
+
+
+@dataclass
+class ArchitectArgs:
+ """Base class for arguments to configure architects"""
+
+ _architect_type: str = MISSING
+
+
+class Architect(ABC):
+ """
+ Provides methods for setting up a server somewhere and deploying tasks
+ onto that server.
+ """
+
+ ArgsClass: ClassVar[Type[ArchitectArgs]] = ArchitectArgs
+ ARCHITECT_TYPE: str
+
+ def __init__(
+ self,
+ db: "MephistoDB",
+ args: DictConfig,
+ shared_state: "SharedTaskState",
+ task_run: "TaskRun",
+ build_dir_root: str,
+ ):
+ """
+ Initialize this architect with whatever options are provided given
+ ArgsClass. Parse whatever additional options may be required
+ for the specific task_run.
+
+ Also set up any required database/memory into the MephistoDB so that
+ this data can be stored long-term.
+ """
+ raise NotImplementedError()
+
+ @classmethod
+ def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
+ """
+ Assert that the provided arguments are valid. Should
+ fail if a task launched with these arguments would
+ not work.
+
+ This should include throwing an exception if the architect
+ needs login details or something similar given the
+ arguments passed in.
+ """
+ return
+
+ def get_channels(
+ self,
+ on_channel_open: Callable[[str], None],
+ on_catastrophic_disconnect: Callable[[str], None],
+ on_message: Callable[[str, "Packet"], None],
+ ) -> List["Channel"]:
+ """
+ Return a list of all relevant channels that the Supervisor will
+ need to register to in order to function
+ """
+ raise NotImplementedError()
+
+ def download_file(self, filename: str, save_dir: str) -> None:
+ """
+ Save the file that is noted as stored on the server to
+ the desired save location.
+ """
+ raise NotImplementedError()
+
+ def prepare(self) -> str:
+ """
+ Produce the server files that will be deployed to the server
+ """
+ raise NotImplementedError()
+
+ def deploy(self) -> str:
+ """
+ Launch the server, and push the task files to the server. Return
+ the server URL
+ """
+ raise NotImplementedError()
+
+ def cleanup(self) -> None:
+ """
+ Remove any files that were used for the deployment process that
+ no longer need to be kept track of now that the task has
+ been launched.
+ """
+ raise NotImplementedError()
+
+ def shutdown(self) -> None:
+ """
+        Shut down the server launched by this Architect, as stored
+ in the db.
+ """
+ raise NotImplementedError()
diff --git a/mephisto/server/architects/README.md b/mephisto/abstractions/architects/README.md
similarity index 100%
rename from mephisto/server/architects/README.md
rename to mephisto/abstractions/architects/README.md
diff --git a/test/providers/__init__.py b/mephisto/abstractions/architects/__init__.py
similarity index 100%
rename from test/providers/__init__.py
rename to mephisto/abstractions/architects/__init__.py
diff --git a/test/providers/mturk/__init__.py b/mephisto/abstractions/architects/channels/__init__.py
similarity index 100%
rename from test/providers/mturk/__init__.py
rename to mephisto/abstractions/architects/channels/__init__.py
diff --git a/mephisto/abstractions/architects/channels/websocket_channel.py b/mephisto/abstractions/architects/channels/websocket_channel.py
new file mode 100644
index 000000000..57a12b81a
--- /dev/null
+++ b/mephisto/abstractions/architects/channels/websocket_channel.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Callable, Optional
+from mephisto.data_model.packet import Packet
+from mephisto.abstractions.channel import Channel, STATUS_CHECK_TIME
+
+import errno
+import websocket
+import threading
+import json
+import time
+
+from mephisto.operations.logger_core import get_logger
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+
+class WebsocketChannel(Channel):
+ """
+ Channel for communicating with a server via websockets.
+ """
+
+ def __init__(
+ self,
+ channel_id: str,
+ on_channel_open: Callable[[str], None],
+ on_catastrophic_disconnect: Callable[[str], None],
+ on_message: Callable[[str, Packet], None],
+ socket_url: str,
+ ):
+ """
+ Create a channel by the given name, and initialize any resources that
+ will later be required during the `open` call.
+
+ Requires a socket_url to connect with.
+ """
+ super().__init__(
+ channel_id=channel_id,
+ on_channel_open=on_channel_open,
+ on_catastrophic_disconnect=on_catastrophic_disconnect,
+ on_message=on_message,
+ )
+ self.socket_url = socket_url
+ self.socket: Optional[websocket.WebSocketApp] = None
+ self.thread: Optional[threading.Thread] = None
+ self._is_alive = False
+ self._is_closed = False
+
+ def is_closed(self):
+ """
+ Return whether or not this connection has been explicitly closed
+ by the supervisor or another source.
+ """
+ return self._is_closed
+
+ def close(self):
+ """
+ Close this channel, and ensure that all threads and surrounding
+ resources are cleaned up
+ """
+ self._is_closed = True
+ try:
+ self.socket.close()
+ except Exception:
+ # socket already closed
+ pass
+ self._is_alive = False
+ if self.thread is not None:
+ self.thread.join()
+
+ def is_alive(self):
+        """Return if this channel is actively able to send/receive messages."""
+ return self._is_alive
+
+ def open(self):
+ """Set up a socket handling thread."""
+
+ def on_socket_open(*args):
+ self._is_alive = True
+ self.on_channel_open(self.channel_id)
+ logger.info(f"channel open {args}")
+
+ def on_error(ws, error):
+ if hasattr(error, "errno"):
+ if error.errno == errno.ECONNREFUSED:
+ # TODO(CLEAN) replace with channel exception
+ raise Exception(
+ f"Socket {self.socket_url} refused connection, cancelling"
+ )
+ else:
+ logger.error(f"Socket logged error: {error}")
+ if isinstance(error, websocket._exceptions.WebSocketException):
+ return
+
+ import traceback
+
+ traceback.print_exc()
+ try:
+ # Close the socket to attempt to reconnect
+ self.socket.close()
+ self.socket.keep_running = False
+ except Exception:
+ # TODO(CLEAN) only catch socket closed connection
+ # Already closed
+ pass
+
+ def on_disconnect(*args):
+ """Disconnect event is a no-op for us, as the server reconnects
+ automatically on a retry.
+ """
+ # TODO(OWN) we need to set a timeout for reconnecting to the server,
+ # if it fails it's time to call on_catastrophic_disconnect
+ pass
+
+ def on_message(*args):
+ """Incoming message handler defers to the internal handler"""
+ try:
+ packet_dict = json.loads(args[1])
+ packet = Packet.from_dict(packet_dict)
+ self.on_message(self.channel_id, packet)
+ except Exception as e:
+ # TODO(CLEAN) properly handle only failed from_dict calls
+ logger.exception(repr(e), exc_info=True)
+ raise
+
+ def run_socket(*args):
+ while not self._is_closed:
+ try:
+ socket = websocket.WebSocketApp(
+ self.socket_url,
+ on_message=on_message,
+ on_error=on_error,
+ on_close=on_disconnect,
+ )
+ self.socket = socket
+ socket.on_open = on_socket_open
+ socket.run_forever(ping_interval=8 * STATUS_CHECK_TIME)
+ except Exception as e:
+ logger.exception(
+ f"Socket error {repr(e)}, attempting restart", exc_info=True
+ )
+ time.sleep(0.2)
+
+ # Start listening thread
+ self.thread = threading.Thread(
+ target=run_socket, name=f"socket-thread-{self.socket_url}"
+ )
+ self.thread.start()
+
+ def send(self, packet: "Packet") -> bool:
+ """
+ Send the packet given to the intended recipient.
+ Return True on success and False on failure.
+ """
+ if self.socket is None:
+ return False
+ try:
+ data = packet.to_sendable_dict()
+ self.socket.send(json.dumps(data))
+ except websocket.WebSocketConnectionClosedException:
+ # The channel died mid-send, wait for it to come back up
+ return False
+ except BrokenPipeError:
+ # The channel died mid-send, wait for it to come back up
+ return False
+ except Exception as e:
+ logger.exception(
+ f"Unexpected socket error occured: {repr(e)}", exc_info=True
+ )
+ return False
+ return True
diff --git a/mephisto/abstractions/architects/heroku_architect.py b/mephisto/abstractions/architects/heroku_architect.py
new file mode 100644
index 000000000..9b6dd2cc1
--- /dev/null
+++ b/mephisto/abstractions/architects/heroku_architect.py
@@ -0,0 +1,460 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import getpass
+import glob
+import hashlib
+import netrc
+import os
+import platform
+import sh
+import shlex
+import shutil
+import subprocess
+import sys
+import time
+import requests
+import re
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+from mephisto.operations.utils import get_mephisto_tmp_dir
+from mephisto.abstractions.architect import Architect, ArchitectArgs
+from mephisto.abstractions.architects.router.build_router import build_router
+from mephisto.abstractions.architects.channels.websocket_channel import WebsocketChannel
+from mephisto.operations.registry import register_mephisto_abstraction
+from typing import Any, Tuple, List, Dict, Optional, TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    from mephisto.abstractions.channel import Channel
+    from mephisto.data_model.packet import Packet
+    from mephisto.data_model.task_run import TaskRun
+    from mephisto.abstractions.database import MephistoDB
+    from mephisto.abstractions.blueprint import SharedTaskState
+    from argparse import _ArgumentGroup as ArgumentGroup
+
+from mephisto.operations.logger_core import get_logger
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+ARCHITECT_TYPE = "heroku"
+
+USER_NAME = getpass.getuser()
+HEROKU_SERVER_BUILD_DIRECTORY = "heroku_server"
+HEROKU_CLIENT_URL = (
+ "https://cli-assets.heroku.com/heroku-cli/channels/stable/heroku-cli"
+)
+
+HEROKU_WAIT_TIME = 3
+
+HEROKU_TMP_DIR = os.path.join(get_mephisto_tmp_dir(), "heroku")
+os.makedirs(HEROKU_TMP_DIR, exist_ok=True)
+
+
+@dataclass
+class HerokuArchitectArgs(ArchitectArgs):
+ """Additional arguments for configuring a heroku architect"""
+
+ _architect_type: str = ARCHITECT_TYPE
+ use_hobby: bool = field(
+ default=False, metadata={"help": "Launch on the Heroku Hobby tier"}
+ )
+ heroku_team: Optional[str] = field(
+ default=MISSING, metadata={"help": "Heroku team to use for this launch"}
+ )
+
+
+@register_mephisto_abstraction()
+class HerokuArchitect(Architect):
+ """
+ Sets up a server on heroku and deploys the task on that server
+ """
+
+ ArgsClass = HerokuArchitectArgs
+ ARCHITECT_TYPE = ARCHITECT_TYPE
+
+ def __init__(
+ self,
+ db: "MephistoDB",
+ args: DictConfig,
+ shared_state: "SharedTaskState",
+ task_run: "TaskRun",
+ build_dir_root: str,
+ ):
+ """
+ Ensure heroku credentials are setup, then prepare the necessary files
+ for launching for this task.
+
+ All necessary paths should be built in the init or stored in the database
+ such that a re-init on the same task run can pull the server information.
+
+ This means that we can shutdown a server that is still running after a
+ catastrophic failure.
+ """
+ # TODO(#102) put the expected info into the MephistoDB rather than storing here?
+ # Servers will have a status which needs to be kept track of.
+ self.args = args
+ self.task_run = task_run
+ self.deploy_name = f"{task_run.get_task().task_name}_{task_run.db_id}"
+ self.build_dir = build_dir_root
+
+ # Cache-able parameters
+ self.__heroku_app_name: Optional[str] = None
+ self.__heroku_executable_path: Optional[str] = None
+ self.__heroku_user_identifier: Optional[str] = None
+
+ def _get_socket_urls(self) -> List[str]:
+        """Return the websocket URL(s) of the heroku app, as a list"""
+ heroku_app_name = self.__get_app_name()
+ return ["wss://{}.herokuapp.com/".format(heroku_app_name)]
+
+ def get_channels(
+ self,
+ on_channel_open: Callable[[str], None],
+ on_catastrophic_disconnect: Callable[[str], None],
+ on_message: Callable[[str, "Packet"], None],
+ ) -> List["Channel"]:
+ """
+ Return a list of all relevant channels that the Supervisor will
+ need to register to in order to function
+ """
+ urls = self._get_socket_urls()
+ return [
+ WebsocketChannel(
+ f"heroku_channel_{self.deploy_name}_{idx}",
+ on_channel_open=on_channel_open,
+ on_catastrophic_disconnect=on_catastrophic_disconnect,
+ on_message=on_message,
+ socket_url=url,
+ )
+ for idx, url in enumerate(urls)
+ ]
+
+ def download_file(self, target_filename: str, save_dir: str) -> None:
+ """
+ Heroku architects need to download the file
+ """
+ heroku_app_name = self.__get_app_name()
+ target_url = (
+ f"https://{heroku_app_name}.herokuapp.com/download_file/{target_filename}"
+ )
+ dest_path = os.path.join(save_dir, target_filename)
+ r = requests.get(target_url, stream=True)
+
+ with open(dest_path, "wb") as out_file:
+ for chunk in r.iter_content(chunk_size=1024):
+ if chunk:
+ out_file.write(chunk)
+
+ @classmethod
+ def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
+ """
+ Assert that the provided arguments are valid. Should
+ fail if a task launched with these arguments would
+ not work.
+
+ This should include throwing an exception if the architect
+ needs login details or something similar given the
+ arguments passed in.
+ """
+ heroku_executable_path = HerokuArchitect.get_heroku_client_path()
+ try:
+ output = subprocess.check_output(
+ shlex.split(heroku_executable_path + " auth:whoami")
+ )
+ except subprocess.CalledProcessError:
+ raise Exception(
+ "A free Heroku account is required for launching tasks via "
+ "the HerokuArchitect. Please register at "
+ "https://signup.heroku.com/ and run `{} login` at the terminal "
+ "to login to Heroku before trying to use HerokuArchitect."
+ "".format(heroku_executable_path)
+ )
+ return
+
+ @staticmethod
+ def get_heroku_client_path() -> str:
+ """
+ Get the path to the heroku executable client, download a new one if it
+        doesn't exist.
+ """
+ print("Locating heroku...")
+ # Install Heroku CLI
+ os_name = None
+ bit_architecture = None
+
+ # Get the platform we are working on
+ if sys.platform == "darwin": # Mac OS X
+ os_name = "darwin"
+ elif sys.platform.startswith("linux"): # Linux
+ os_name = "linux"
+ else:
+ os_name = "windows"
+
+ # Find our architecture
+ bit_architecture_info = platform.architecture()[0]
+ if "64bit" in bit_architecture_info:
+ bit_architecture = "x64"
+ else:
+ bit_architecture = "x86"
+
+ # Find existing heroku files to use
+ existing_heroku_directory_names = glob.glob(
+ os.path.join(HEROKU_TMP_DIR, "heroku-cli-*")
+ )
+ if len(existing_heroku_directory_names) == 0:
+ print("Getting heroku")
+ if os.path.exists(os.path.join(HEROKU_TMP_DIR, "heroku.tar.gz")):
+ os.remove(os.path.join(HEROKU_TMP_DIR, "heroku.tar.gz"))
+
+ # Get the heroku client and unzip
+ tar_path = os.path.join(HEROKU_TMP_DIR, "heroku.tar.gz")
+ sh.wget(
+ shlex.split(
+ "{}-{}-{}.tar.gz -O {}".format(
+ HEROKU_CLIENT_URL, os_name, bit_architecture, tar_path
+ )
+ )
+ )
+ sh.tar(shlex.split(f"-xvzf {tar_path} -C {HEROKU_TMP_DIR}"))
+
+ # Clean up the tar
+ if os.path.exists(tar_path):
+ os.remove(tar_path)
+
+ heroku_directory_name = os.path.basename(
+ glob.glob(os.path.join(HEROKU_TMP_DIR, "heroku-cli-*"))[0]
+ )
+ heroku_directory_path = os.path.join(HEROKU_TMP_DIR, heroku_directory_name)
+ return os.path.join(heroku_directory_path, "bin", "heroku")
+
+ def __get_heroku_client(self) -> Tuple[str, str]:
+ """
+        Get the heroku client path and the heroku user identifier from ~/.netrc
+ """
+ if (
+ self.__heroku_executable_path is None
+ or self.__heroku_user_identifier is None
+ ):
+ heroku_executable_path = HerokuArchitect.get_heroku_client_path()
+
+ # get heroku credentials
+ heroku_user_identifier = None
+ while not heroku_user_identifier:
+ try:
+ output = subprocess.check_output(
+ shlex.split(heroku_executable_path + " auth:whoami")
+ )
+ output = subprocess.check_output(
+ shlex.split(heroku_executable_path + " auth:token")
+ )
+ heroku_user_identifier = netrc.netrc(
+ os.path.join(os.path.expanduser("~"), ".netrc")
+ ).hosts["api.heroku.com"][0]
+ except subprocess.CalledProcessError:
+ print(
+ "A free Heroku account is required for launching MTurk tasks. "
+ "Please register at https://signup.heroku.com/ and run `{} "
+ "login` at the terminal to login to Heroku, and then run this "
+ "program again.".format(heroku_executable_path)
+ )
+ raise Exception("Please login to heroku before trying again.")
+ self.__heroku_executable_path = heroku_executable_path
+ self.__heroku_user_identifier = heroku_user_identifier
+ return self.__heroku_executable_path, self.__heroku_user_identifier
+
+ def __get_build_directory(self) -> str:
+ """
+        Return the path of the directory where the server should be built.
+ """
+ return os.path.join(
+ self.build_dir,
+ "{}_{}".format(HEROKU_SERVER_BUILD_DIRECTORY, self.deploy_name),
+ )
+
+    def __get_app_name(self) -> str:
+        """
+        Get the name of the heroku app associated with this task.
+
+        The name is built from the local user, the deploy name and an md5 of
+        the heroku user identifier, truncated to 30 characters and sanitized
+        (underscores become dashes, other invalid characters are dropped,
+        trailing dashes are stripped). It is computed once and then cached.
+        """
+        if self.__heroku_app_name is None:
+            _, heroku_user_identifier = self.__get_heroku_client()
+            user_hash = hashlib.md5(heroku_user_identifier.encode("utf-8")).hexdigest()
+            raw_name = f"{USER_NAME}-{self.deploy_name}-{user_hash}"[:30]
+            sanitized = re.sub(r"[^a-zA-Z0-9-]", "", raw_name.replace("_", "-"))
+            # Strip dashes last: dropping invalid characters above can expose
+            # a trailing dash, which would make the name end in "-".
+            self.__heroku_app_name = sanitized.rstrip("-")
+        return self.__heroku_app_name
+
+ def __compile_server(self) -> str:
+ """
+ Move the required task files to a specific directory to be deployed to
+ heroku directly. Return the location that the packaged files are
+ now prepared in.
+ """
+ print("Building server files...")
+ heroku_server_development_root = self.__get_build_directory()
+ os.makedirs(heroku_server_development_root)
+ heroku_server_development_path = build_router(
+ heroku_server_development_root, self.task_run
+ )
+ return heroku_server_development_path
+
+ def __setup_heroku_server(self) -> str:
+ """
+ Deploy the server using the setup server directory, return the URL
+ """
+
+ heroku_executable_path, heroku_user_identifier = self.__get_heroku_client()
+ server_dir = self.__get_build_directory()
+
+ print("Heroku: Starting server...")
+
+ heroku_server_directory_path = os.path.join(server_dir, "router")
+ sh.git(shlex.split(f"-C {heroku_server_directory_path} init"))
+
+ heroku_app_name = self.__get_app_name()
+
+ # Create or attach to the server
+ return_dir = os.getcwd()
+ os.chdir(heroku_server_directory_path)
+ try:
+ if self.args.architect.get("heroku_team", None) is not None:
+ subprocess.check_output(
+ shlex.split(
+ "{} create {} --team {}".format(
+ heroku_executable_path,
+ heroku_app_name,
+ self.args.architect.heroku_team,
+ )
+ )
+ )
+ else:
+ subprocess.check_output(
+ shlex.split(
+ "{} create {}".format(heroku_executable_path, heroku_app_name)
+ )
+ )
+ except subprocess.CalledProcessError as e: # User has too many apps?
+ # TODO(#93) check response codes to determine what actually happened
+ logger.exception(e, exc_info=True)
+ sh.rm(shlex.split("-rf {}".format(heroku_server_directory_path)))
+ raise Exception(
+ "You have hit your limit on concurrent apps with heroku, which are"
+ " required to run multiple concurrent tasks.\nPlease wait for some"
+ " of your existing tasks to complete. If you have no tasks "
+ "running, login to heroku and delete some of the running apps or "
+ "verify your account to allow more concurrent apps"
+ )
+
+ # Enable WebSockets
+ try:
+ subprocess.check_output(
+ shlex.split(
+ "{} features:enable http-session-affinity".format(
+ heroku_executable_path
+ )
+ )
+ )
+ except subprocess.CalledProcessError: # Already enabled WebSockets
+ pass
+ os.chdir(return_dir)
+
+ # commit and push to the heroku server
+ sh.git(shlex.split(f"-C {heroku_server_directory_path} add -A"))
+ sh.git(shlex.split(f'-C {heroku_server_directory_path} commit -m "app"'))
+ sh.git(shlex.split(f"-C {heroku_server_directory_path} push -f heroku master"))
+
+ os.chdir(heroku_server_directory_path)
+ subprocess.check_output(
+ shlex.split("{} ps:scale web=1".format(heroku_executable_path))
+ )
+
+ if self.args.architect.use_hobby is True:
+ try:
+ subprocess.check_output(
+ shlex.split("{} dyno:type Hobby".format(heroku_executable_path))
+ )
+ except subprocess.CalledProcessError: # User doesn't have hobby access
+ self.__delete_heroku_server()
+ sh.rm(shlex.split("-rf {}".format(heroku_server_directory_path)))
+ raise Exception(
+ "Server launched with hobby flag but account cannot create "
+ "hobby servers."
+ )
+ os.chdir(return_dir)
+
+ time.sleep(HEROKU_WAIT_TIME)
+
+ return "https://{}.herokuapp.com".format(heroku_app_name)
+
+ def __delete_heroku_server(self):
+ """
+ Remove the heroku server associated with this task run
+ """
+ heroku_executable_path, heroku_user_identifier = self.__get_heroku_client()
+ heroku_app_name = self.__get_app_name()
+ print("Heroku: Deleting server: {}".format(heroku_app_name))
+ subprocess.check_output(
+ shlex.split(
+ "{} destroy {} --confirm {}".format(
+ heroku_executable_path, heroku_app_name, heroku_app_name
+ )
+ )
+ )
+ time.sleep(HEROKU_WAIT_TIME)
+
+ def server_is_running(self) -> bool:
+ """
+ Utility function to check if the given heroku app (by app-name) is
+ still running
+ """
+ heroku_executable_path, _token = self.__get_heroku_client()
+ app_name = self.__get_app_name()
+ output = subprocess.check_output(shlex.split(heroku_executable_path + " apps"))
+ all_apps = str(output, "utf-8")
+ return app_name in all_apps
+
+ def build_is_clean(self) -> bool:
+ """
+ Utility function to see if the build has been cleaned up
+ """
+ server_dir = self.__get_build_directory()
+ return not os.path.exists(server_dir)
+
+ def prepare(self) -> str:
+ """
+ Produce the server files that will be deployed to the server
+ """
+ return self.__compile_server()
+
+ def deploy(self) -> str:
+ """
+ Launch the server, and push the task files to the server. Return
+ the server URL
+ """
+ return self.__setup_heroku_server()
+
+ def cleanup(self) -> None:
+ """
+ Remove any files that were used for the deployment process that
+ no longer need to be kept track of now that the task has
+ been launched.
+ """
+ server_dir = self.__get_build_directory()
+ sh.rm(shlex.split("-rf {}".format(server_dir)))
+
+ def shutdown(self) -> None:
+ """
+ Shut down the server launched by this Architect, as stored
+ in the db.
+ """
+ self.__delete_heroku_server()
diff --git a/mephisto/abstractions/architects/local_architect.py b/mephisto/abstractions/architects/local_architect.py
new file mode 100644
index 000000000..7bbde866b
--- /dev/null
+++ b/mephisto/abstractions/architects/local_architect.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import signal
+import subprocess
+import sh
+import shutil
+import shlex
+import time
+import requests
+
+from mephisto.abstractions.architect import Architect, ArchitectArgs
+from dataclasses import dataclass, field
+from mephisto.operations.registry import register_mephisto_abstraction
+from typing import Any, Optional, Dict, List, TYPE_CHECKING, Callable
+
if TYPE_CHECKING:
    # These imports are only needed for annotations, so they are skipped
    # at runtime and evaluated by static type checkers alone.
    from mephisto.abstractions.channel import Channel
    from mephisto.data_model.packet import Packet
    from mephisto.data_model.task_run import TaskRun
    from mephisto.abstractions.database import MephistoDB
    from argparse import _ArgumentGroup as ArgumentGroup
    from omegaconf import DictConfig
    from mephisto.abstractions.blueprint import SharedTaskState
+
+from mephisto.abstractions.architects.router.build_router import build_router
+from mephisto.abstractions.architects.channels.websocket_channel import WebsocketChannel
+from mephisto.operations.utils import get_mephisto_tmp_dir
+
+ARCHITECT_TYPE = "local"
+
+
@dataclass
class LocalArchitectArgs(ArchitectArgs):
    """Extra configuration options accepted by the local architect"""

    _architect_type: str = ARCHITECT_TYPE
    hostname: str = field(
        default="localhost",
        metadata={"help": "Addressible location of the server"},
    )
    port: str = field(
        default="3000",
        metadata={"help": "Port to launch the server on"},
    )
+
+
@register_mephisto_abstraction()
class LocalArchitect(Architect):
    """
    Provides methods for setting up a server locally and deploying tasks
    onto that server.
    """

    ArgsClass = LocalArchitectArgs
    ARCHITECT_TYPE = ARCHITECT_TYPE

    def __init__(
        self,
        db: "MephistoDB",
        args: "DictConfig",
        shared_state: "SharedTaskState",
        task_run: "TaskRun",
        build_dir_root: str,
    ):
        """
        Create an architect that serves the given task run from this machine,
        reading the hostname and port out of `args.architect`.
        """
        self.task_run = task_run
        self.build_dir = build_dir_root
        self.task_run_id = task_run.db_id
        # TODO(#102) move some of this into the db, server status
        # needs to be in order to restart
        self.server_process_pid: Optional[int] = None
        self.server_process: Optional[subprocess.Popen] = None
        self.server_dir: Optional[str] = None
        self.running_dir: Optional[str] = None
        self.hostname: Optional[str] = args.architect.hostname
        self.port: Optional[str] = args.architect.port
        self.cleanup_called = False

    def _get_socket_urls(self) -> List[str]:
        """
        Return the websocket url(s) for the local server.

        An "https://" hostname maps to the wss protocol and anything else to
        ws, except that localhost and 127.0.0.1 always use plain ws.
        """
        assert self.hostname is not None, "No hostname for socket"
        assert self.port is not None, "No ports for socket"
        if "https://" in self.hostname:
            basename = self.hostname.split("https://")[1]
            protocol = "wss"
        elif "http://" in self.hostname:
            basename = self.hostname.split("http://")[1]
            protocol = "ws"
        else:
            basename = self.hostname
            protocol = "ws"

        if basename in ["localhost", "127.0.0.1"]:
            protocol = "ws"

        return [f"{protocol}://{basename}:{self.port}/"]

    def get_channels(
        self,
        on_channel_open: Callable[[str], None],
        on_catastrophic_disconnect: Callable[[str], None],
        on_message: Callable[[str, "Packet"], None],
    ) -> List["Channel"]:
        """
        Return a list of all relevant channels that the Supervisor will
        need to register to in order to function: one WebsocketChannel per
        socket url, each wired up with the given callbacks.
        """
        urls = self._get_socket_urls()
        return [
            WebsocketChannel(
                f"local_channel_{self.task_run_id}_{idx}",
                on_channel_open=on_channel_open,
                on_catastrophic_disconnect=on_catastrophic_disconnect,
                on_message=on_message,
                socket_url=url,
            )
            for idx, url in enumerate(urls)
        ]

    def download_file(self, target_filename: str, save_dir: str) -> None:
        """
        Local architects can just copy the file over from the local
        filesystem into `save_dir`.
        """
        assert self.running_dir is not None, "cannot download a file if not running"
        # NOTE(review): files are read from /tmp/ rather than running_dir;
        # presumably that is where the local server stores them - confirm.
        source_file = os.path.join("/tmp/", target_filename)
        dest_path = os.path.join(save_dir, target_filename)
        shutil.copy2(source_file, dest_path)

    def prepare(self) -> str:
        """Build the router into the build dir and return the built path"""
        self.server_dir = build_router(self.build_dir, self.task_run)
        return self.server_dir

    def deploy(self) -> str:
        """
        Deploy the server from a local folder for this task.

        Copies the prepared server into a temporary running directory and
        launches `node server.js` there with PORT set in its environment.
        Prompts on stdin for the hostname or port if either is None, and
        returns "<host>:<port>".
        """
        assert self.server_dir is not None, "Deploy called before prepare"
        self.running_dir = os.path.join(
            get_mephisto_tmp_dir(), f"local_server_{self.task_run_id}", "server"
        )

        shutil.copytree(self.server_dir, self.running_dir)

        # Launch from the running directory through `cwd` instead of
        # os.chdir, so a failed launch cannot leave this process stranded
        # in another working directory.
        self.server_process = subprocess.Popen(
            ["node", "server.js"],
            cwd=self.running_dir,
            preexec_fn=os.setpgrp,
            env=dict(os.environ, PORT=f"{self.port}"),
        )
        self.server_process_pid = self.server_process.pid

        time.sleep(1)
        print("Server running locally with pid {}.".format(self.server_process_pid))
        host = self.hostname
        port = self.port
        if host is None:
            host = input(
                "Please enter the public server address, like https://hostname.com: "
            )
            self.hostname = host
        if port is None:
            port = input("Please enter the port given above, likely 3000: ")
            self.port = port
        return "{}:{}".format(host, port)

    def cleanup(self) -> None:
        """Cleanup the built directory"""
        assert self.server_dir is not None, "Cleanup called before prepare"
        # The path is its own argument so that whitespace in it is preserved
        sh.rm(["-rf", self.server_dir])

    def shutdown(self) -> None:
        """Find the server process, shut it down, then remove the running directory"""
        assert self.running_dir is not None, "shutdown called before deploy"
        if self.server_process is None:
            # No process handle is available, fall back to signalling the pid
            assert self.server_process_pid is not None, "No server id to kill"
            os.kill(self.server_process_pid, signal.SIGTERM)
        else:
            self.server_process.terminate()
            self.server_process.wait()
        sh.rm(["-rf", self.running_dir])
diff --git a/mephisto/abstractions/architects/mock_architect.py b/mephisto/abstractions/architects/mock_architect.py
new file mode 100644
index 000000000..70eb07afe
--- /dev/null
+++ b/mephisto/abstractions/architects/mock_architect.py
@@ -0,0 +1,369 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import tornado
+from tornado.websocket import WebSocketHandler
+import os
+import threading
+import uuid
+import json
+import time
+
+from mephisto.abstractions.architect import Architect, ArchitectArgs
+from dataclasses import dataclass, field
+from mephisto.data_model.packet import (
+ PACKET_TYPE_ALIVE,
+ PACKET_TYPE_NEW_WORKER,
+ PACKET_TYPE_NEW_AGENT,
+ PACKET_TYPE_AGENT_ACTION,
+ PACKET_TYPE_SUBMIT_ONBOARDING,
+ PACKET_TYPE_REQUEST_AGENT_STATUS,
+ PACKET_TYPE_GET_INIT_DATA,
+)
+from mephisto.operations.registry import register_mephisto_abstraction
+from mephisto.abstractions.architects.channels.websocket_channel import WebsocketChannel
+from typing import List, Dict, Any, Optional, TYPE_CHECKING, Callable
+
if TYPE_CHECKING:
    # These imports are only needed for annotations, so they are skipped
    # at runtime and evaluated by static type checkers alone.
    from mephisto.abstractions.channel import Channel
    from mephisto.data_model.packet import Packet
    from mephisto.data_model.task_run import TaskRun
    from mephisto.abstractions.database import MephistoDB
    from argparse import _ArgumentGroup as ArgumentGroup
    from omegaconf import DictConfig
    from mephisto.abstractions.blueprint import SharedTaskState
+
+MOCK_DEPLOY_URL = "MOCK_DEPLOY_URL"
+ARCHITECT_TYPE = "mock"
+
+
def get_rand_id():
    """Return a freshly generated random (version 4) UUID as a string"""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
+
+
@dataclass
class MockArchitectArgs(ArchitectArgs):
    """Additional arguments for configuring a mock architect"""

    _architect_type: str = ARCHITECT_TYPE
    # The help text previously duplicated the hostname description, which
    # says nothing about what this boolean flag controls.
    should_run_server: bool = field(
        default=False,
        metadata={"help": "Whether to launch a mock server when deploying"},
    )
    port: str = field(default="3000", metadata={"help": "Port to launch the server on"})
+
+
class SocketHandler(WebSocketHandler):
    """Websocket handler that records the packets the mock server receives"""

    def __init__(self, *args, **kwargs):
        # Connected handlers shared with the MockServer, keyed by the random
        # string id of each handler
        self.subs: Dict[str, "SocketHandler"] = kwargs.pop("subs")
        self.app: "MockServer" = kwargs.pop("app")
        self.sid = get_rand_id()
        super().__init__(*args, **kwargs)

    def open(self):
        """
        Runs when a websocket is opened: registers this handler in the shared
        `subs` mapping under its random id.
        """
        # Check the keys; the values are handler objects and never equal an id
        if self.sid not in self.subs:
            self.subs[self.sid] = self

    def on_close(self):
        """
        Runs when a socket is closed: drops this handler from `subs`.
        """
        # pop with a default so closing an unregistered socket cannot raise
        self.subs.pop(self.sid, None)

    def on_message(self, message_text):
        """
        Callback that runs when a new message is received from a client.

        Args:
            message_text: A stringified JSON object with a `packet_type` key.

        Alive packets are stored as the app's `last_alive_packet`, agent
        actions only bump the app's `actions_observed` counter, agent status
        requests are ignored, and every other packet is stored as the app's
        `last_packet`.
        """
        message = json.loads(message_text)
        if message["packet_type"] == PACKET_TYPE_ALIVE:
            self.app.last_alive_packet = message
        elif message["packet_type"] == PACKET_TYPE_AGENT_ACTION:
            self.app.actions_observed += 1
        elif message["packet_type"] != PACKET_TYPE_REQUEST_AGENT_STATUS:
            self.app.last_packet = message

    def check_origin(self, origin):
        """Accept websocket connections from any origin"""
        return True
+
+
class AliveHandler(tornado.web.RequestHandler):
    """Simple handler for is_alive"""

    def get(self, eids=None):
        """
        Respond to an is_alive request. `eids` is unused; it is optional
        because the "/is_alive" route has no capture group, so no positional
        argument is supplied when the handler is invoked for that route.
        """
        pass  # Default behavior returns 200
+
+
class MockServer(tornado.web.Application):
    """
    Tornado-based server with hooks for sending specific
    messages through socket connections and such
    """

    def __init__(self, port):
        """Set up the handlers and packet tracking state for the given port"""
        # Connected SocketHandlers, shared with every handler instance
        self.subs = {}
        self.port = port
        # IOLoop of the server thread, set once that thread is running
        self.running_instance = None
        self.last_alive_packet: Optional[Dict[str, Any]] = None
        self.actions_observed = 0
        self.last_packet: Optional[Dict[str, Any]] = None
        # "debug" is listed exactly once: a repeated key in a dict literal
        # silently discards every value but the last one.
        tornado_settings = {
            "autoescape": None,
            "compiled_template_cache": False,
            "static_url_prefix": "/static/",
            "debug": True,
        }
        handlers = [
            ("/socket", SocketHandler, {"subs": self.subs, "app": self}),
            ("/is_alive", AliveHandler, {}),
        ]
        super(MockServer, self).__init__(handlers, **tornado_settings)

    def __server_thread_fn(self):
        """
        Main loop for the application: listens on the configured port and
        blocks until the IOLoop is stopped, then stops the http server.
        """
        self.running_instance = tornado.ioloop.IOLoop()
        http_server = tornado.httpserver.HTTPServer(self, max_buffer_size=1024 ** 3)
        http_server.listen(self.port)
        self.running_instance.start()
        http_server.stop()

    def _get_sub(self):
        """Return the subscriber socket to write to (the first one registered)"""
        return list(self.subs.values())[0]

    def _send_message(self, message):
        """
        Send the given message back to the mephisto client.

        Makes up to 5 attempts, pausing between them, and re-raises the
        last exception if none of the attempts succeeded.
        """
        failed_attempts = 0
        last_exception = None
        while failed_attempts < 5:
            try:
                socket = self._get_sub()
                message_json = json.dumps(message)
                socket.write_message(message_json)
                last_exception = None
                break
            except Exception as e:
                last_exception = e
                time.sleep(0.2)
                failed_attempts += 1
            finally:
                time.sleep(0.1)
        if last_exception is not None:
            raise last_exception

    def send_agent_act(self, agent_id, act_content):
        """
        Send a packet from the given agent with
        the given content
        """
        self._send_message(
            {
                "packet_type": PACKET_TYPE_AGENT_ACTION,
                "sender_id": agent_id,
                "receiver_id": "Mephisto",
                "data": act_content,
            }
        )

    def request_init_data(self, agent_id):
        """
        Send a packet from the given agent requesting its
        initialization data
        """
        self._send_message(
            {
                "packet_type": PACKET_TYPE_GET_INIT_DATA,
                "sender_id": agent_id,
                "receiver_id": "Mephisto",
                "data": {
                    "request_id": agent_id + str(time.time()),
                    "provider_data": {
                        "agent_id": agent_id,
                    },
                },
            }
        )

    def register_mock_agent(self, worker_id, agent_details):
        """
        Send a packet asking to register a mock agent.
        """
        self._send_message(
            {
                "packet_type": PACKET_TYPE_NEW_AGENT,
                "sender_id": "MockServer",
                "receiver_id": "Mephisto",
                "data": {
                    "request_id": agent_details,
                    "provider_data": {
                        "worker_id": worker_id,
                        "agent_registration_id": agent_details,
                    },
                },
            }
        )

    def register_mock_agent_after_onboarding(self, worker_id, agent_id, onboard_data):
        """
        Send a packet submitting the given onboarding data on behalf of the
        given agent. Note that a "request_id" entry is written into the
        passed `onboard_data` dict itself.
        """
        onboard_data["request_id"] = "1234"
        self._send_message(
            {
                "packet_type": PACKET_TYPE_SUBMIT_ONBOARDING,
                "sender_id": agent_id,
                "receiver_id": "Mephisto",
                "data": onboard_data,
            }
        )

    def register_mock_worker(self, worker_name):
        """
        Send a packet asking to register a mock worker.
        """
        self._send_message(
            {
                "packet_type": PACKET_TYPE_NEW_WORKER,
                "sender_id": "MockServer",
                "receiver_id": "Mephisto",
                "data": {
                    "request_id": worker_name,
                    "provider_data": {"worker_name": worker_name},
                },
            }
        )

    def disconnect_mock_agent(self, agent_id):
        """
        Mark a mock agent as disconnected.
        """
        # TODO(#97) implement when handling disconnections
        pass

    def launch_mock(self):
        """
        Start the primary loop for this application in a new thread
        """
        self.__server_thread = threading.Thread(target=self.__server_thread_fn)
        self.__server_thread.start()

    def shutdown_mock(self):
        """
        Shut down the tornado application and wait for its thread to exit.
        """

        def stop_and_free():
            self.running_instance.stop()

        # The stop is scheduled as a callback on the server's IOLoop
        self.running_instance.add_callback(stop_and_free)
        self.__server_thread.join()
+
+
@register_mephisto_abstraction()
class MockArchitect(Architect):
    """
    Architect for tests: it can run a mock server on localhost, letting
    tests send special packets and assert that connections were made.
    """

    ArgsClass = MockArchitectArgs
    ARCHITECT_TYPE = ARCHITECT_TYPE

    def __init__(
        self,
        db: "MephistoDB",
        args: "DictConfig",
        shared_state: "SharedTaskState",
        task_run: "TaskRun",
        build_dir_root: str,
    ):
        """Set up a mock architect for the given task run"""
        architect_args = args.architect
        self.task_run = task_run
        self.build_dir = build_dir_root
        self.task_run_id = task_run.db_id
        self.should_run_server = architect_args.should_run_server
        self.port = architect_args.port
        self.server: Optional["MockServer"] = None
        # TODO(#97) track state in parent class?
        self.prepared = False
        self.deployed = False
        self.cleaned = False
        self.did_shutdown = False

    def _get_socket_urls(self) -> List[str]:
        """Give the websocket url(s) of the mock server on localhost"""
        assert self.port is not None, "No ports for socket"
        socket_url = f"ws://localhost:{self.port}/socket"
        return [socket_url]

    def get_channels(
        self,
        on_channel_open: Callable[[str], None],
        on_catastrophic_disconnect: Callable[[str], None],
        on_message: Callable[[str, "Packet"], None],
    ) -> List["Channel"]:
        """
        Build one WebsocketChannel per socket url, each wired up with the
        given callbacks, for the Supervisor to register to
        """
        channels: List["Channel"] = []
        for idx, url in enumerate(self._get_socket_urls()):
            channel = WebsocketChannel(
                f"mock_channel_{self.task_run_id}_{idx}",
                on_channel_open=on_channel_open,
                on_catastrophic_disconnect=on_catastrophic_disconnect,
                on_message=on_message,
                socket_url=url,
            )
            channels.append(channel)
        return channels

    def download_file(self, target_filename: str, save_dir: str) -> None:
        """
        Pretend to download by writing a small placeholder file
        """
        placeholder_path = os.path.join(save_dir, target_filename)
        with open(placeholder_path, "wb") as fp:
            fp.write(b"mock\n")

    def prepare(self) -> str:
        """Record the prepare call and create the mock build directory"""
        self.prepared = True
        built_dir = os.path.join(self.build_dir, f"mock_build_{self.task_run_id}")
        os.makedirs(built_dir)
        return built_dir

    def deploy(self) -> str:
        """Record the deploy call, launching a mock server only if configured to"""
        self.deployed = True
        if self.should_run_server:
            self.server = MockServer(self.port)
            self.server.launch_mock()
            return f"http://localhost:{self.port}/"
        return MOCK_DEPLOY_URL

    def cleanup(self) -> None:
        """Record the cleanup call"""
        self.cleaned = True

    def shutdown(self) -> None:
        """Record the shutdown call and stop the mock server if one is running"""
        self.did_shutdown = True
        if not self.should_run_server:
            return
        if self.server is not None:
            self.server.shutdown_mock()
diff --git a/test/providers/mturk_sandbox/__init__.py b/mephisto/abstractions/architects/router/__init__.py
similarity index 100%
rename from test/providers/mturk_sandbox/__init__.py
rename to mephisto/abstractions/architects/router/__init__.py
diff --git a/mephisto/abstractions/architects/router/build_router.py b/mephisto/abstractions/architects/router/build_router.py
new file mode 100644
index 000000000..e4baaa0ed
--- /dev/null
+++ b/mephisto/abstractions/architects/router/build_router.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import mephisto.abstractions.architects.router as router_module
+import os
+import sh
+import shutil
+import shlex
+import subprocess
+import json
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+
+ROUTER_ROOT_DIR = os.path.dirname(router_module.__file__)
+SERVER_SOURCE_ROOT = os.path.join(ROUTER_ROOT_DIR, "deploy")
+CROWD_SOURCE_PATH = "static/wrap_crowd_source.js"
+TASK_CONFIG_PATH = "static/task_config.json"
+
+
def can_build(build_dir: str, task_run: "TaskRun") -> bool:
    """Validation step intended to run before build_router, checking
    that the build dir is formatted such that the router can be built
    inside of it. Currently a placeholder that accepts everything.
    """
    # TODO(#97) incorporate this step into the blueprint
    # task builder test, as once the task is built, it
    # should be able to have the server build as well.
    # TODO(#97) actually implement this when the full build
    # process for the router is decided
    is_buildable = True
    return is_buildable
+
+
def install_router_files() -> None:
    """
    Create a new build including the node_modules, by running
    `npm install` inside of the router's server source directory.

    Raises:
        Exception: if `npm install` exits with a non-zero return code.
    """
    # Run through `cwd` rather than os.chdir, so that the caller's working
    # directory is left untouched even when the install fails.
    packages_installed = subprocess.call(["npm", "install"], cwd=SERVER_SOURCE_ROOT)
    if packages_installed != 0:
        raise Exception(
            "please make sure npm is installed, otherwise view "
            "the above error for more info."
        )
+
+
def build_router(build_dir: str, task_run: "TaskRun") -> str:
    """
    Copy expected files from the router source into the build dir,
    then add the crowd provider's wrapper js, the blueprint's frontend
    args, and whatever the blueprint's task builder produces.

    Returns the path of the built router directory, `<build_dir>/router`.
    """
    install_router_files()

    server_source_directory_path = SERVER_SOURCE_ROOT
    local_server_directory_path = os.path.join(build_dir, "router")

    # Delete old server files. The path is passed as its own argument so
    # that a build dir containing whitespace is not split into pieces.
    sh.rm(["-rf", local_server_directory_path])

    # Copy over a clean copy into the server directory
    shutil.copytree(server_source_directory_path, local_server_directory_path)

    # Copy the required wrap crowd source path
    local_crowd_source_path = os.path.join(
        local_server_directory_path, CROWD_SOURCE_PATH
    )
    crowd_provider = task_run.get_provider()
    shutil.copy2(crowd_provider.get_wrapper_js_path(), local_crowd_source_path)

    # Copy the task_run's json configuration
    local_task_config_path = os.path.join(local_server_directory_path, TASK_CONFIG_PATH)
    blueprint = task_run.get_blueprint()
    # The file is only written here, so plain write mode is sufficient
    with open(local_task_config_path, "w") as task_fp:
        json.dump(blueprint.get_frontend_args(), task_fp)

    # Consolidate task files as defined by the task
    TaskBuilderClass = blueprint.TaskBuilderClass
    task_builder = TaskBuilderClass(task_run, task_run.args)

    task_builder.build_in_dir(local_server_directory_path)

    return local_server_directory_path
diff --git a/mephisto/abstractions/architects/router/deploy/package-lock.json b/mephisto/abstractions/architects/router/deploy/package-lock.json
new file mode 100644
index 000000000..b549156cb
--- /dev/null
+++ b/mephisto/abstractions/architects/router/deploy/package-lock.json
@@ -0,0 +1,906 @@
+{
+ "name": "server",
+ "version": "1.0.0",
+ "lockfileVersion": 1,
+ "requires": true,
+ "dependencies": {
+ "accepts": {
+ "version": "1.3.7",
+ "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
+ "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==",
+ "requires": {
+ "mime-types": "~2.1.24",
+ "negotiator": "0.6.2"
+ }
+ },
+ "ajv": {
+ "version": "5.5.2",
+ "resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz",
+ "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=",
+ "requires": {
+ "co": "^4.6.0",
+ "fast-deep-equal": "^1.0.0",
+ "fast-json-stable-stringify": "^2.0.0",
+ "json-schema-traverse": "^0.3.0"
+ }
+ },
+ "append-field": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz",
+ "integrity": "sha1-HjRA6RXwsSA9I3SOeO3XubW0PlY="
+ },
+ "array-flatten": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
+ "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI="
+ },
+ "asn1": {
+ "version": "0.2.4",
+ "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz",
+ "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==",
+ "requires": {
+ "safer-buffer": "~2.1.0"
+ }
+ },
+ "assert-plus": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz",
+ "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU="
+ },
+ "async-limiter": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz",
+ "integrity": "sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ=="
+ },
+ "async-lock": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/async-lock/-/async-lock-1.0.0.tgz",
+ "integrity": "sha1-uBq729Km5RZ3OgRLfmkXriAB83A="
+ },
+ "asynckit": {
+ "version": "0.4.0",
+ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+ "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k="
+ },
+ "aws-sign2": {
+ "version": "0.7.0",
+ "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
+ "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg="
+ },
+ "aws4": {
+ "version": "1.9.0",
+ "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.9.0.tgz",
+ "integrity": "sha512-Uvq6hVe90D0B2WEnUqtdgY1bATGz3mw33nH9Y+dmA+w5DHvUmBgkr5rM/KCHpCsiFNRUfokW/szpPPgMK2hm4A=="
+ },
+ "bcrypt-pbkdf": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz",
+ "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=",
+ "requires": {
+ "tweetnacl": "^0.14.3"
+ }
+ },
+ "body-parser": {
+ "version": "1.19.0",
+ "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
+ "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==",
+ "requires": {
+ "bytes": "3.1.0",
+ "content-type": "~1.0.4",
+ "debug": "2.6.9",
+ "depd": "~1.1.2",
+ "http-errors": "1.7.2",
+ "iconv-lite": "0.4.24",
+ "on-finished": "~2.3.0",
+ "qs": "6.7.0",
+ "raw-body": "2.4.0",
+ "type-is": "~1.6.17"
+ }
+ },
+ "boom": {
+ "version": "4.3.1",
+ "resolved": "https://registry.npmjs.org/boom/-/boom-4.3.1.tgz",
+ "integrity": "sha1-T4owBctKfjiJ90kDD9JbluAdLjE=",
+ "requires": {
+ "hoek": "4.x.x"
+ }
+ },
+ "buffer-from": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz",
+ "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A=="
+ },
+ "busboy": {
+ "version": "0.2.14",
+ "resolved": "https://registry.npmjs.org/busboy/-/busboy-0.2.14.tgz",
+ "integrity": "sha1-bCpiLvz0fFe7vh4qnDetNseSVFM=",
+ "requires": {
+ "dicer": "0.2.5",
+ "readable-stream": "1.1.x"
+ }
+ },
+ "bytes": {
+ "version": "3.1.0",
+ "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
+ "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg=="
+ },
+ "caseless": {
+ "version": "0.12.0",
+ "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
+ "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw="
+ },
+ "co": {
+ "version": "4.6.0",
+ "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
+ "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ="
+ },
+ "combined-stream": {
+ "version": "1.0.8",
+ "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+ "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+ "requires": {
+ "delayed-stream": "~1.0.0"
+ }
+ },
+ "concat-stream": {
+ "version": "1.6.2",
+ "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
+ "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==",
+ "requires": {
+ "buffer-from": "^1.0.0",
+ "inherits": "^2.0.3",
+ "readable-stream": "^2.2.2",
+ "typedarray": "^0.0.6"
+ },
+ "dependencies": {
+ "isarray": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
+ "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
+ },
+ "readable-stream": {
+ "version": "2.3.7",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz",
+ "integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==",
+ "requires": {
+ "core-util-is": "~1.0.0",
+ "inherits": "~2.0.3",
+ "isarray": "~1.0.0",
+ "process-nextick-args": "~2.0.0",
+ "safe-buffer": "~5.1.1",
+ "string_decoder": "~1.1.1",
+ "util-deprecate": "~1.0.1"
+ }
+ },
+ "string_decoder": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
+ "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
+ "requires": {
+ "safe-buffer": "~5.1.0"
+ }
+ }
+ }
+ },
+ "content-disposition": {
+ "version": "0.5.3",
+ "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz",
+ "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==",
+ "requires": {
+ "safe-buffer": "5.1.2"
+ }
+ },
+ "content-type": {
+ "version": "1.0.4",
+ "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
+ "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA=="
+ },
+ "cookie": {
+ "version": "0.4.0",
+ "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz",
+ "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg=="
+ },
+ "cookie-signature": {
+ "version": "1.0.6",
+ "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
+ "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw="
+ },
+ "core-util-is": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
+ "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
+ },
+ "cryptiles": {
+ "version": "3.1.4",
+ "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-3.1.4.tgz",
+ "integrity": "sha512-8I1sgZHfVwcSOY6mSGpVU3lw/GSIZvusg8dD2+OGehCJpOhQRLNcH0qb9upQnOH4XhgxxFJSg6E2kx95deb1Tw==",
+ "requires": {
+ "boom": "5.x.x"
+ },
+ "dependencies": {
+ "boom": {
+ "version": "5.2.0",
+ "resolved": "https://registry.npmjs.org/boom/-/boom-5.2.0.tgz",
+ "integrity": "sha512-Z5BTk6ZRe4tXXQlkqftmsAUANpXmuwlsF5Oov8ThoMbQRzdGTA1ngYRW160GexgOgjsFOKJz0LYhoNi+2AMBUw==",
+ "requires": {
+ "hoek": "4.x.x"
+ }
+ }
+ }
+ },
+ "dashdash": {
+ "version": "1.14.1",
+ "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
+ "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=",
+ "requires": {
+ "assert-plus": "^1.0.0"
+ }
+ },
+ "debug": {
+ "version": "2.6.9",
+ "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
+ "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
+ "requires": {
+ "ms": "2.0.0"
+ }
+ },
+ "delayed-stream": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+ "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk="
+ },
+ "depd": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
+ "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak="
+ },
+ "destroy": {
+ "version": "1.0.4",
+ "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
+ "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
+ },
+ "dicer": {
+ "version": "0.2.5",
+ "resolved": "https://registry.npmjs.org/dicer/-/dicer-0.2.5.tgz",
+ "integrity": "sha1-WZbAhrszIYyBLAkL3cCc0S+stw8=",
+ "requires": {
+ "readable-stream": "1.1.x",
+ "streamsearch": "0.1.2"
+ }
+ },
+ "ecc-jsbn": {
+ "version": "0.1.2",
+ "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
+ "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=",
+ "requires": {
+ "jsbn": "~0.1.0",
+ "safer-buffer": "^2.1.0"
+ }
+ },
+ "ee-first": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
+ "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
+ },
+ "encodeurl": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
+ "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k="
+ },
+ "escape-html": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
+ "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg="
+ },
+ "etag": {
+ "version": "1.8.1",
+ "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
+ "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc="
+ },
+ "express": {
+ "version": "4.17.1",
+ "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
+ "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==",
+ "requires": {
+ "accepts": "~1.3.7",
+ "array-flatten": "1.1.1",
+ "body-parser": "1.19.0",
+ "content-disposition": "0.5.3",
+ "content-type": "~1.0.4",
+ "cookie": "0.4.0",
+ "cookie-signature": "1.0.6",
+ "debug": "2.6.9",
+ "depd": "~1.1.2",
+ "encodeurl": "~1.0.2",
+ "escape-html": "~1.0.3",
+ "etag": "~1.8.1",
+ "finalhandler": "~1.1.2",
+ "fresh": "0.5.2",
+ "merge-descriptors": "1.0.1",
+ "methods": "~1.1.2",
+ "on-finished": "~2.3.0",
+ "parseurl": "~1.3.3",
+ "path-to-regexp": "0.1.7",
+ "proxy-addr": "~2.0.5",
+ "qs": "6.7.0",
+ "range-parser": "~1.2.1",
+ "safe-buffer": "5.1.2",
+ "send": "0.17.1",
+ "serve-static": "1.14.1",
+ "setprototypeof": "1.1.1",
+ "statuses": "~1.5.0",
+ "type-is": "~1.6.18",
+ "utils-merge": "1.0.1",
+ "vary": "~1.1.2"
+ }
+ },
+ "extend": {
+ "version": "3.0.2",
+ "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
+ "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="
+ },
+ "extsprintf": {
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
+ "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU="
+ },
+ "fast-deep-equal": {
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz",
+ "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ="
+ },
+ "fast-json-stable-stringify": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz",
+ "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I="
+ },
+ "finalhandler": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz",
+ "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==",
+ "requires": {
+ "debug": "2.6.9",
+ "encodeurl": "~1.0.2",
+ "escape-html": "~1.0.3",
+ "on-finished": "~2.3.0",
+ "parseurl": "~1.3.3",
+ "statuses": "~1.5.0",
+ "unpipe": "~1.0.0"
+ }
+ },
+ "forever-agent": {
+ "version": "0.6.1",
+ "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
+ "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE="
+ },
+ "form-data": {
+ "version": "2.3.3",
+ "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz",
+ "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==",
+ "requires": {
+ "asynckit": "^0.4.0",
+ "combined-stream": "^1.0.6",
+ "mime-types": "^2.1.12"
+ }
+ },
+ "forwarded": {
+ "version": "0.1.2",
+ "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz",
+ "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ="
+ },
+ "fresh": {
+ "version": "0.5.2",
+ "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
+ "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac="
+ },
+ "getpass": {
+ "version": "0.1.7",
+ "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz",
+ "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=",
+ "requires": {
+ "assert-plus": "^1.0.0"
+ }
+ },
+ "har-schema": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
+ "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI="
+ },
+ "har-validator": {
+ "version": "5.0.3",
+ "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.0.3.tgz",
+ "integrity": "sha1-ukAsJmGU8VlW7xXg/PJCmT9qff0=",
+ "requires": {
+ "ajv": "^5.1.0",
+ "har-schema": "^2.0.0"
+ }
+ },
+ "hawk": {
+ "version": "6.0.2",
+ "resolved": "https://registry.npmjs.org/hawk/-/hawk-6.0.2.tgz",
+ "integrity": "sha512-miowhl2+U7Qle4vdLqDdPt9m09K6yZhkLDTWGoUiUzrQCn+mHHSmfJgAyGaLRZbPmTqfFFjRV1QWCW0VWUJBbQ==",
+ "requires": {
+ "boom": "4.x.x",
+ "cryptiles": "3.x.x",
+ "hoek": "4.x.x",
+ "sntp": "2.x.x"
+ }
+ },
+ "hoek": {
+ "version": "4.2.1",
+ "resolved": "https://registry.npmjs.org/hoek/-/hoek-4.2.1.tgz",
+ "integrity": "sha512-QLg82fGkfnJ/4iy1xZ81/9SIJiq1NGFUMGs6ParyjBZr6jW2Ufj/snDqTHixNlHdPNwN2RLVD0Pi3igeK9+JfA=="
+ },
+ "http-errors": {
+ "version": "1.7.2",
+ "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
+ "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==",
+ "requires": {
+ "depd": "~1.1.2",
+ "inherits": "2.0.3",
+ "setprototypeof": "1.1.1",
+ "statuses": ">= 1.5.0 < 2",
+ "toidentifier": "1.0.0"
+ }
+ },
+ "http-signature": {
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
+ "integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=",
+ "requires": {
+ "assert-plus": "^1.0.0",
+ "jsprim": "^1.2.2",
+ "sshpk": "^1.7.0"
+ }
+ },
+ "iconv-lite": {
+ "version": "0.4.24",
+ "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
+ "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
+ "requires": {
+ "safer-buffer": ">= 2.1.2 < 3"
+ }
+ },
+ "inherits": {
+ "version": "2.0.3",
+ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
+ "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
+ },
+ "ipaddr.js": {
+ "version": "1.9.1",
+ "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+ "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="
+ },
+ "is-typedarray": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
+ "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo="
+ },
+ "isarray": {
+ "version": "0.0.1",
+ "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz",
+ "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8="
+ },
+ "isstream": {
+ "version": "0.1.2",
+ "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
+ "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo="
+ },
+ "jsbn": {
+ "version": "0.1.1",
+ "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
+ "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM="
+ },
+ "json-schema": {
+ "version": "0.2.3",
+ "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz",
+ "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM="
+ },
+ "json-schema-traverse": {
+ "version": "0.3.1",
+ "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz",
+ "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A="
+ },
+ "json-stringify-safe": {
+ "version": "5.0.1",
+ "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
+ "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus="
+ },
+ "jsprim": {
+ "version": "1.4.1",
+ "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz",
+ "integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=",
+ "requires": {
+ "assert-plus": "1.0.0",
+ "extsprintf": "1.3.0",
+ "json-schema": "0.2.3",
+ "verror": "1.10.0"
+ }
+ },
+ "media-typer": {
+ "version": "0.3.0",
+ "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
+ "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g="
+ },
+ "merge-descriptors": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
+ "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E="
+ },
+ "methods": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
+ "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4="
+ },
+ "mime": {
+ "version": "1.6.0",
+ "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
+ "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
+ },
+ "mime-db": {
+ "version": "1.42.0",
+ "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.42.0.tgz",
+ "integrity": "sha512-UbfJCR4UAVRNgMpfImz05smAXK7+c+ZntjaA26ANtkXLlOe947Aag5zdIcKQULAiF9Cq4WxBi9jUs5zkA84bYQ=="
+ },
+ "mime-types": {
+ "version": "2.1.25",
+ "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.25.tgz",
+ "integrity": "sha512-5KhStqB5xpTAeGqKBAMgwaYMnQik7teQN4IAzC7npDv6kzeU6prfkR67bc87J1kWMPGkoaZSq1npmexMgkmEVg==",
+ "requires": {
+ "mime-db": "1.42.0"
+ }
+ },
+ "mkdirp": {
+ "version": "0.5.5",
+ "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz",
+ "integrity": "sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==",
+ "requires": {
+ "minimist": "^1.2.5"
+ },
+ "dependencies": {
+ "minimist": {
+ "version": "1.2.5",
+ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
+ "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw=="
+ }
+ }
+ },
+ "ms": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
+ "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
+ },
+ "multer": {
+ "version": "1.4.2",
+ "resolved": "https://registry.npmjs.org/multer/-/multer-1.4.2.tgz",
+ "integrity": "sha512-xY8pX7V+ybyUpbYMxtjM9KAiD9ixtg5/JkeKUTD6xilfDv0vzzOFcCp4Ljb1UU3tSOM3VTZtKo63OmzOrGi3Cg==",
+ "requires": {
+ "append-field": "^1.0.0",
+ "busboy": "^0.2.11",
+ "concat-stream": "^1.5.2",
+ "mkdirp": "^0.5.1",
+ "object-assign": "^4.1.1",
+ "on-finished": "^2.3.0",
+ "type-is": "^1.6.4",
+ "xtend": "^4.0.0"
+ }
+ },
+ "negotiator": {
+ "version": "0.6.2",
+ "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
+ "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw=="
+ },
+ "oauth-sign": {
+ "version": "0.8.2",
+ "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.8.2.tgz",
+ "integrity": "sha1-Rqarfwrq2N6unsBWV4C31O/rnUM="
+ },
+ "object-assign": {
+ "version": "4.1.1",
+ "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
+ "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM="
+ },
+ "on-finished": {
+ "version": "2.3.0",
+ "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",
+ "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=",
+ "requires": {
+ "ee-first": "1.1.1"
+ }
+ },
+ "parseurl": {
+ "version": "1.3.3",
+ "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
+ "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="
+ },
+ "path-to-regexp": {
+ "version": "0.1.7",
+ "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
+ "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w="
+ },
+ "performance-now": {
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
+ "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns="
+ },
+ "process-nextick-args": {
+ "version": "2.0.1",
+ "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
+ "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag=="
+ },
+ "proxy-addr": {
+ "version": "2.0.6",
+ "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz",
+ "integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==",
+ "requires": {
+ "forwarded": "~0.1.2",
+ "ipaddr.js": "1.9.1"
+ }
+ },
+ "punycode": {
+ "version": "1.4.1",
+ "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz",
+ "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4="
+ },
+ "qs": {
+ "version": "6.7.0",
+ "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
+ "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ=="
+ },
+ "range-parser": {
+ "version": "1.2.1",
+ "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
+ "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="
+ },
+ "raw-body": {
+ "version": "2.4.0",
+ "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz",
+ "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==",
+ "requires": {
+ "bytes": "3.1.0",
+ "http-errors": "1.7.2",
+ "iconv-lite": "0.4.24",
+ "unpipe": "1.0.0"
+ }
+ },
+ "readable-stream": {
+ "version": "1.1.14",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
+ "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=",
+ "requires": {
+ "core-util-is": "~1.0.0",
+ "inherits": "~2.0.1",
+ "isarray": "0.0.1",
+ "string_decoder": "~0.10.x"
+ }
+ },
+ "request": {
+ "version": "2.82.0",
+ "resolved": "https://registry.npmjs.org/request/-/request-2.82.0.tgz",
+ "integrity": "sha512-/QWqfmyTfQ4OYs6EhB1h2wQsX9ZxbuNePCvCm0Mdz/mxw73mjdg0D4QdIl0TQBFs35CZmMXLjk0iCGK395CUDg==",
+ "requires": {
+ "aws-sign2": "~0.7.0",
+ "aws4": "^1.6.0",
+ "caseless": "~0.12.0",
+ "combined-stream": "~1.0.5",
+ "extend": "~3.0.1",
+ "forever-agent": "~0.6.1",
+ "form-data": "~2.3.1",
+ "har-validator": "~5.0.3",
+ "hawk": "~6.0.2",
+ "http-signature": "~1.2.0",
+ "is-typedarray": "~1.0.0",
+ "isstream": "~0.1.2",
+ "json-stringify-safe": "~5.0.1",
+ "mime-types": "~2.1.17",
+ "oauth-sign": "~0.8.2",
+ "performance-now": "^2.1.0",
+ "qs": "~6.5.1",
+ "safe-buffer": "^5.1.1",
+ "stringstream": "~0.0.5",
+ "tough-cookie": "~2.3.2",
+ "tunnel-agent": "^0.6.0",
+ "uuid": "^3.1.0"
+ },
+ "dependencies": {
+ "qs": {
+ "version": "6.5.2",
+ "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
+ "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA=="
+ }
+ }
+ },
+ "safe-buffer": {
+ "version": "5.1.2",
+ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+ "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
+ },
+ "safer-buffer": {
+ "version": "2.1.2",
+ "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
+ },
+ "send": {
+ "version": "0.17.1",
+ "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz",
+ "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==",
+ "requires": {
+ "debug": "2.6.9",
+ "depd": "~1.1.2",
+ "destroy": "~1.0.4",
+ "encodeurl": "~1.0.2",
+ "escape-html": "~1.0.3",
+ "etag": "~1.8.1",
+ "fresh": "0.5.2",
+ "http-errors": "~1.7.2",
+ "mime": "1.6.0",
+ "ms": "2.1.1",
+ "on-finished": "~2.3.0",
+ "range-parser": "~1.2.1",
+ "statuses": "~1.5.0"
+ },
+ "dependencies": {
+ "ms": {
+ "version": "2.1.1",
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
+ "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
+ }
+ }
+ },
+ "serve-static": {
+ "version": "1.14.1",
+ "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz",
+ "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==",
+ "requires": {
+ "encodeurl": "~1.0.2",
+ "escape-html": "~1.0.3",
+ "parseurl": "~1.3.3",
+ "send": "0.17.1"
+ }
+ },
+ "setprototypeof": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
+ "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw=="
+ },
+ "sntp": {
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/sntp/-/sntp-2.1.0.tgz",
+ "integrity": "sha512-FL1b58BDrqS3A11lJ0zEdnJ3UOKqVxawAkF3k7F0CVN7VQ34aZrV+G8BZ1WC9ZL7NyrwsW0oviwsWDgRuVYtJg==",
+ "requires": {
+ "hoek": "4.x.x"
+ }
+ },
+ "sshpk": {
+ "version": "1.16.1",
+ "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz",
+ "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==",
+ "requires": {
+ "asn1": "~0.2.3",
+ "assert-plus": "^1.0.0",
+ "bcrypt-pbkdf": "^1.0.0",
+ "dashdash": "^1.12.0",
+ "ecc-jsbn": "~0.1.1",
+ "getpass": "^0.1.1",
+ "jsbn": "~0.1.0",
+ "safer-buffer": "^2.0.2",
+ "tweetnacl": "~0.14.0"
+ }
+ },
+ "statuses": {
+ "version": "1.5.0",
+ "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
+ "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow="
+ },
+ "streamsearch": {
+ "version": "0.1.2",
+ "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-0.1.2.tgz",
+ "integrity": "sha1-gIudDlb8Jz2Am6VzOOkpkZoanxo="
+ },
+ "string_decoder": {
+ "version": "0.10.31",
+ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
+ "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ="
+ },
+ "stringstream": {
+ "version": "0.0.6",
+ "resolved": "https://registry.npmjs.org/stringstream/-/stringstream-0.0.6.tgz",
+ "integrity": "sha512-87GEBAkegbBcweToUrdzf3eLhWNg06FJTebl4BVJz/JgWy8CvEr9dRtX5qWphiynMSQlxxi+QqN0z5T32SLlhA=="
+ },
+ "toidentifier": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
+ "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw=="
+ },
+ "tough-cookie": {
+ "version": "2.3.4",
+ "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.4.tgz",
+ "integrity": "sha512-TZ6TTfI5NtZnuyy/Kecv+CnoROnyXn2DN97LontgQpCwsX2XyLYCC0ENhYkehSOwAp8rTQKc/NUIF7BkQ5rKLA==",
+ "requires": {
+ "punycode": "^1.4.1"
+ }
+ },
+ "tunnel-agent": {
+ "version": "0.6.0",
+ "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
+ "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=",
+ "requires": {
+ "safe-buffer": "^5.0.1"
+ }
+ },
+ "tweetnacl": {
+ "version": "0.14.5",
+ "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
+ "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q="
+ },
+ "type-is": {
+ "version": "1.6.18",
+ "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
+ "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
+ "requires": {
+ "media-typer": "0.3.0",
+ "mime-types": "~2.1.24"
+ }
+ },
+ "typedarray": {
+ "version": "0.0.6",
+ "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
+ "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
+ },
+ "ultron": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/ultron/-/ultron-1.1.1.tgz",
+ "integrity": "sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og=="
+ },
+ "unpipe": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
+ "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
+ },
+ "util-deprecate": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+ "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
+ },
+ "utils-merge": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
+ "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM="
+ },
+ "uuid": {
+ "version": "3.3.3",
+ "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.3.tgz",
+ "integrity": "sha512-pW0No1RGHgzlpHJO1nsVrHKpOEIxkGg1xB+v0ZmdNH5OAeAwzAVrCnI2/6Mtx+Uys6iaylxa+D3g4j63IKKjSQ=="
+ },
+ "vary": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
+ "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw="
+ },
+ "verror": {
+ "version": "1.10.0",
+ "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz",
+ "integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=",
+ "requires": {
+ "assert-plus": "^1.0.0",
+ "core-util-is": "1.0.2",
+ "extsprintf": "^1.2.0"
+ }
+ },
+ "ws": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/ws/-/ws-4.0.0.tgz",
+ "integrity": "sha512-QYslsH44bH8O7/W2815u5DpnCpXWpEK44FmaHffNwgJI4JMaSZONgPBTOfrxJ29mXKbXak+LsJ2uAkDTYq2ptQ==",
+ "requires": {
+ "async-limiter": "~1.0.0",
+ "safe-buffer": "~5.1.0",
+ "ultron": "~1.1.0"
+ }
+ },
+ "xtend": {
+ "version": "4.0.2",
+ "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",
+ "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ=="
+ }
+ }
+}
diff --git a/mephisto/abstractions/architects/router/deploy/package.json b/mephisto/abstractions/architects/router/deploy/package.json
new file mode 100644
index 000000000..db1aa2777
--- /dev/null
+++ b/mephisto/abstractions/architects/router/deploy/package.json
@@ -0,0 +1,23 @@
+{
+ "name": "server",
+ "version": "1.0.0",
+ "description": "",
+ "main": "server.js",
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1",
+ "start": "node server.js"
+ },
+ "keywords": [],
+ "author": "",
+ "engines": {
+ "node": ">= 7.6.0"
+ },
+ "dependencies": {
+ "async-lock": "1.0.0",
+ "body-parser": "^1.19.0",
+ "express": "^4.17.1",
+ "multer": "^1.4.2",
+ "request": "2.82.0",
+ "ws": "4.0.0"
+ }
+}
diff --git a/mephisto/abstractions/architects/router/deploy/server.js b/mephisto/abstractions/architects/router/deploy/server.js
new file mode 100644
index 000000000..74f07d5d1
--- /dev/null
+++ b/mephisto/abstractions/architects/router/deploy/server.js
@@ -0,0 +1,526 @@
+/* Copyright (c) Facebook, Inc. and its affiliates.
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+"use strict";
+
+const DEBUG = false;
+
+// TODO add some testing to launch this server and communicate with it
+
+const bodyParser = require("body-parser");
+const express = require("express");
+const http = require("http");
+const fs = require("fs");
+const WebSocket = require("ws");
+const multer = require("multer");
+const path = require("path");
+
+const task_directory_name = "static";
+
+const PORT = process.env.PORT || 3000;
+
+// Generate a random id
+function uuidv4() {
+ return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
+ var r = (Math.random() * 16) | 0,
+ v = c == "x" ? r : (r & 0x3) | 0x8;
+ return v.toString(16);
+ });
+}
+
+// Initialize the express app and register body parsers for text, urlencoded and JSON bodies
+const app = express();
+app.use(bodyParser.text());
+app.use(
+ bodyParser.urlencoded({
+ extended: true,
+ })
+);
+app.use(bodyParser.json());
+
+var storage = multer.diskStorage({ // disk-backed storage for multipart file uploads
+ destination: function (req, file, cb) {
+ cb(null, "/tmp/"); // every uploaded file is written under /tmp/
+ },
+ filename: function (req, file, cb) {
+ const uniqueSuffix = Date.now() + "-" + Math.round(Math.random() * 1e9); // timestamp + random number
+ cb(null, uniqueSuffix + "-" + file.fieldname + "-" + file.originalname); // prefix keeps stored names distinct
+ },
+});
+
+var upload = multer({ storage: storage }); // upload middleware backed by the storage above
+
+const server = http.createServer(app); // plain HTTP server wrapping the express app
+
+// ======= ========
+
+const FAILED_RECONNECT_TIME = 10000; // ms; used as the setTimeout delay before a silent agent is marked disconnected
+
+// TODO can we pull all these from somewhere, make sure they're testable
+// and show they're the same as the python ones?
+const STATUS_INIT = "none"; // initial status of a newly created LocalAgentState
+const STATUS_CONNECTED = "connected";
+const STATUS_DISCONNECTED = "disconnect"; // set when an agent fails to come back alive in time
+const STATUS_COMPLETED = "completed";
+
+const SYSTEM_SOCKET_ID = "mephisto"; // id of the Mephisto backend as sender/receiver; TODO pull from somewhere
+// TODO use registered socket id from on_alive
+const SERVER_SOCKET_ID = "mephisto_server"; // sender id on packets this router creates itself
+
+const PACKET_TYPE_REQUEST_AGENT_STATUS = "request_status"; // Mephisto asks for statuses; also forwarded to each agent
+const PACKET_TYPE_RETURN_AGENT_STATUS = "return_status"; // router's reply carrying all agent statuses
+const PACKET_TYPE_INIT_DATA = "initial_data_send"; // answers a pending provider request
+const PACKET_TYPE_AGENT_ACTION = "agent_action"; // forwarded to the packet's receiver
+const PACKET_TYPE_REQUEST_ACTION = "request_act"; // sets wants_act on the receiving agent
+const PACKET_TYPE_UPDATE_AGENT_STATUS = "update_status"; // updates local agent record, then forwarded
+const PACKET_TYPE_NEW_AGENT = "register_agent";
+const PACKET_TYPE_NEW_WORKER = "register_worker";
+const PACKET_TYPE_GET_INIT_DATA = "init_data_request";
+const PACKET_TYPE_ALIVE = "alive"; // first packet a connection sends to identify itself
+const PACKET_TYPE_PROVIDER_DETAILS = "provider_details"; // answers a pending provider request
+const PACKET_TYPE_SUBMIT_ONBOARDING = "submit_onboarding";
+const PACKET_TYPE_HEARTBEAT = "heartbeat"; // echoed back to the sender with current status/state
+
+// State for agents tracked by the server
+class LocalAgentState {
+ constructor(agent_id) {
+ this.status = STATUS_INIT;
+ this.agent_id = agent_id;
+ this.unsent_messages = [];
+ this.state = { wants_act: false, done_text: null };
+ this.is_alive = false;
+ }
+
+ get_sendable_messages() {
+ let sendable_messages = this.unsent_messages;
+ this.unsent_messages = [];
+ return sendable_messages;
+ }
+}
+
+const wss = new WebSocket.Server({ server }); // websocket server attached to the HTTP server
+
+// Track connections
+var agent_id_to_socket = {}; // agent id -> socket most recently seen for that agent
+var socket_id_to_agent = {}; // socket.id -> LocalAgentState
+var mephisto_message_queue = []; // packets waiting to be delivered to Mephisto
+var main_thread_timeout = null; // timer handle for main_thread; null until Mephisto first sends alive
+var mephisto_socket = null; // socket of the Mephisto backend, set on its alive packet
+
+// This is a mapping of agent id -> LocalAgentState
+var agent_id_to_agent = {};
+
+var pending_provider_requests = {}; // request id -> response object awaiting a provider/init-data packet
+
+var last_mephisto_ping = Date.now(); // refreshed whenever Mephisto requests agent statuses
+
+function debug_log() {
+ if (DEBUG) {
+ console.log.apply(null, arguments);
+ }
+}
+
+// Serialize packet as JSON and send it over socket; silently drops it if the socket is missing or closed
+function _send_message(socket, packet) {
+ if (!socket) {
+ debug_log("No socket to send packet to", packet);
+ // Socket doesn't exist - odd
+ return;
+ }
+
+ if (socket.readyState == 3) { // 3 is the WebSocket CLOSED ready state
+ // Socket has already closed
+ return;
+ }
+
+ // Send the message through, with one retry a half second later
+ socket.send(JSON.stringify(packet), function ack(error) {
+ if (error === undefined) { // an undefined error is treated as a successful send
+ return;
+ }
+ setTimeout(function () { // first attempt failed: retry exactly once after 500 ms
+ socket.send(JSON.stringify(packet), function ack2(error2) {
+ if (error2 === undefined) {
+ return;
+ } else {
+ console.log(error2); // retry failed too: log and give up, the packet is dropped
+ }
+ });
+ }, 500);
+ });
+}
+
+function find_or_create_agent(agent_id) {
+ var agent = agent_id_to_agent[agent_id];
+ if (agent === undefined) {
+ var agent = new LocalAgentState(agent_id);
+ agent_id_to_agent[agent_id] = agent;
+ }
+ return agent;
+}
+
+// Open connections send alives to identify who they are,
+// register them correctly here
+function handle_alive(socket, alive_packet) {
+ if (alive_packet.sender_id == SYSTEM_SOCKET_ID) {
+ mephisto_socket = socket;
+ console.log(socket._socket.remoteAddress);
+ if (main_thread_timeout === null) {
+ debug_log("launching main thread");
+ main_thread_timeout = setTimeout(main_thread, 50);
+ }
+ } else {
+ var agent_id = alive_packet.sender_id;
+ var agent = find_or_create_agent(agent_id);
+ agent.is_alive = true;
+ agent_id_to_socket[agent_id] = socket;
+ socket_id_to_agent[socket.id] = agent;
+ }
+}
+
+// Return the status of all agents mapped by their agent id
+function handle_get_agent_status(status_packet) {
+ last_mephisto_ping = Date.now();
+ let agent_statuses = {};
+ for (let agent_id in agent_id_to_agent) {
+ agent_statuses[agent_id] = agent_id_to_agent[agent_id].status;
+ let ping_packet = {
+ packet_type: PACKET_TYPE_REQUEST_AGENT_STATUS,
+ sender_id: SYSTEM_SOCKET_ID,
+ receiver_id: agent_id,
+ data: null,
+ };
+ handle_forward(ping_packet);
+ }
+ let packet = {
+ packet_type: PACKET_TYPE_RETURN_AGENT_STATUS,
+ sender_id: SERVER_SOCKET_ID,
+ receiver_id: SYSTEM_SOCKET_ID,
+ data: agent_statuses,
+ };
+ mephisto_message_queue.push(packet);
+}
+
+function get_agent_state(agent_id) {
+ let agent = find_or_create_agent(agent_id);
+ return agent.state;
+}
+
+function handle_update_local_status(status_packet) {
+ let agent_id = status_packet.receiver_id;
+ let agent = find_or_create_agent(agent_id);
+ if (status_packet.data.status != undefined) {
+ agent.status = status_packet.data.status;
+ }
+ agent.state = Object.assign(agent.state, status_packet.data.state);
+}
+
+function update_wanted_acts(agent_id, wants_act) {
+ let agent = find_or_create_agent(agent_id);
+ agent.state.wants_act = wants_act;
+}
+
+// Handle a message being sent to or from a frontend agent
+function handle_forward(packet) {
+ if (packet.receiver_id == SYSTEM_SOCKET_ID) {
+ debug_log("Adding message to mephisto queue", packet);
+ mephisto_message_queue.push(packet);
+ } else {
+ let agent = find_or_create_agent(packet.receiver_id);
+ debug_log("Adding message to agent queue", packet);
+ agent.unsent_messages.push(packet);
+ }
+}
+
+function _followup_possible_disconnect(agent) {
+ if (!agent.is_alive) {
+ agent.status = STATUS_DISCONNECTED;
+ debug_log("Agent disconnected", agent);
+ }
+}
+
+function handle_possible_disconnect(agent) {
+ debug_log("Possible disconnect", agent);
+ agent.is_alive = false;
+
+ // Give the agent some time to possibly reconnect
+ setTimeout(() => _followup_possible_disconnect(agent), FAILED_RECONNECT_TIME);
+}
+
+function send_status_for_agent(agent_id) {
+ let agent = find_or_create_agent(agent_id);
+ let packet = {
+ packet_type: PACKET_TYPE_UPDATE_AGENT_STATUS,
+ sender_id: SERVER_SOCKET_ID,
+ receiver_id: agent_id,
+ data: {
+ status: agent.status,
+ state: agent.state,
+ },
+ };
+ handle_forward(packet);
+}
+
+// Register handlers
+// Per-connection setup: tags the socket with a fresh uuid and installs the
+// disconnect, error and message handlers for it.
+wss.on("connection", function (socket) {
+ socket.id = uuidv4();
+ console.log("Client connected");
+ // Disconnects are logged
+ // NOTE(review): if `wss` is a `ws` WebSocket.Server, its sockets emit
+ // "close" rather than "disconnect", in which case this handler would never
+ // run - TODO confirm which library provides `wss`.
+ socket.on("disconnect", function () {
+ console.log("socket disconnected");
+ var agent = socket_id_to_agent[socket.id];
+ if (agent !== undefined) {
+ handle_possible_disconnect(agent);
+ }
+ });
+
+ // Socket errors are treated like a possible disconnect of the agent that is
+ // currently mapped to this socket (if any).
+ socket.on("error", (err) => {
+ console.log("Caught socket error, probably closed!");
+ console.log(err);
+ var agent = socket_id_to_agent[socket.id];
+ if (agent !== undefined) {
+ handle_possible_disconnect(agent);
+ }
+ });
+
+ // handles routing a packet to the desired recipient
+ // The raw message is parsed as JSON and dispatched on `packet_type`. Packet
+ // types without a branch below are ignored. Any exception (including a JSON
+ // parse failure) is logged and swallowed by the surrounding try/catch.
+ socket.on("message", function (packet) {
+ try {
+ packet = JSON.parse(packet);
+ if (packet["packet_type"] == PACKET_TYPE_REQUEST_AGENT_STATUS) {
+ debug_log("Mephisto requesting status");
+ handle_get_agent_status(packet);
+ } else if (packet["packet_type"] == PACKET_TYPE_AGENT_ACTION) {
+ debug_log("Agent action: ", packet);
+ handle_forward(packet);
+ // Actions addressed to the system socket additionally reset the
+ // sender's wanted-act flag and push its current status back out.
+ if (packet.receiver_id == SYSTEM_SOCKET_ID) {
+ update_wanted_acts(packet.sender_id, false);
+ send_status_for_agent(packet.sender_id);
+ }
+ } else if (packet["packet_type"] == PACKET_TYPE_ALIVE) {
+ debug_log("Agent alive: ", packet);
+ handle_alive(socket, packet);
+ } else if (packet["packet_type"] == PACKET_TYPE_UPDATE_AGENT_STATUS) {
+ debug_log("Update agent status", packet);
+ handle_update_local_status(packet);
+ // The forwarded packet carries the locally tracked state of the receiver
+ packet.data.state = get_agent_state(packet.receiver_id);
+ handle_forward(packet);
+ } else if (packet["packet_type"] == PACKET_TYPE_REQUEST_ACTION) {
+ debug_log("Requesting act", packet);
+ update_wanted_acts(packet.receiver_id, true);
+ let agent_id = packet["receiver_id"];
+ send_status_for_agent(agent_id);
+ } else if (
+ packet["packet_type"] == PACKET_TYPE_PROVIDER_DETAILS ||
+ packet["packet_type"] == PACKET_TYPE_INIT_DATA
+ ) {
+ // Reply to an HTTP request parked in pending_provider_requests. The key
+ // is data.request_id, falling back to receiver_id when that is missing.
+ let request_id = packet["data"]["request_id"];
+ if (request_id === undefined) {
+ request_id = packet["receiver_id"];
+ }
+ let res_obj = pending_provider_requests[request_id];
+ if (res_obj) {
+ // Answer once, then forget the pending request
+ res_obj.json(packet);
+ delete pending_provider_requests[request_id];
+ }
+ } else if (packet["packet_type"] == PACKET_TYPE_HEARTBEAT) {
+ // The heartbeat is turned around: data is replaced, and sender and
+ // receiver are swapped so the packet is forwarded back to the agent.
+ packet["data"] = { last_mephisto_ping: last_mephisto_ping };
+ let agent_id = packet["sender_id"];
+ packet["sender_id"] = packet["receiver_id"];
+ packet["receiver_id"] = agent_id;
+ let agent = agent_id_to_agent[agent_id];
+ if (agent !== undefined) {
+ // A heartbeat from a known agent marks it alive again and reports its
+ // current status and state back to it.
+ agent.is_alive = true;
+ packet.data.status = agent.status;
+ packet.data.state = agent.state;
+ if (agent_id_to_socket[agent.agent_id] != socket) {
+ // Not communicating to the correct socket, update
+ debug_log("Updating socket for ", agent);
+ agent_id_to_socket[agent.agent_id] = socket;
+ socket_id_to_agent[socket.id] = agent;
+ }
+ }
+ handle_forward(packet);
+ }
+ } catch (error) {
+ console.log("Transient error on message");
+ console.log(error);
+ }
+ });
+});
+
+server.listen(PORT, function () {
+ console.log("Listening on %d", server.address().port);
+});
+
+// ============ ==============
+
+// ======================= =======================
+
+// TODO add crash checking around this thread?
+// Periodic send loop. Each pass flushes the sendable messages of every agent
+// that has a socket and is alive, then drains mephisto_message_queue to the
+// mephisto socket. It re-schedules itself 50 ms later whether or not the pass
+// threw; errors are logged and swallowed.
+function main_thread() {
+ try {
+ // Handle active connections message sends
+ for (const agent_id in agent_id_to_socket) {
+ let agent_state = agent_id_to_agent[agent_id];
+ // Agents currently flagged as not alive are skipped for this pass
+ if (!agent_state.is_alive) {
+ continue;
+ }
+ let sendable_messages = agent_state.get_sendable_messages();
+ if (sendable_messages.length > 0) {
+ let socket = agent_id_to_socket[agent_id];
+ // TODO send all these messages in a batch
+ for (const packet of sendable_messages) {
+ _send_message(socket, packet);
+ }
+ }
+ }
+
+ // Handle sending batches to the mephisto python client
+ // The queue is emptied into a local array first, then sent in order
+ let mephisto_messages = [];
+ while (mephisto_message_queue.length > 0) {
+ mephisto_messages.push(mephisto_message_queue.shift());
+ }
+ if (mephisto_messages.length > 0) {
+ for (const packet of mephisto_messages) {
+ _send_message(mephisto_socket, packet);
+ }
+ }
+ } catch (error) {
+ console.log("Transient error in main thread?");
+ console.log(error);
+ }
+
+ // Re-call this thread, as it should run forever
+ main_thread_timeout = setTimeout(main_thread, 50);
+}
+
+// ======================= ======================
+
+// ===================== ========================
+function make_provider_request(request_type, provider_data, res) {
+ var request_id = uuidv4();
+
+ let request_packet = {
+ packet_type: request_type,
+ sender_id: SERVER_SOCKET_ID,
+ receiver_id: SYSTEM_SOCKET_ID,
+ data: {
+ provider_data: provider_data,
+ request_id: request_id,
+ },
+ };
+
+ pending_provider_requests[request_id] = res;
+ _send_message(mephisto_socket, request_packet);
+ // TODO set a timeout to expire this request rather than leave the worker hanging
+}
+
+app.post("/initial_task_data", function (req, res) {
+ var provider_data = req.body.provider_data;
+ make_provider_request(PACKET_TYPE_GET_INIT_DATA, provider_data, res);
+});
+
+app.post("/register_worker", function (req, res) {
+ var provider_data = req.body.provider_data;
+ make_provider_request(PACKET_TYPE_NEW_WORKER, provider_data, res);
+});
+
+app.post("/request_agent", function (req, res) {
+ var provider_data = req.body.provider_data;
+ make_provider_request(PACKET_TYPE_NEW_AGENT, provider_data, res);
+});
+
+app.post("/submit_onboarding", function (req, res) {
+ var provider_data = req.body.provider_data;
+ var request_id = uuidv4();
+
+ let agent_id = provider_data.USED_AGENT_ID;
+ delete provider_data.USED_AGENT_ID;
+
+ provider_data.request_id = request_id;
+
+ let submit_packet = {
+ packet_type: PACKET_TYPE_SUBMIT_ONBOARDING,
+ sender_id: agent_id,
+ receiver_id: SYSTEM_SOCKET_ID,
+ data: provider_data,
+ };
+
+ pending_provider_requests[request_id] = res;
+ _send_message(mephisto_socket, submit_packet);
+});
+
+// Final task submission. The posted body (minus USED_AGENT_ID) and any files
+// collected by the upload middleware are wrapped in an AGENT_ACTION packet
+// flagged with MEPHISTO_is_submit and sent to the mephisto socket. The HTTP
+// request is answered immediately, then local bookkeeping for the agent is
+// dropped.
+app.post("/submit_task", upload.any(), function (req, res) {
+ var provider_data = req.body;
+ // USED_AGENT_ID identifies the sender and is not forwarded as task data
+ let agent_id = provider_data.USED_AGENT_ID;
+ delete provider_data.USED_AGENT_ID;
+ let submit_packet = {
+ packet_type: PACKET_TYPE_AGENT_ACTION,
+ sender_id: agent_id,
+ receiver_id: SYSTEM_SOCKET_ID,
+ data: {
+ task_data: provider_data,
+ MEPHISTO_is_submit: true,
+ files: req.files,
+ },
+ };
+ _send_message(mephisto_socket, submit_packet);
+ // Responds without waiting for any acknowledgement from mephisto
+ res.json({ status: "Submitted!" });
+
+ // Cleanup local state for a task that's already submitted
+ if (agent_id in agent_id_to_agent) {
+ delete agent_id_to_agent[agent_id];
+ }
+ if (agent_id in agent_id_to_socket) {
+ // The socket id must be read before the agent->socket entry is removed
+ let socket_id = agent_id_to_socket[agent_id].id;
+ delete agent_id_to_socket[agent_id];
+ delete socket_id_to_agent[socket_id];
+ }
+});
+
+// Quick status check for this server
+app.get("/is_alive", function (req, res) {
+ res.json({ status: "Alive!" });
+});
+
+// Returns server time for now
+app.get("/get_timestamp", function (req, res) {
+ res.json({ timestamp: Date.now() }); // in milliseconds
+});
+
+app.get("/task_index", function (req, res) {
+ // TODO how do we pass the task config to the frontend?
+ res.render("index.html");
+});
+
+app.get("/download_file/:file", function (req, res) {
+ var ip =
+ req.ip ||
+ req.headers["x-forwarded-for"] ||
+ req.connection.remoteAddress ||
+ req.socket.remoteAddress ||
+ req.connection.socket.remoteAddress;
+ if (ip == mephisto_socket._socket.remoteAddress) {
+ res.sendFile(path.join("/tmp/", req.params.file), function (err) {
+ if (err) {
+ console.log(err);
+ res.status(err.status).end();
+ }
+ });
+ } else {
+ res.sendFile(path.join("/tmp/", req.params.file), function (err) {
+ if (err) {
+ console.log(err);
+ res.status(err.status).end();
+ }
+ });
+ // TODO only return the files for requests from the origin
+ // res.status(403).end();
+ }
+});
+
+app.use(express.static("static"));
+
+// ======================= =======================
diff --git a/mephisto/abstractions/architects/router/deploy/static/index.html b/mephisto/abstractions/architects/router/deploy/static/index.html
new file mode 100644
index 000000000..ee7f159c1
--- /dev/null
+++ b/mephisto/abstractions/architects/router/deploy/static/index.html
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+ Crowdsourcing Task
+
+
+
+
+
+
+
+
+
diff --git a/mephisto/abstractions/architects/router/deploy/uploads/exists.txt b/mephisto/abstractions/architects/router/deploy/uploads/exists.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/mephisto/abstractions/blueprint.py b/mephisto/abstractions/blueprint.py
new file mode 100644
index 000000000..63fcbf12f
--- /dev/null
+++ b/mephisto/abstractions/blueprint.py
@@ -0,0 +1,625 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from mephisto.operations.utils import find_or_create_qualification
+from typing import (
+ ClassVar,
+ Optional,
+ List,
+ Dict,
+ Any,
+ Type,
+ ClassVar,
+ Union,
+ Iterable,
+ AsyncIterator,
+ Callable,
+ TYPE_CHECKING,
+)
+
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+
+from mephisto.data_model.exceptions import (
+ AgentReturnedError,
+ AgentDisconnectedError,
+ AgentTimeoutError,
+)
+from mephisto.data_model.constants.assignment_state import AssignmentState
+
+if TYPE_CHECKING:
+ from mephisto.data_model.agent import Agent, OnboardingAgent
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Assignment, InitializationData
+ from mephisto.data_model.unit import Unit
+ from mephisto.data_model.packet import Packet
+ from mephisto.data_model.worker import Worker
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+
+@dataclass
+class BlueprintArgs:
+ """
+ Base arguments available to every blueprint. All fields default to
+ omegaconf's MISSING marker.
+ """
+
+ _blueprint_type: str = MISSING
+ # Read by OnboardingRequired.init_onboarding_config via
+ # args.blueprint.get("onboarding_qualification", None)
+ onboarding_qualification: str = field(
+ default=MISSING,
+ metadata={
+ "help": (
+ "Specify the name of a qualification used to block workers who fail onboarding, "
+ "Empty will skip onboarding."
+ )
+ },
+ )
+ # Read by TaskRunner.__init__, which ensures the qualification exists
+ block_qualification: str = field(
+ default=MISSING,
+ metadata={
+ "help": ("Specify the name of a qualification used to soft block workers.")
+ },
+ )
+
+
+@dataclass
+class SharedTaskState:
+ """
+ Base class for specifying additional state that can't just
+ be passed as Hydra args, like functions and objects
+ """
+
+ # Copied onto the blueprint by OnboardingRequired.init_onboarding_config
+ onboarding_data: Dict[str, Any] = field(default_factory=dict)
+ # Becomes Blueprint.frontend_task_config, returned by get_frontend_args
+ task_config: Dict[str, Any] = field(default_factory=dict)
+ # Default accepts every input
+ validate_onboarding: Callable[[Any], bool] = field(
+ default_factory=lambda: (lambda x: True)
+ )
+ qualifications: List[Any] = field(default_factory=list)
+ # Default allows every worker to do every unit
+ worker_can_do_unit: Callable[["Worker", "Unit"], bool] = field(
+ default_factory=lambda: (lambda worker, unit: True)
+ )
+
+
+class TaskBuilder(ABC):
+ """
+ Class to manage building a task of a specific type in a directory
+ that will be used to deploy that task.
+ """
+
+ def __init__(self, task_run: "TaskRun", args: "DictConfig"):
+ self.args = args
+ self.task_run = task_run
+
+ def __new__(cls, task_run: "TaskRun", args: "DictConfig") -> "TaskBuilder":
+ """Get the correct TaskBuilder for this task run"""
+ from mephisto.operations.registry import get_blueprint_from_type
+
+ if cls == TaskBuilder:
+ # We are trying to construct an TaskBuilder, find what type to use and
+ # create that instead
+ correct_class = get_blueprint_from_type(task_run.task_type).TaskBuilderClass
+ return super().__new__(correct_class)
+ else:
+ # We are constructing another instance directly
+ return super().__new__(cls)
+
+ @abstractmethod
+ def build_in_dir(self, build_dir: str) -> None:
+ """
+ Build the server for the given task run into the provided directory
+ """
+ raise NotImplementedError()
+
+
+class TaskRunner(ABC):
+ """
+ Class to manage running a task of a specific type. Includes
+ building the dependencies to a directory to be deployed to
+ the server, and spawning threads that manage the process of
+ passing agents through a task.
+ """
+
+ def __init__(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ """
+ Store the run configuration and set up empty bookkeeping for the
+ assignments, units and onboardings this runner is currently running.
+
+ If `args.blueprint` provides a `block_qualification`, that
+ qualification is looked up or created in the task run's db.
+ """
+ self.args = args
+ self.shared_state = shared_state
+ self.task_run = task_run
+ # Registries consulted by the launch_* methods to avoid double launches
+ self.running_assignments: Dict[str, "Assignment"] = {}
+ self.running_units: Dict[str, "Unit"] = {}
+ self.running_onboardings: Dict[str, "OnboardingAgent"] = {}
+ # Defaults to False here; presumably overridden by concurrent runners
+ self.is_concurrent = False
+ # TODO(102) populate some kind of local state for tasks that are being run
+ # by this runner from the database.
+
+ self.block_qualification = args.blueprint.get("block_qualification", None)
+ if self.block_qualification is not None:
+ find_or_create_qualification(task_run.db, self.block_qualification)
+
+ def __new__(
+ cls, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ) -> "TaskRunner":
+ """Get the correct TaskRunner for this task run"""
+ if cls == TaskRunner:
+ from mephisto.operations.registry import get_blueprint_from_type
+
+ # We are trying to construct an AgentState, find what type to use and
+ # create that instead
+ correct_class = get_blueprint_from_type(task_run.task_type).TaskRunnerClass
+ return super().__new__(correct_class)
+ else:
+ # We are constructing another instance directly
+ return super().__new__(cls)
+
+ def launch_onboarding(self, onboarding_agent: "OnboardingAgent") -> None:
+ """
+ Validate that onboarding is ready, then launch. Catch disconnect conditions
+
+ Does nothing if this onboarding agent's id is already registered in
+ `running_onboardings`. Otherwise the agent is registered, `run_onboarding`
+ is called, and the agent is marked done only if that call returns
+ normally. Any `Exception` leads to `cleanup_onboarding`; exceptions
+ other than agent return/timeout/disconnect are also printed with
+ their traceback. The agent is unregistered afterwards.
+ """
+ onboarding_id = onboarding_agent.get_agent_id()
+ if onboarding_id in self.running_onboardings:
+ print(f"Onboarding {onboarding_id} is already running")
+ return
+
+ print(f"Onboarding {onboarding_id} is launching with {onboarding_agent}")
+
+ # At this point we're sure we want to run Onboarding
+ self.running_onboardings[onboarding_id] = onboarding_agent
+ try:
+ self.run_onboarding(onboarding_agent)
+ onboarding_agent.mark_done()
+ except (AgentReturnedError, AgentTimeoutError, AgentDisconnectedError):
+ # Expected ways for an agent to leave; clean up quietly
+ self.cleanup_onboarding(onboarding_agent)
+ except Exception as e:
+ print(f"Unhandled exception in onboarding {onboarding_agent}: {repr(e)}")
+ import traceback
+
+ traceback.print_exc()
+ self.cleanup_onboarding(onboarding_agent)
+ del self.running_onboardings[onboarding_id]
+ return
+
+ def launch_unit(self, unit: "Unit", agent: "Agent") -> None:
+ """
+ Validate the unit is prepared to launch, then run it
+ """
+ if unit.db_id in self.running_units:
+ print(f"Unit {unit.db_id} is already running")
+ return
+
+ print(f"Unit {unit.db_id} is launching with {agent}")
+
+ # At this point we're sure we want to run the unit
+ self.running_units[unit.db_id] = unit
+ try:
+ self.run_unit(unit, agent)
+ except (AgentReturnedError, AgentTimeoutError, AgentDisconnectedError):
+ # A returned Unit can be worked on again by someone else.
+ if (
+ unit.get_status() != AssignmentState.EXPIRED
+ and unit.get_assigned_agent().db_id == agent.db_id
+ ):
+ unit.clear_assigned_agent()
+ self.cleanup_unit(unit)
+ except Exception as e:
+ print(f"Unhandled exception in unit {unit}: {repr(e)}")
+ import traceback
+
+ traceback.print_exc()
+ self.cleanup_unit(unit)
+ del self.running_units[unit.db_id]
+ return
+
+ def launch_assignment(
+ self, assignment: "Assignment", agents: List["Agent"]
+ ) -> None:
+ """
+ Validate the assignment is prepared to launch, then run it
+ """
+ if assignment.db_id in self.running_assignments:
+ print(f"Assignment {assignment.db_id} is already running")
+ return
+
+ print(f"Assignment {assignment.db_id} is launching with {agents}")
+
+ # At this point we're sure we want to run the assignment
+ self.running_assignments[assignment.db_id] = assignment
+ try:
+ self.run_assignment(assignment, agents)
+ except (AgentReturnedError, AgentTimeoutError, AgentDisconnectedError) as e:
+ # TODO(#99) if some operator flag is set for counting complete tasks, launch a
+ # new assignment copied from the parameters of this one
+ disconnected_agent_id = e.agent_id
+ for agent in agents:
+ if agent.db_id != e.agent_id:
+ agent.update_status(AgentState.STATUS_PARTNER_DISCONNECT)
+ else:
+ # Must expire the disconnected unit so that
+ # new workers aren't shown it
+ agent.get_unit().expire()
+ self.cleanup_assignment(assignment)
+ except Exception as e:
+ print(f"Unhandled exception in assignment {assignment}: {repr(e)}")
+ import traceback
+
+ traceback.print_exc()
+ self.cleanup_assignment(assignment)
+ del self.running_assignments[assignment.db_id]
+ return
+
+ @staticmethod
+ def get_data_for_assignment(assignment: "Assignment") -> "InitializationData":
+ """
+ Finds the right data to get for the given assignment.
+ """
+ return assignment.get_assignment_data()
+
+ @abstractmethod
+ def get_init_data_for_agent(self, agent: "Agent"):
+ """
+ Return the data that an agent will need for their task.
+
+ Abstract: concrete runners must override this; the base
+ implementation raises NotImplementedError.
+ """
+ raise NotImplementedError()
+
+ def filter_units_for_worker(self, units: List["Unit"], worker: "Worker"):
+ """
+ Returns the list of Units that the given worker is eligible to work on.
+
+ Some tasks may want more direct control of what units a worker is
+ allowed to work on, so this method should be overridden by children
+ classes.
+
+ The base implementation applies no filtering and returns `units`
+ unchanged.
+ """
+ return units
+
+ # TaskRunners must implement either the unit or assignment versions of the
+ # run and cleanup functions, depending on if the task is run at the assignment
+ # level rather than on the unit level.
+
+ def run_onboarding(self, agent: "OnboardingAgent"):
+ """
+ Handle setup for any resources to run an onboarding task. This
+ will be run in a background thread, and should be tolerant to being
+ interrupted by cleanup_onboarding.
+
+ Only required by tasks that want to implement onboarding
+ """
+ raise NotImplementedError()
+
+ def cleanup_onboarding(self, agent: "OnboardingAgent"):
+ """
+ Handle cleaning up the resources that were being used to onboard
+ the given agent.
+ """
+ raise NotImplementedError()
+
+ def run_unit(self, unit: "Unit", agent: "Agent"):
+ """
+ Handle setup for any resources required to get this unit running.
+ This will be run in a background thread, and should be tolerant to
+ being interrupted by cleanup_unit.
+
+ Only needs to be implemented by non-concurrent tasks
+ """
+ raise NotImplementedError()
+
+ def cleanup_unit(self, unit: "Unit"):
+ """
+ Handle ensuring resources for a given unit are cleaned up following
+ a disconnect or other crash event
+
+ Does not need to be implemented if the run_unit method is
+ already error catching and handles its own cleanup
+ """
+ raise NotImplementedError()
+
+ def run_assignment(self, assignment: "Assignment", agents: List["Agent"]):
+ """
+ Handle setup for any resources required to get this assignment running.
+ This will be run in a background thread, and should be tolerant to
+ being interrupted by cleanup_assignment.
+
+ Only needs to be implemented by concurrent tasks
+ """
+ raise NotImplementedError()
+
+ def cleanup_assignment(self, assignment: "Assignment"):
+ """
+ Handle ensuring resources for a given assignment are cleaned up following
+ a disconnect or other crash event
+
+ Does not need to be implemented if the run_assignment method is
+ already error catching and handles its own cleanup
+ """
+ raise NotImplementedError()
+
+
+# TODO(#101) what is the best method for creating new ones of these for different task types
+# in ways that are supported by different backends? Perhaps abstract additional
+# methods into the required db interface? Move any file manipulations into a
+# extra_data_handler subcomponent of the MephistoDB class?
+class AgentState(ABC):
+ """
+ Class for holding state information about work by an Agent on a Unit, currently
+ stored as current task work into a json file.
+
+ Specific state implementations will need to be created for different Task Types,
+ as different tasks store and load differing data.
+ """
+
+ # Possible Agent Status Values
+ STATUS_NONE = "none"
+ STATUS_ACCEPTED = "accepted"
+ STATUS_ONBOARDING = "onboarding"
+ STATUS_WAITING = "waiting"
+ STATUS_IN_TASK = "in task"
+ STATUS_COMPLETED = "completed"
+ STATUS_DISCONNECT = "disconnect"
+ STATUS_TIMEOUT = "timeout"
+ STATUS_PARTNER_DISCONNECT = "partner disconnect"
+ STATUS_EXPIRED = "expired"
+ STATUS_RETURNED = "returned"
+ STATUS_APPROVED = "approved"
+ STATUS_SOFT_REJECTED = "soft_rejected"
+ STATUS_REJECTED = "rejected"
+
+ def __new__(cls, agent: Union["Agent", "OnboardingAgent"]) -> "AgentState":
+ """Return the correct agent state for the given agent"""
+ if cls == AgentState:
+ from mephisto.data_model.agent import Agent
+ from mephisto.operations.registry import get_blueprint_from_type
+
+ # We are trying to construct an AgentState, find what type to use and
+ # create that instead
+ if isinstance(agent, Agent):
+ correct_class = get_blueprint_from_type(agent.task_type).AgentStateClass
+ else:
+ correct_class = get_blueprint_from_type(
+ agent.task_type
+ ).OnboardingAgentStateClass
+ return super().__new__(correct_class)
+ else:
+ # We are constructing another instance directly
+ return super().__new__(cls)
+
+ @staticmethod
+ def complete() -> List[str]:
+ """Return all final Agent statuses which will not be updated by the supervisor"""
+ return [
+ AgentState.STATUS_COMPLETED,
+ AgentState.STATUS_DISCONNECT,
+ AgentState.STATUS_TIMEOUT,
+ AgentState.STATUS_EXPIRED,
+ AgentState.STATUS_RETURNED,
+ AgentState.STATUS_SOFT_REJECTED,
+ AgentState.STATUS_APPROVED,
+ AgentState.STATUS_REJECTED,
+ ]
+
+ @staticmethod
+ def valid() -> List[str]:
+ """Return all valid Agent statuses"""
+ # TODO(#97) write a test that ensures all AgentState statuses are here
+ return [
+ AgentState.STATUS_NONE,
+ AgentState.STATUS_ACCEPTED,
+ AgentState.STATUS_ONBOARDING,
+ AgentState.STATUS_WAITING,
+ AgentState.STATUS_IN_TASK,
+ AgentState.STATUS_COMPLETED,
+ AgentState.STATUS_DISCONNECT,
+ AgentState.STATUS_TIMEOUT,
+ AgentState.STATUS_PARTNER_DISCONNECT,
+ AgentState.STATUS_EXPIRED,
+ AgentState.STATUS_RETURNED,
+ AgentState.STATUS_SOFT_REJECTED,
+ AgentState.STATUS_APPROVED,
+ AgentState.STATUS_REJECTED,
+ ]
+
+ # Implementations of an AgentState must implement the following:
+
+ @abstractmethod
+ def __init__(self, agent: "Agent"):
+ """
+ Create an AgentState to track the state of an agent's work on a Unit
+
+ Implementations should initialize any required files for saving and
+ loading state data somewhere.
+
+ If said file already exists based on the given agent, load that data
+ instead.
+
+ NOTE(review): `__new__` also accepts an OnboardingAgent, while this
+ annotation only names Agent - confirm which is intended.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def set_init_state(self, data: Any) -> bool:
+ """
+ Set the initial state for this agent
+
+ NOTE(review): the meaning of the returned bool is not defined here;
+ check concrete implementations.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def get_init_state(self) -> Optional[Any]:
+ """
+ Return the initial state for this agent,
+ None if no such state exists
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def load_data(self) -> None:
+ """
+ Load stored data from a file to this object
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def get_data(self) -> Dict[str, Any]:
+ """
+ Return the currently stored data for this task in the format
+ expected by any frontend displays
+ """
+ raise NotImplementedError()
+
+ def get_parsed_data(self) -> Any:
+ """
+ Return the portion of the data that is relevant to a human
+ who wants to parse or analyze the data
+
+ Utility function to handle stripping the data of any
+ context that is only important for reproducing the task
+ exactly. By default is just `get_data`
+ """
+ return self.get_data()
+
+ @abstractmethod
+ def save_data(self) -> None:
+ """
+ Save the relevant data from this Unit to a file in the expected location
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def update_data(self, packet: "Packet") -> None:
+ """
+ Put new current Unit data into this AgentState
+ """
+ # TODO(#100) maybe refine the signature for this function once use cases
+ # are fully scoped
+
+ # Some use cases might just be appending new data, some
+ # might instead prefer to maintain a final state.
+
+ # Maybe the correct storage is of a series of actions taken
+ # on this Unit? Static tasks only have 2 turns max, dynamic
+ # ones may have multiple turns or steps.
+ raise NotImplementedError()
+
+ def get_task_start(self) -> Optional[float]:
+ """
+ Return the start time for this task, if it is available
+
+ The base implementation does not track timing and always returns 0.0
+ (not None).
+ """
+ return 0.0
+
+ def get_task_end(self) -> Optional[float]:
+ """
+ Return the end time for this task, if it is available
+
+ The base implementation does not track timing and always returns 0.0
+ (not None).
+ """
+ return 0.0
+
+
+class OnboardingRequired(object):
+ """
+ Compositional class for blueprints that may have an onboarding step
+ """
+
+ @staticmethod
+ def get_failed_qual(qual_name: str) -> str:
+ """Returns the wrapper for a qualification to represent failing an onboarding"""
+ return qual_name + "-failed"
+
+ def init_onboarding_config(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ self.onboarding_qualification_name: Optional[str] = args.blueprint.get(
+ "onboarding_qualification", None
+ )
+ self.onboarding_data = shared_state.onboarding_data
+ self.use_onboarding = self.onboarding_qualification_name is not None
+ self.onboarding_qualification_id = None
+ if self.onboarding_qualification_name is not None:
+ db = task_run.db
+ found_qualifications = db.find_qualifications(
+ self.onboarding_qualification_name
+ )
+ if len(found_qualifications) == 0:
+ self.onboarding_qualification_id = db.make_qualification(
+ self.onboarding_qualification_name
+ )
+ else:
+ self.onboarding_qualification_id = found_qualifications[0].db_id
+
+ # We need to keep a separate qualification for failed onboarding
+ # to push to a crowd provider in order to prevent workers
+ # who have failed from being shown our task
+ self.onboarding_failed_name = self.get_failed_qual(
+ self.onboarding_qualification_name
+ )
+ found_qualifications = db.find_qualifications(self.onboarding_failed_name)
+ if len(found_qualifications) == 0:
+ self.onboarding_failed_id = db.make_qualification(
+ self.onboarding_failed_name
+ )
+ else:
+ self.onboarding_failed_id = found_qualifications[0].db_id
+
+ def get_onboarding_data(self, worker_id: str) -> Dict[str, Any]:
+ """
+ If the onboarding task on the frontend requires any specialized data, the blueprint
+ should provide it for the user.
+
+ As onboarding qualifies a worker for all tasks from this blueprint, this should
+ generally be static data that can later be evaluated against.
+ """
+ return self.onboarding_data
+
+ def validate_onboarding(
+ self, worker: "Worker", onboarding_agent: "OnboardingAgent"
+ ) -> bool:
+ """
+ Check the incoming onboarding data and evaluate if the worker
+ has passed the qualification or not. Return True if the worker
+ has qualified.
+ """
+ return True
+
+
+class Blueprint(ABC):
+ """
+ Configuration class for the various parts of building, launching,
+ and running a task of a specific task. Provides utility functions
+ for managing between the three main components, which are separated
+ into separate classes in acknowledgement that some tasks may have
+ particularly complicated processes for them
+ """
+
+ AgentStateClass: ClassVar[Type["AgentState"]]
+ OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = AgentState # type: ignore
+ TaskRunnerClass: ClassVar[Type["TaskRunner"]]
+ TaskBuilderClass: ClassVar[Type["TaskBuilder"]]
+ ArgsClass: ClassVar[Type["BlueprintArgs"]] = BlueprintArgs
+ SharedStateClass: ClassVar[Type["SharedTaskState"]] = SharedTaskState
+ supported_architects: ClassVar[List[str]]
+ BLUEPRINT_TYPE: str
+
+ def __init__(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ self.args = args
+ self.shared_state = shared_state
+ self.frontend_task_config = shared_state.task_config
+
+ @classmethod
+ def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
+ """
+ Assert that the provided arguments are valid. Should
+ fail if a task launched with these arguments would
+ not work
+ """
+ return
+
+ def get_frontend_args(self) -> Dict[str, Any]:
+ """
+ Specifies what options should be fowarded
+ to the client for use by the task's frontend
+ """
+ return self.frontend_task_config
+
+ @abstractmethod
+ def get_initialization_data(
+ self,
+ ) -> Union[Iterable["InitializationData"], AsyncIterator["InitializationData"]]:
+ """
+ Get all of the data used to initialize tasks from this blueprint.
+ Can either be a simple iterable if all the assignments can
+ be processed at once, or an AsyncIterator if the number
+ of tasks is unknown or changes based on something running
+ concurrently with the job.
+ """
+ raise NotImplementedError
diff --git a/mephisto/abstractions/blueprints/README.md b/mephisto/abstractions/blueprints/README.md
new file mode 100644
index 000000000..f0337c116
--- /dev/null
+++ b/mephisto/abstractions/blueprints/README.md
@@ -0,0 +1,43 @@
+# Blueprints
+## Overview
+Blueprints serve to package tasks (and groups of similar tasks) into a reusable format. They can be used to work through piloting tasks, collecting data, testing different formats, etc. They're also used by the architecture to simplify the data accumulation and review processes. The `StaticBlueprint` is a good starting example of how to implement a blueprint.
+
+## Implementation Details
+### `AgentState`
+The agent state is responsible for defining the data that is important to store for a specific `Unit`, as well as methods for writing that locally to disk. To abstract this, it must implement the following methods:
+- `set_init_state(data)`: given data provided by the `get_init_data_for_agent` method, initialize this agent state to whatever starting state is relevant for this `Unit`.
+- `get_init_state()`: Return the initial state to be sent to the agent for use in the frontend.
+- `load_data()`: Load data that is saved to file to re-initialize the state for this `AgentState`. Generally data should be stored in `self.agent.get_data_dir()`, however any storage solution will work as long as it remains consistent.
+- `get_data()`: Return the stored data for this task in the format expected to render a completed task in the frontend.
+- `save_data()`: Save data to a file such that it can be re-initialized later. Generally data should be stored in `self.agent.get_data_dir()`, however any storage solution will work as long as it remains consistent, and `load_data()` will be able to find it.
+- `update_data()`: Update the local state stored in this `AgentState` given the data sent from the frontend. Given your frontend is what packages data to send, this is entirely customizable by the task creator.
+
+(TODO) Specify a format for data to be sent to the frontend for review.
+
+### `TaskBuilder`
+`TaskBuilder`s exist to abstract away how a frontend gets built, allowing Mephisto users to design tasks however they'd like. They can also take build options to customize what ends up built. They must implement the following:
+- `build_in_dir(build_dir)`: Take any important source files and put them into the given build dir. This directory will be deployed to the frontend and will become the static target for completing the task.
+- `get_extra_options()`: Return the specific task options that are relevant to customize the frontend when `build_in_dir` is called.
+(TODO) Remove all references to the below function
+- `task_dir_is_valid(task_dir)`: Originally this was intended to specify whether the task directory supplied outside of the task for this task to use was properly formatted, however when `Blueprint`s were finalized, the gallery no longer existed and this route of customization is no longer supported.
+
+### `TaskRunner`
+The `TaskRunner` component of a blueprint is responsible for actually stepping `Agent`s through the task when it is live. It is, in short, able to set up task control. A `TaskRunner` needs to implement the following:
+- `get_init_data_for_agent`: Provide initial data for an assignment. If this agent is reconnecting (and as such attached to an existing task), update that task to point to the new agent (as the old agent object will no longer receive data from the frontend).
+- `run_assignment`: Handle setup for any resources required to get this assignment running. It will be launched in a background thread, and should be tolerant to being interrupted by cleanup_assignment.
+- `cleanup_assignment`: Send any signals to the required thread for the given assignment to tell it to terminate, then clean up any resources that were set within it.
+- `get_data_for_assignment` (optional): Get the data that an assignment is going to use when run. By default, this pulls from `assignment.get_assignment_data()` however if a task has a special storage mechanism or data type, the assignment data can be fetched here. (TODO) make this optional by having the base class use the `StaticTaskRunner`'s implementation.
+(TODO) task launching management at the moment is really sloppy, and the API for it is unclear. Something better needs to be picked, as at the moment `get_init_data_for_agent` is responsible for ensuring that `run_assignment` is set up in a thread. Perhaps this responsibility should be consolidated into the `TaskLauncher` class.
+
+## Implementations
+### `StaticBlueprint`
+The `StaticBlueprint` class allows a replication of the interface that MTurk provides, being able to take a snippet of `HTML` and a `.csv` file and deploy tasks that fill templates of the `HTML` with values from the `.csv`.
+
+(TODO) support other sources than a .csv
+
+### `MockBlueprint`
+The `MockBlueprint` exists to test other parts of the Mephisto architecture, and doesn't actually provide a real task.
+
+## Future work
+(TODO) - Clean up the notion of galleries and parent task ids, as we're consolidating into blueprints
+(TODO) - Allow for using user blueprints
diff --git a/test/server/__init__.py b/mephisto/abstractions/blueprints/__init__.py
similarity index 100%
rename from test/server/__init__.py
rename to mephisto/abstractions/blueprints/__init__.py
diff --git a/mephisto/abstractions/blueprints/abstract/README.md b/mephisto/abstractions/blueprints/abstract/README.md
new file mode 100644
index 000000000..2bd09963f
--- /dev/null
+++ b/mephisto/abstractions/blueprints/abstract/README.md
@@ -0,0 +1,2 @@
+# Abstract blueprints
+The blueprints present in this folder provide a set of common underlying blueprint infrastructure, but are incomplete in some form or other. They are not registered as blueprints because they aren't intended to be launched, but rather to be extended upon.
\ No newline at end of file
diff --git a/test/server/architects/__init__.py b/mephisto/abstractions/blueprints/abstract/__init__.py
similarity index 100%
rename from test/server/architects/__init__.py
rename to mephisto/abstractions/blueprints/abstract/__init__.py
diff --git a/test/server/blueprints/__init__.py b/mephisto/abstractions/blueprints/abstract/static_task/__init__.py
similarity index 100%
rename from test/server/blueprints/__init__.py
rename to mephisto/abstractions/blueprints/abstract/static_task/__init__.py
diff --git a/mephisto/abstractions/blueprints/abstract/static_task/empty_task_builder.py b/mephisto/abstractions/blueprints/abstract/static_task/empty_task_builder.py
new file mode 100644
index 000000000..f4614e016
--- /dev/null
+++ b/mephisto/abstractions/blueprints/abstract/static_task/empty_task_builder.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import TaskBuilder
+
+
+class EmptyStaticTaskBuilder(TaskBuilder):
+ """
+ Abstract class for a task builder for static tasks
+ """
+
+ def build_in_dir(self, build_dir: str):
+ """Placeholder that always raises; extending blueprints must define their own TaskBuilder"""
+ raise AssertionError(
+ "Classes that extend the abstract StaticBlueprint must define a custom "
+ "TaskBuilder class that pulls the correct frontend together. Examples "
+ "can be seen in the static_react_task and static_html_task folders. "
+ "Note that extra static content will be provided in `args.blueprint.extra_source_dir` "
+ )
diff --git a/mephisto/abstractions/blueprints/abstract/static_task/static_agent_state.py b/mephisto/abstractions/blueprints/abstract/static_task/static_agent_state.py
new file mode 100644
index 000000000..8fa64a766
--- /dev/null
+++ b/mephisto/abstractions/blueprints/abstract/static_task/static_agent_state.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Dict, Optional, Any, TYPE_CHECKING
+from mephisto.abstractions.blueprint import AgentState
+import os
+import json
+import time
+
+if TYPE_CHECKING:
+ from mephisto.data_model.agent import Agent
+ from mephisto.data_model.packet import Packet
+
+from mephisto.operations.logger_core import get_logger
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+DATA_FILE = "agent_data.json"
+
+
+class StaticAgentState(AgentState):
+ """
+ Agent state for static tasks.
+ """
+
+ def _get_empty_state(self) -> Dict[str, Optional[Dict[str, Any]]]:
+ return {
+ "inputs": None,
+ "outputs": None,
+ "times": {"task_start": 0, "task_end": 0},
+ }
+
+ def __init__(self, agent: "Agent"):
+ """
+ Static agent states should store
+ input dict -> output dict pairs to disk
+ """
+ self.agent = agent
+ self.state: Dict[str, Optional[Dict[str, Any]]] = self._get_empty_state()
+ self.load_data()
+
+ def set_init_state(self, data: Any) -> bool:
+ """Set the initial state for this agent"""
+ if self.get_init_state() is not None:
+ # Initial state is already set
+ return False
+ else:
+ self.state["inputs"] = data
+ times_dict = self.state["times"]
+ # TODO(#103) this typing may be better handled another way
+ assert isinstance(times_dict, dict)
+ times_dict["task_start"] = time.time()
+ self.save_data()
+ return True
+
+ def get_init_state(self) -> Optional[Dict[str, Any]]:
+ """
+ Return the initial state for this agent,
+ None if no such state exists
+ """
+ if self.state["inputs"] is None:
+ return None
+ return self.state["inputs"].copy()
+
+ def load_data(self) -> None:
+ """Load data for this agent from disk"""
+ data_dir = self.agent.get_data_dir()
+ data_path = os.path.join(data_dir, DATA_FILE)
+ if os.path.exists(data_path):
+ with open(data_path, "r") as data_file:
+ self.state = json.load(data_file)
+ else:
+ self.state = self._get_empty_state()
+
+ def get_data(self) -> Dict[str, Any]:
+ """Return dict of this agent's state"""
+ return self.state.copy()
+
+ def save_data(self) -> None:
+ """Save static agent data to disk"""
+ data_dir = self.agent.get_data_dir()
+ os.makedirs(data_dir, exist_ok=True)
+ out_filename = os.path.join(data_dir, DATA_FILE)
+ with open(out_filename, "w+") as data_file:
+ json.dump(self.state, data_file)
+ logger.info(f"SAVED_DATA_TO_DISC at {out_filename}")
+
+ def update_data(self, packet: "Packet") -> None:
+ """
+ Process the incoming data packet, and handle
+ updating the state
+ """
+ assert (
+ packet.data.get("MEPHISTO_is_submit") is True
+ or packet.data.get("onboarding_data") is not None
+ ), "Static tasks should only have final act"
+
+ outputs: Dict[str, Any]
+
+ if packet.data.get("onboarding_data") is not None:
+ outputs = packet.data["onboarding_data"]
+ else:
+ outputs = packet.data["task_data"]
+ times_dict = self.state["times"]
+ # TODO(#103) this typing may be better handled another way
+ assert isinstance(times_dict, dict)
+ times_dict["task_end"] = time.time()
+ if packet.data.get("files") is not None:
+ logger.info(f"Got files: {str(packet.data['files'])[:500]}")
+ outputs["files"] = [f["filename"] for f in packet.data["files"]]
+ self.state["outputs"] = outputs
+ self.save_data()
diff --git a/mephisto/abstractions/blueprints/abstract/static_task/static_blueprint.py b/mephisto/abstractions/blueprints/abstract/static_task/static_blueprint.py
new file mode 100644
index 000000000..3dcd4c482
--- /dev/null
+++ b/mephisto/abstractions/blueprints/abstract/static_task/static_blueprint.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import (
+ Blueprint,
+ OnboardingRequired,
+ BlueprintArgs,
+ SharedTaskState,
+)
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+from mephisto.data_model.assignment import InitializationData
+from mephisto.abstractions.blueprints.abstract.static_task.static_agent_state import (
+ StaticAgentState,
+)
+from mephisto.abstractions.blueprints.abstract.static_task.static_task_runner import (
+ StaticTaskRunner,
+)
+from mephisto.abstractions.blueprints.abstract.static_task.empty_task_builder import (
+ EmptyStaticTaskBuilder,
+)
+from mephisto.operations.registry import register_mephisto_abstraction
+
+import os
+import time
+import csv
+import json
+
+from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.blueprint import (
+ AgentState,
+ TaskRunner,
+ TaskBuilder,
+ OnboardingAgent,
+ )
+ from mephisto.data_model.assignment import Assignment
+ from mephisto.data_model.worker import Worker
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+
+@dataclass
+class SharedStaticTaskState(SharedTaskState):
+ static_task_data: List[Any] = field(default_factory=list)
+
+
+@dataclass
+class StaticBlueprintArgs(BlueprintArgs):
+ units_per_assignment: int = field(
+ default=1, metadata={"help": "How many workers you want to do each assignment"}
+ )
+ extra_source_dir: str = field(
+ default=MISSING,
+ metadata={
+ "help": (
+ "Optional path to sources that the HTML may "
+ "refer to (such as images/video/css/scripts)"
+ )
+ },
+ )
+ data_json: str = field(
+ default=MISSING, metadata={"help": "Path to JSON file containing task data"}
+ )
+ data_jsonl: str = field(
+ default=MISSING, metadata={"help": "Path to JSON-L file containing task data"}
+ )
+ data_csv: str = field(
+ default=MISSING, metadata={"help": "Path to csv file containing task data"}
+ )
+ extra_source_dir: str = field(
+ default=MISSING,
+ metadata={
+ "help": (
+ "Optional path to sources that the HTML may "
+ "refer to (such as images/video/css/scripts)"
+ )
+ },
+ )
+
+
+class StaticBlueprint(Blueprint, OnboardingRequired):
+ """
+ Abstract blueprint for a task that runs without any extensive backend.
+ These are generally one-off tasks sending data to the frontend and then
+ awaiting a response.
+ """
+
+ AgentStateClass: ClassVar[Type["AgentState"]] = StaticAgentState
+ OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = StaticAgentState
+ TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = EmptyStaticTaskBuilder
+ TaskRunnerClass: ClassVar[Type["TaskRunner"]] = StaticTaskRunner
+ ArgsClass: ClassVar[Type["BlueprintArgs"]] = StaticBlueprintArgs
+ supported_architects: ClassVar[List[str]] = ["mock"] # TODO update
+
+ def __init__(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ super().__init__(task_run, args, shared_state)
+ self.init_onboarding_config(task_run, args, shared_state)
+
+ self._initialization_data_dicts: List[Dict[str, Any]] = []
+ blue_args = args.blueprint
+ if blue_args.get("data_csv", None) is not None:
+ csv_file = os.path.expanduser(blue_args.data_csv)
+ with open(csv_file, "r", encoding="utf-8-sig") as csv_fp:
+ csv_reader = csv.reader(csv_fp)
+ headers = next(csv_reader)
+ for row in csv_reader:
+ row_data = {}
+ for i, col in enumerate(row):
+ row_data[headers[i]] = col
+ self._initialization_data_dicts.append(row_data)
+ elif blue_args.get("data_json", None) is not None:
+ json_file = os.path.expanduser(blue_args.data_json)
+ with open(json_file, "r", encoding="utf-8-sig") as json_fp:
+ json_data = json.load(json_fp)  # load() reads a file object; loads() only accepts str/bytes
+ for jd in json_data:
+ self._initialization_data_dicts.append(jd)
+ elif blue_args.get("data_jsonl", None) is not None:
+ jsonl_file = os.path.expanduser(blue_args.data_jsonl)
+ with open(jsonl_file, "r", encoding="utf-8-sig") as jsonl_fp:
+ line = jsonl_fp.readline()
+ while line:
+ j = json.loads(line)
+ self._initialization_data_dicts.append(j)
+ line = jsonl_fp.readline()
+ elif shared_state.static_task_data is not None:
+ self._initialization_data_dicts = shared_state.static_task_data
+ else:
+ # instantiating a version of the blueprint, but not necessarily needing the data
+ pass
+
+ @classmethod
+ def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
+ """Ensure that the data can be properly loaded"""
+ blue_args = args.blueprint
+ if blue_args.get("data_csv", None) is not None:
+ csv_file = os.path.expanduser(blue_args.data_csv)
+ assert os.path.exists(
+ csv_file
+ ), f"Provided csv file {csv_file} doesn't exist"
+ elif blue_args.get("data_json", None) is not None:
+ json_file = os.path.expanduser(blue_args.data_json)
+ assert os.path.exists(
+ json_file
+ ), f"Provided JSON file {json_file} doesn't exist"
+ elif blue_args.get("data_jsonl", None) is not None:
+ jsonl_file = os.path.expanduser(blue_args.data_jsonl)
+ assert os.path.exists(
+ jsonl_file
+ ), f"Provided JSON-L file {jsonl_file} doesn't exist"
+ elif shared_state.static_task_data is not None:
+ assert (
+ len(shared_state.static_task_data) > 0
+ ), "Length of data dict provided was 0"
+ else:
+ raise AssertionError(
+ "Must provide one of a data csv, json, json-L, or a list of tasks"
+ )
+
+ def get_initialization_data(self) -> Iterable["InitializationData"]:
+ """
+ Return the InitializationData retrieved from the specified stream
+ """
+ return [
+ InitializationData(
+ shared=d, unit_data=[{}] * self.args.blueprint.units_per_assignment
+ )
+ for d in self._initialization_data_dicts
+ ]
+
+ def validate_onboarding(
+ self, worker: "Worker", onboarding_agent: "OnboardingAgent"
+ ) -> bool:
+ """
+ Check the incoming onboarding data and evaluate if the worker
+ has passed the qualification or not. Return True if the worker
+ has qualified.
+ """
+ data = onboarding_agent.state.get_data()
+ return self.shared_state.validate_onboarding(
+ data
+ ) # data["outputs"].get("success", True)
diff --git a/mephisto/abstractions/blueprints/abstract/static_task/static_task_runner.py b/mephisto/abstractions/blueprints/abstract/static_task/static_task_runner.py
new file mode 100644
index 000000000..cf8e827af
--- /dev/null
+++ b/mephisto/abstractions/blueprints/abstract/static_task/static_task_runner.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import TaskRunner
+
+import os
+import time
+import threading
+
+from typing import ClassVar, List, Type, Any, Dict, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Unit, InitializationData
+ from mephisto.data_model.agent import Agent, OnboardingAgent
+ from mephisto.abstractions.blueprint import SharedTaskState
+ from omegaconf import DictConfig
+
+
+SYSTEM_SENDER = "mephisto" # TODO(CLEAN) pull from somewhere
+
+
+class StaticTaskRunner(TaskRunner):
+ """
+ Task runner for a static task
+
+ Static tasks always assume single unit assignments,
+ as only one person can work on them at a time
+ """
+
+ def __init__(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ super().__init__(task_run, args, shared_state)
+ self.is_concurrent = False
+ self.assignment_duration_in_seconds = (
+ task_run.get_task_config().assignment_duration_in_seconds
+ )
+
+ def get_init_data_for_agent(self, agent: "Agent") -> Dict[str, Any]:
+ """
+ Return the data for an agent already assigned to a particular unit
+ """
+ init_state = agent.state.get_init_state()
+ if init_state is not None:
+ # reconnecting agent, give what we've got
+ return init_state
+ else:
+ assignment = agent.get_unit().get_assignment()
+ assignment_data = self.get_data_for_assignment(assignment)
+ agent.state.set_init_state(assignment_data.shared)
+ return assignment_data.shared
+
+ def run_onboarding(self, agent: "OnboardingAgent"):
+ """
+ Static onboarding flows exactly like a regular task, waiting for
+ the submit to come through
+ """
+ agent_act = agent.act(timeout=self.assignment_duration_in_seconds)
+
+ def cleanup_onboarding(self, agent: "OnboardingAgent"):
+ """Nothing to clean up in a static onboarding"""
+ return
+
+ def run_unit(self, unit: "Unit", agent: "Agent") -> None:
+ """
+ Static runners will get the task data, send it to the user, then
+ wait for the agent to act (the data to be completed)
+ """
+ # Frontend implicitly asks for the initialization data, so we just need
+ # to wait for a response
+ agent_act = agent.act(timeout=self.assignment_duration_in_seconds)
+
+ def cleanup_unit(self, unit: "Unit") -> None:
+ """There is currently no cleanup associated with killing an incomplete task"""
+ return
diff --git a/mephisto/abstractions/blueprints/mock/__init__.py b/mephisto/abstractions/blueprints/mock/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/blueprints/mock/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/blueprints/mock/mock_agent_state.py b/mephisto/abstractions/blueprints/mock/mock_agent_state.py
new file mode 100644
index 000000000..a5c623a8a
--- /dev/null
+++ b/mephisto/abstractions/blueprints/mock/mock_agent_state.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Dict, Any, TYPE_CHECKING
+from mephisto.abstractions.blueprint import AgentState
+import os
+import json
+
+if TYPE_CHECKING:
+ from mephisto.data_model.agent import Agent
+ from mephisto.data_model.packet import Packet
+
+
+class MockAgentState(AgentState):
+ """
+ Mock agent state that is to be used for testing
+ """
+
+ def __init__(self, agent: "Agent"):
+ """Mock agent states keep everything in local memory"""
+ self.agent = agent
+ self.state: Dict[str, Any] = {}
+ self.init_state: Any = None
+
+ def set_init_state(self, data: Any) -> bool:
+ """Set the initial state for this agent"""
+ if self.init_state is not None:
+ # Initial state is already set
+ return False
+ else:
+ self.init_state = data
+ self.save_data()
+ return True
+
+ def get_init_state(self) -> Optional[Dict[str, Any]]:
+ """
+ Return the initial state for this agent,
+ None if no such state exists
+ """
+ return self.init_state
+
+ def load_data(self) -> None:
+ """Mock agent states have no data stored"""
+ pass
+
+ def get_data(self) -> Dict[str, Any]:
+ """Return dict of this agent's state"""
+ return self.state
+
+ def save_data(self) -> None:
+ """Mock agents don't save data (yet)"""
+ pass
+
+ def update_data(self, packet: "Packet") -> None:
+ """Put new data into this mock state"""
+ self.state = packet.data
diff --git a/mephisto/abstractions/blueprints/mock/mock_blueprint.py b/mephisto/abstractions/blueprints/mock/mock_blueprint.py
new file mode 100644
index 000000000..e8f5c824d
--- /dev/null
+++ b/mephisto/abstractions/blueprints/mock/mock_blueprint.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import (
+ Blueprint,
+ OnboardingRequired,
+ BlueprintArgs,
+ SharedTaskState,
+)
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+from mephisto.data_model.assignment import InitializationData
+from mephisto.abstractions.blueprints.mock.mock_agent_state import MockAgentState
+from mephisto.abstractions.blueprints.mock.mock_task_runner import MockTaskRunner
+from mephisto.abstractions.blueprints.mock.mock_task_builder import MockTaskBuilder
+from mephisto.operations.registry import register_mephisto_abstraction
+
+import os
+import time
+
+from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+ from mephisto.data_model.agent import OnboardingAgent
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.blueprint import AgentState, TaskRunner, TaskBuilder
+ from mephisto.data_model.assignment import Assignment
+ from mephisto.data_model.worker import Worker
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+BLUEPRINT_TYPE = "mock"
+
+
+@dataclass
+class MockBlueprintArgs(BlueprintArgs):
+ _blueprint_type: str = BLUEPRINT_TYPE
+ num_assignments: int = field(
+ default=MISSING,
+ metadata={
+ "help": "How many workers you want to do each assignment",
+ "required": True,
+ },
+ )
+ use_onboarding: bool = field(
+ default=False, metadata={"help": "Whether onboarding should be required"}
+ )
+ timeout_time: int = field(
+ default=0,
+ metadata={"help": "Whether acts in the run assignment should have a timeout"},
+ )
+ is_concurrent: bool = field(
+ default=True,
+ metadata={"help": "Whether to run this mock task as a concurrent task or not"},
+ )
+
+
+@register_mephisto_abstraction()
+class MockBlueprint(Blueprint, OnboardingRequired):
+ """Mock of a task type, for use in testing"""
+
+ AgentStateClass: ClassVar[Type["AgentState"]] = MockAgentState
+ OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = MockAgentState
+ TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = MockTaskBuilder
+ TaskRunnerClass: ClassVar[Type["TaskRunner"]] = MockTaskRunner
+ ArgsClass: ClassVar[Type["BlueprintArgs"]] = MockBlueprintArgs
+ supported_architects: ClassVar[List[str]] = ["mock"]
+ BLUEPRINT_TYPE = BLUEPRINT_TYPE
+
+ def __init__(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ super().__init__(task_run, args, shared_state)
+ self.init_onboarding_config(task_run, args, shared_state)
+
+ def get_initialization_data(self) -> Iterable[InitializationData]:
+ """
+ Return the number of empty assignments specified in --num-assignments
+ """
+ return [
+ MockTaskRunner.get_mock_assignment_data()
+ for i in range(self.args.blueprint.num_assignments)
+ ]
+
+ def validate_onboarding(
+ self, worker: "Worker", onboarding_agent: "OnboardingAgent"
+ ) -> bool:
+ """
+ Onboarding validation for MockBlueprints just returns the 'should_pass' field
+ """
+ return onboarding_agent.state.get_data()["should_pass"]
diff --git a/mephisto/abstractions/blueprints/mock/mock_task_builder.py b/mephisto/abstractions/blueprints/mock/mock_task_builder.py
new file mode 100644
index 000000000..196f6dbb8
--- /dev/null
+++ b/mephisto/abstractions/blueprints/mock/mock_task_builder.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import TaskBuilder
+
+import os
+import time
+
+from typing import ClassVar, List, Type, Any, Dict, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Assignment
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+
+class MockTaskBuilder(TaskBuilder):
+ """Builder for a mock task, for use in testing"""
+
+ BUILT_FILE = "done.built"
+ BUILT_MESSAGE = "built!"
+
+ def build_in_dir(self, build_dir: str):
+ """Mock tasks don't really build anything (yet)"""
+ with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
+ built_file.write(self.BUILT_MESSAGE)
diff --git a/mephisto/abstractions/blueprints/mock/mock_task_runner.py b/mephisto/abstractions/blueprints/mock/mock_task_runner.py
new file mode 100644
index 000000000..0c0c90f53
--- /dev/null
+++ b/mephisto/abstractions/blueprints/mock/mock_task_runner.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import TaskRunner, SharedTaskState
+from mephisto.data_model.assignment import InitializationData
+
+import os
+import time
+
+from typing import ClassVar, List, Type, Any, Dict, Union, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Assignment, Unit
+ from mephisto.data_model.agent import Agent, OnboardingAgent
+ from argparse import _ArgumentGroup as ArgumentGroup
+ from omegaconf import DictConfig
+
+
+class MockTaskRunner(TaskRunner):
+ """Mock of a task runner, for use in testing"""
+
+ def __init__(
+ self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+ ):
+ super().__init__(task_run, args, shared_state)
+ self.timeout = args.blueprint.timeout_time
+ self.tracked_tasks: Dict[str, Union["Assignment", "Unit"]] = {}
+ self.is_concurrent = args.blueprint.get("is_concurrent", True)
+ print(f"Blueprint is concurrent: {self.is_concurrent}, {args}")
+
+ @staticmethod
+ def get_mock_assignment_data() -> InitializationData:
+ return InitializationData(shared={}, unit_data=[{}, {}])
+
+ @staticmethod
+ def get_data_for_assignment(assignment: "Assignment") -> InitializationData:
+ """
+ Mock tasks have no data unless given during testing
+ """
+ return MockTaskRunner.get_mock_assignment_data()
+
+ def get_init_data_for_agent(self, agent: "Agent") -> Dict[str, Any]:
+ """
+ Return the data for an agent already assigned to a particular unit
+ """
+ # TODO(#97) implement
+ pass
+
+ def run_onboarding(self, onboarding_agent: "OnboardingAgent"):
+ """
+ Mock runners simply wait for an act to come in with whether
+ or not onboarding is complete
+ """
+ packet = onboarding_agent.act(timeout=self.timeout)
+ onboarding_agent.did_submit.set()
+ onboarding_agent.mark_done()
+
+ def run_unit(self, unit: "Unit", agent: "Agent"):
+ """
+ Mock runners will pass the agents for the given assignment
+ all of the required messages to finish a task.
+ """
+ self.tracked_tasks[unit.db_id] = unit
+ time.sleep(0.3)
+ assigned_agent = unit.get_assigned_agent()
+ assert assigned_agent is not None, "No agent was assigned"
+ assert (
+ assigned_agent.db_id == agent.db_id
+ ), "Task was not given to assigned agent"
+ packet = agent.act(timeout=self.timeout)
+ if packet is not None:
+ agent.observe(packet)
+ agent.did_submit.set()
+ agent.mark_done()
+ del self.tracked_tasks[unit.db_id]
+
+ def run_assignment(self, assignment: "Assignment", agents: List["Agent"]):
+ """
+ Mock runners will pass the agents for the given assignment
+ all of the required messages to finish a task.
+ """
+ self.tracked_tasks[assignment.db_id] = assignment
+ agent_dict = {a.db_id: a for a in agents}
+ time.sleep(0.3)
+ for unit in assignment.get_units():
+ assigned_agent = unit.get_assigned_agent()
+ assert assigned_agent is not None, "Task was not fully assigned"
+ agent = agent_dict.get(assigned_agent.db_id)
+ assert agent is not None, "Task was not launched with assigned agents"
+ packet = agent.act(timeout=self.timeout)
+ if packet is not None:
+ agent.observe(packet)
+ agent.did_submit.set()
+ agent.mark_done()
+ del self.tracked_tasks[assignment.db_id]
+
+ def cleanup_assignment(self, assignment: "Assignment"):
+ """No cleanup required yet for ending mock runs"""
+ pass
+
+ def cleanup_unit(self, unit: "Unit"):
+ """No cleanup required yet for ending mock runs"""
+ pass
+
+ def cleanup_onboarding(self, onboarding_agent: "OnboardingAgent"):
+ """No cleanup required yet for ending onboarding in mocks"""
+ pass
diff --git a/mephisto/abstractions/blueprints/parlai_chat/__init__.py b/mephisto/abstractions/blueprints/parlai_chat/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/blueprints/parlai_chat/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/blueprints/parlai_chat/parlai_chat_agent_state.py b/mephisto/abstractions/blueprints/parlai_chat/parlai_chat_agent_state.py
new file mode 100644
index 000000000..031cd7209
--- /dev/null
+++ b/mephisto/abstractions/blueprints/parlai_chat/parlai_chat_agent_state.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Dict, Any, Tuple, TYPE_CHECKING
+from mephisto.abstractions.blueprint import AgentState
+from mephisto.data_model.packet import (
+ PACKET_TYPE_AGENT_ACTION,
+ PACKET_TYPE_UPDATE_AGENT_STATUS,
+)
+import os
+import json
+import time
+
+if TYPE_CHECKING:
+ from mephisto.data_model.agent import Agent
+ from mephisto.data_model.packet import Packet
+
+
+class ParlAIChatAgentState(AgentState):
+ """
+ Holds information about ParlAI-style chat. Data is stored in json files
+ containing every act from the ParlAI world.
+ """
+
+ def __init__(self, agent: "Agent"):
+ """
+ Create an AgentState to track the state of an agent's work on a Unit
+
+ Initialize with an existing file if it exists.
+ """
+ self.agent = agent
+ data_file = self._get_expected_data_file()
+ if os.path.exists(data_file):
+ self.load_data()
+ else:
+ self.messages: List[Dict[str, Any]] = []
+ self.init_data = None
+ self.save_data()
+
+ def set_init_state(self, data: Any) -> bool:
+ """Set the initial state for this agent"""
+ if self.init_data is not None:
+ # Initial state is already set
+ return False
+ else:
+ self.init_data = data
+ self.save_data()
+ return True
+
+ def get_init_state(self) -> Optional[Dict[str, Any]]:
+ """
+ Return the initial state for this agent,
+ None if no such state exists
+ """
+ if self.init_data is None:
+ return None
+ return {"task_data": self.init_data, "raw_messages": self.messages}
+
+ def _get_expected_data_file(self) -> str:
+ """Return the place we would expect to find data for this agent state"""
+ agent_dir = self.agent.get_data_dir()
+ os.makedirs(agent_dir, exist_ok=True)
+ return os.path.join(agent_dir, "state.json")
+
+ def load_data(self) -> None:
+ """Load stored data from a file to this object"""
+ agent_file = self._get_expected_data_file()
+ with open(agent_file, "r") as state_json:
+ state = json.load(state_json)
+ self.messages = state["outputs"]["messages"]
+ self.init_data = state["inputs"]
+
+ def get_data(self) -> Dict[str, Any]:
+ """Return dict with the messages of this agent"""
+ return {"outputs": {"messages": self.messages}, "inputs": self.init_data}
+
+ def get_parsed_data(self) -> Dict[str, Any]:
+ """Return the formatted input, conversations, and final data"""
+ init_data = self.init_data
+ save_data = None
+ messages = [
+ m["data"]
+ for m in self.messages
+ if m["packet_type"] == PACKET_TYPE_AGENT_ACTION
+ ]
+ agent_name = None
+ if len(messages) > 0:
+ for m in self.messages:
+ if m["packet_type"] == PACKET_TYPE_UPDATE_AGENT_STATUS:
+ if "agent_display_name" in m["data"]["state"]:
+ agent_name = m["data"]["state"]["agent_display_name"]
+ break
+ if "MEPHISTO_is_submit" in messages[-1]:
+ messages = messages[:-1]
+ if messages and "WORLD_DATA" in messages[-1]:  # list may be empty once the submit is dropped
+ save_data = messages[-1]["WORLD_DATA"]
+ messages = messages[:-1]
+ return {
+ "agent_name": agent_name,
+ "initial_data": init_data,
+ "messages": messages,
+ "save_data": save_data,
+ }
+
+ def save_data(self) -> None:
+ """Save all messages and the init data from this agent to its expected data file"""
+ agent_file = self._get_expected_data_file()
+ with open(agent_file, "w+") as state_json:
+ json.dump(self.get_data(), state_json)
+
+ def update_data(self, packet: "Packet") -> None:
+ """
+ Append the incoming packet as well as who it came from
+ """
+ message_data = packet.to_sendable_dict()
+ message_data["timestamp"] = time.time()
+ self.messages.append(message_data)
+ self.save_data()
diff --git a/mephisto/abstractions/blueprints/parlai_chat/parlai_chat_blueprint.py b/mephisto/abstractions/blueprints/parlai_chat/parlai_chat_blueprint.py
new file mode 100644
index 000000000..8421dafd0
--- /dev/null
+++ b/mephisto/abstractions/blueprints/parlai_chat/parlai_chat_blueprint.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import (
+ Blueprint,
+ OnboardingRequired,
+ BlueprintArgs,
+ SharedTaskState,
+)
+from dataclasses import dataclass, field
+from mephisto.data_model.assignment import InitializationData
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_agent_state import (
+ ParlAIChatAgentState,
+)
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_task_runner import (
+ ParlAIChatTaskRunner,
+)
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_task_builder import (
+ ParlAIChatTaskBuilder,
+)
+from mephisto.operations.registry import register_mephisto_abstraction
+from omegaconf import DictConfig, MISSING
+
+import os
+import time
+import csv
+import sys
+
+from importlib import import_module
+
+from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.agent import Agent, OnboardingAgent
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.blueprint import AgentState, TaskRunner, TaskBuilder
+ from mephisto.data_model.assignment import Assignment
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+ # Type string for the ParlAI chat blueprint defined in this module.
+ # NOTE(review): presumably the key this blueprint is registered under - confirm.
+ BLUEPRINT_TYPE = "parlai_chat"
+
+
+MISSING_SOMETHING_TEXT = (
+ "
"
+ "You didn't specify a task_description_file and also didn't override the "
+ "frontend `TaskPreviewView` (if this is a preview) or the `TaskDescription` "
+ "component (if this is in-task)."
+ "
"
+ );
+
+ React.useEffect(() => {
+ requestTaskHMTL(url).then((data) => setRetrievedHtml(data));
+ }, []);
+
+ return ;
+}
+
+ /**
+  * React component that loads the <script> children of its root element
+  * once the element is mounted.
+  *
+  * NOTE(review): the JSX returned at the bottom of this function is missing
+  * from this view of the file, so how `html`, `data`, `elRef` and
+  * `interpolateHtml` are wired into the rendered element cannot be
+  * confirmed from here.
+  */
+ function HtmlRenderer({ html, data }) {
+   const elRef = React.useRef();
+
+   // Load the given script elements one after another. `scripts_left` is
+   // consumed from the front; `curr_counter` is only passed along and
+   // incremented, it does not influence the loading.
+   function handleUpdatingRemainingScripts(curr_counter, scripts_left) {
+     if (scripts_left.length == 0) {
+       return;
+     }
+     let script_to_load = scripts_left.shift();
+     if (script_to_load.text == "") {
+       // No inline text: re-create the element in <head> from its `src`
+       // and continue with the remaining scripts once it has loaded.
+       var head = document.getElementsByTagName("head")[0];
+       var script = document.createElement("script");
+       script.onload = () => {
+         handleUpdatingRemainingScripts(curr_counter + 1, scripts_left);
+       };
+       script.async = 1;
+       script.src = script_to_load.src;
+       head.appendChild(script);
+     } else {
+       const script_text = script_to_load.text;
+       // This magic lets us evaluate a script from the global context
+       // NOTE(review): this executes whatever script text the task HTML
+       // contains - confirm that the HTML always comes from a trusted source.
+       (1, eval)(script_text);
+       handleUpdatingRemainingScripts(curr_counter + 1, scripts_left);
+     }
+   }
+
+   // Return `html` with every "${key}" placeholder replaced by the matching
+   // value from `dataObj`; with a null `dataObj` the html is returned as-is.
+   function interpolateHtml(html, dataObj = null) {
+     let base_html = html;
+     let fin_html = base_html;
+
+     if (dataObj !== null) {
+       for (let [key, value] of Object.entries(dataObj)) {
+         let find_string = "${" + key + "}";
+         // Could be better done with a regex for performant code
+         fin_html = fin_html.split(find_string).join(value);
+       }
+     }
+
+     return fin_html;
+   }
+
+   // Collect the direct <script> children of the referenced element and
+   // hand them to the sequential loader above.
+   React.useEffect(() => {
+     let children = elRef.current.children;
+     let scripts_to_load = [];
+     for (let child of children) {
+       if (child.tagName == "SCRIPT") {
+         scripts_to_load.push(child);
+       }
+     }
+     if (scripts_to_load.length > 0) {
+       handleUpdatingRemainingScripts(0, scripts_to_load);
+     }
+   }, [elRef.current]);
+
+   return (
+
+   );
+ }
+
+ReactDOM.render(, document.getElementById("app"));
diff --git a/mephisto/abstractions/blueprints/static_html_task/source/dev/main.js b/mephisto/abstractions/blueprints/static_html_task/source/dev/main.js
new file mode 100644
index 000000000..8afaf4641
--- /dev/null
+++ b/mephisto/abstractions/blueprints/static_html_task/source/dev/main.js
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+import "./app.jsx";
diff --git a/mephisto/abstractions/blueprints/static_react_task/__init__.py b/mephisto/abstractions/blueprints/static_react_task/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/blueprints/static_react_task/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/blueprints/static_react_task/static_react_blueprint.py b/mephisto/abstractions/blueprints/static_react_task/static_react_blueprint.py
new file mode 100644
index 000000000..cd139d327
--- /dev/null
+++ b/mephisto/abstractions/blueprints/static_react_task/static_react_blueprint.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.assignment import InitializationData
+from dataclasses import dataclass, field
+from omegaconf import MISSING
+from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
+ StaticBlueprint,
+ StaticBlueprintArgs,
+)
+from mephisto.abstractions.blueprints.static_react_task.static_react_task_builder import (
+ StaticReactTaskBuilder,
+)
+from mephisto.operations.registry import register_mephisto_abstraction
+
+import os
+import time
+import csv
+
+from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.blueprint import AgentState, TaskRunner, TaskBuilder
+ from mephisto.data_model.assignment import Assignment
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+ # Type string of the static react blueprint; reused below as the default of
+ # `StaticReactBlueprintArgs._blueprint_type` and as
+ # `StaticReactBlueprint.BLUEPRINT_TYPE`.
+ BLUEPRINT_TYPE = "static_react_task"
+
+
+ @dataclass
+ class StaticReactBlueprintArgs(StaticBlueprintArgs):
+     """
+     StaticReactBlueprint: Tasks launched from static blueprints need
+     a prebuilt javascript bundle containing the task. We suggest building
+     with our provided useMephistoTask hook.
+
+     Adds the `task_source` argument on top of `StaticBlueprintArgs`.
+     """
+
+     # Defaults to this module's blueprint type string
+     _blueprint_type: str = BLUEPRINT_TYPE
+     # Group name for these arguments; the metadata carries the group's help text
+     _group: str = field(
+         default="StaticReactBlueprint",
+         metadata={
+             "help": """
+ Tasks launched from static blueprints need
+ a prebuilt javascript bundle containing the task. We suggest building
+ with our provided useMephistoTask hook.
+ """
+         },
+     )
+     # Path to the javascript bundle; no default value, marked as required
+     task_source: str = field(
+         default=MISSING,
+         metadata={
+             "help": "Path to file containing javascript bundle for the task",
+             "required": True,
+         },
+     )
+
+
+ @register_mephisto_abstraction()
+ class StaticReactBlueprint(StaticBlueprint):
+     """Static blueprint whose task is served from a prebuilt react javascript bundle"""
+
+     TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = StaticReactTaskBuilder
+     ArgsClass = StaticReactBlueprintArgs
+     BLUEPRINT_TYPE = BLUEPRINT_TYPE
+
+     def __init__(
+         self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
+     ):
+         """
+         Initialize the parent blueprint, then resolve the bundle path.
+
+         `args.blueprint.task_source` is user-expanded and stored as
+         `self.js_bundle`. Raises FileNotFoundError when no file exists
+         at that path.
+         """
+         super().__init__(task_run, args, shared_state)
+         self.js_bundle = os.path.expanduser(args.blueprint.task_source)
+         if os.path.exists(self.js_bundle):
+             return
+         raise FileNotFoundError(
+             f"Specified bundle file {self.js_bundle} was not found from {os.getcwd()}"
+         )
+
+     @classmethod
+     def assert_task_args(
+         cls, args: "DictConfig", shared_state: "SharedTaskState"
+     ) -> None:
+         """
+         Run the parent's argument checks, then assert that a `task_source`
+         was given and that the user-expanded path exists.
+         """
+         super().assert_task_args(args, shared_state)
+
+         task_source = args.blueprint.task_source
+         assert (
+             task_source is not None
+         ), "Must provide a path to a javascript bundle in `task_source`"
+         expanded_source = os.path.expanduser(task_source)
+         assert os.path.exists(
+             expanded_source
+         ), f"Provided task source {expanded_source} does not exist."
diff --git a/mephisto/abstractions/blueprints/static_react_task/static_react_task_builder.py b/mephisto/abstractions/blueprints/static_react_task/static_react_task_builder.py
new file mode 100644
index 000000000..16080d2f4
--- /dev/null
+++ b/mephisto/abstractions/blueprints/static_react_task/static_react_task_builder.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.blueprint import TaskBuilder
+
+from distutils.dir_util import copy_tree
+import os
+import shutil
+
+
+ class StaticReactTaskBuilder(TaskBuilder):
+     """
+     Builder for a static task, puts required files into
+     the server directory for deployment.
+     """
+
+     BUILT_FILE = "done.built"
+     BUILT_MESSAGE = "built!"
+
+     def build_in_dir(self, build_dir: str):
+         """
+         Copy the task's files into the `static` directory of `build_dir`.
+
+         Nothing is compiled here: the optional `extra_source_dir` tree and
+         the prebuilt bundle named by `task_source` are copied, the bundle
+         under the name `bundle.js`. A `BUILT_FILE` marker containing
+         `BUILT_MESSAGE` is then written into `build_dir`.
+         """
+         target_resource_dir = os.path.join(build_dir, "static")
+         # shutil.copy2 does not create missing parent directories, and the
+         # copy_tree call below only runs when an extra source dir is given,
+         # so make sure the target directory exists in every case.
+         os.makedirs(target_resource_dir, exist_ok=True)
+
+         # If any additional task files are required via a source_dir, copy those as well
+         extra_dir_path = self.args.blueprint.get("extra_source_dir", None)
+         if extra_dir_path is not None:
+             extra_dir_path = os.path.expanduser(extra_dir_path)
+             copy_tree(extra_dir_path, target_resource_dir)
+
+         # Copy the given task bundle to the target path
+         use_bundle = os.path.expanduser(self.args.blueprint.task_source)
+         target_path = os.path.join(target_resource_dir, "bundle.js")
+         shutil.copy2(use_bundle, target_path)
+
+         # Write a built file confirmation
+         with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
+             built_file.write(self.BUILT_MESSAGE)
diff --git a/mephisto/abstractions/channel.py b/mephisto/abstractions/channel.py
new file mode 100644
index 000000000..e9bd52bc1
--- /dev/null
+++ b/mephisto/abstractions/channel.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+
+from typing import Callable
+from mephisto.data_model.packet import Packet
+
+ # NOTE(review): presumably the interval, in seconds, between channel status
+ # checks; it is not used inside this module - confirm against the importers.
+ STATUS_CHECK_TIME = 4
+
+
+ class Channel(ABC):
+     """
+     Manages the API between the Supervisor and the server that is produced
+     by the architect.
+
+     Should be able to be configured by an architect, and used to communicate
+     with that server based on the queries that a Supervisor needs to run a job
+
+     This base class only stores the channel id and the three callbacks;
+     connection handling is left entirely to the abstract methods below.
+     """
+
+     def __init__(
+         self,
+         channel_id: str,
+         on_channel_open: Callable[[str], None],
+         on_catastrophic_disconnect: Callable[[str], None],
+         on_message: Callable[[str, Packet], None],
+     ):
+         """
+         Create a channel by the given id, and initialize any resources that
+         will later be required during the `open` call.
+
+         Children classes will likely need to accept additional parameters
+
+         on_channel_open should be called when the channel is first alive.
+             It takes the channel id as the only argument.
+         on_catastrophic_disconnect should only be called if the channel
+             is entirely unable to connect to the server and any ongoing
+             jobs should be killed.
+             It takes the channel id as the only argument.
+         on_message should be called whenever this channel receives a message
+             from the server.
+             It takes the channel id as the first argument and the received
+             packet as the second argument.
+         """
+         self.channel_id = channel_id
+         self.on_channel_open = on_channel_open
+         self.on_catastrophic_disconnect = on_catastrophic_disconnect
+         self.on_message = on_message
+
+     @abstractmethod
+     def is_closed(self) -> bool:
+         """
+         Return whether or not this connection has been explicitly closed
+         by the supervisor or another source.
+         """
+
+     @abstractmethod
+     def close(self):
+         """
+         Close this channel, and ensure that all threads and surrounding
+         resources are cleaned up
+         """
+
+     @abstractmethod
+     def is_alive(self) -> bool:
+         """
+         Return if this channel is actively able to send/receive messages.
+         Should be False until a connection has been established with the
+         server.
+         """
+
+     @abstractmethod
+     def open(self):
+         """
+         Do whatever is necessary to 'connect' this socket to the server
+         """
+
+     @abstractmethod
+     def send(self, packet: "Packet") -> bool:
+         """
+         Send the packet given to the intended recipient.
+         Return True on success and False on failure.
+         """
diff --git a/mephisto/abstractions/crowd_provider.py b/mephisto/abstractions/crowd_provider.py
new file mode 100644
index 000000000..30d920846
--- /dev/null
+++ b/mephisto/abstractions/crowd_provider.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod, abstractproperty
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+from mephisto.abstractions.blueprint import AgentState, SharedTaskState
+from mephisto.data_model.assignment import Unit
+from mephisto.data_model.requester import Requester
+from mephisto.data_model.worker import Worker
+from mephisto.data_model.agent import Agent
+
+from typing import List, Optional, Tuple, Dict, Any, ClassVar, Type, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+
+ @dataclass
+ class ProviderArgs:
+     """
+     Base class for arguments to configure Crowd Providers
+
+     Both fields default to `MISSING`, so neither has a usable value until
+     one is supplied.
+     """
+
+     # Type string of the provider these arguments belong to
+     _provider_type: str = MISSING
+     # Name of the requester to use
+     requester_name: str = MISSING
+
+
+ class CrowdProvider(ABC):
+     """
+     Base class describing what Mephisto needs in order to talk to an
+     external crowdsourcing vendor.
+
+     A vendor is supported by implementing the abstract methods below and
+     by supplying wrapped Unit, Requester, Worker, and Agent classes
+     through the class attributes.
+     """
+
+     PROVIDER_TYPE = "__PROVIDER_BASE_CLASS__"
+
+     UnitClass: ClassVar[Type[Unit]] = Unit
+
+     RequesterClass: ClassVar[Type[Requester]] = Requester
+
+     WorkerClass: ClassVar[Type[Worker]] = Worker
+
+     AgentClass: ClassVar[Type[Agent]] = Agent
+
+     ArgsClass: ClassVar[Type[ProviderArgs]] = ProviderArgs
+
+     SUPPORTED_TASK_TYPES: ClassVar[List[str]]
+
+     def __init__(self, db: "MephistoDB"):
+         """
+         Attach this provider to the given database and to its datastore.
+
+         Crowd provider classes should keep as much of their state as
+         possible in their non-python datastore, so that the system keeps
+         working after shutdowns and its state can be managed or observed
+         from other processes.
+
+         A datastore already registered with the database for this
+         provider type is reused. Otherwise a new one is created with
+         `initialize_provider_datastore` under the provider's db path and
+         registered with the database.
+         """
+         self.db = db
+         provider_type = self.PROVIDER_TYPE
+         if not db.has_datastore_for_provider(provider_type):
+             self.datastore_root = db.get_db_path_for_provider(provider_type)
+             self.datastore = self.initialize_provider_datastore(self.datastore_root)
+             db.set_datastore_for_provider(provider_type, self.datastore)
+         else:
+             self.datastore = db.get_datastore_for_provider(provider_type)
+
+     @classmethod
+     def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
+         """
+         Check that the provided arguments are valid, failing if a task
+         launched with them would not work. The base implementation
+         accepts everything.
+         """
+         return None
+
+     @classmethod
+     @abstractmethod
+     def get_wrapper_js_path(cls):
+         """
+         Return the path to this provider's `wrap_crowd_source.js` file,
+         which is deployed to the server
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def initialize_provider_datastore(self, storage_path: str) -> Any:
+         """
+         Set up whatever local or external state this vendor needs in
+         order to be usable.
+
+         Local data storage should be put into the given root path.
+
+         The returned object is the local data storage component needed
+         for object initialization; `__init__` registers it with the
+         MephistoDB, where it is available through
+         `db.get_datastore_for_provider(PROVIDER_TYPE)`.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def setup_resources_for_task_run(
+         self,
+         task_run: "TaskRun",
+         args: DictConfig,
+         shared_state: "SharedTaskState",
+         server_url: str,
+     ) -> None:
+         """
+         Set up any additional resources that are required around a
+         specific task run.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def cleanup_resources_from_task_run(
+         self, task_run: "TaskRun", server_url: str
+     ) -> None:
+         """
+         Tear down the resources that were set up specifically for this task run
+         """
+         raise NotImplementedError()
+
+     def cleanup_qualification(self, qualification_name: str) -> None:
+         """
+         Remove the linked qualification from the crowdprovider if it exists.
+         The base implementation does nothing.
+         """
+         return
diff --git a/mephisto/abstractions/database.py b/mephisto/abstractions/database.py
new file mode 100644
index 000000000..3433f10b7
--- /dev/null
+++ b/mephisto/abstractions/database.py
@@ -0,0 +1,582 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+ import os
+ import sqlite3
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict, List, Mapping, Optional
+
+ from mephisto.operations.utils import get_data_dir
+ from mephisto.operations.registry import (
+     get_crowd_provider_from_type,
+     get_valid_provider_types,
+ )
+ from mephisto.data_model.agent import Agent, OnboardingAgent
+ from mephisto.data_model.assignment import Assignment, Unit
+ from mephisto.data_model.project import Project
+ from mephisto.data_model.requester import Requester
+ from mephisto.data_model.task import Task
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.qualification import Qualification, GrantedQualification
+
+# TODO(#101) investigate rate limiting against the db by caching locally where appropriate across the data model?
+# TODO(#101) investigate cursors for DB queries as the project scales
+
+
+ class MephistoDBException(Exception):
+     """Base class for the errors raised by MephistoDB implementations"""
+
+
+ class EntryAlreadyExistsException(MephistoDBException):
+     """Raised when an entry that is being created already exists"""
+
+
+ class EntryDoesNotExistException(MephistoDBException):
+     """Raised when no entry exists for a requested id"""
+
+
+ class MephistoDB(ABC):
+     """
+     Provides the interface for all queries that are necessary for the Mephisto
+     architecture to run as expected. All other databases should implement
+     these methods to be used as the database that backs Mephisto.
+
+     By default, we use a LocalMephistoDB located at `mephisto/data/database.db`
+     """
+
+     def __init__(self, database_path=None):
+         """
+         Ensure the database is set up and ready to handle data
+
+         When no `database_path` is given, `database.db` inside the directory
+         returned by `get_data_dir()` is used. `init_tables` is called as part
+         of construction.
+         """
+         if database_path is None:
+             database_path = os.path.join(get_data_dir(), "database.db")
+         self.db_path = database_path
+         self.db_root = os.path.dirname(self.db_path)
+         self.init_tables()
+         # Datastores registered by crowd providers, keyed by provider type
+         self.__provider_datastores: Dict[str, Any] = {}
+
+     def get_db_path_for_provider(self, provider_type) -> str:
+         """
+         Get the path to store data for a specific provider in
+
+         The directory is named after the provider type, lives next to the
+         database file, and is created if it does not exist yet.
+         """
+         database_root = os.path.dirname(self.db_path)
+         provider_root = os.path.join(database_root, provider_type)
+         os.makedirs(provider_root, exist_ok=True)
+         return provider_root
+
+     def has_datastore_for_provider(self, provider_type: str) -> bool:
+         """Determine if a datastore has been registered for the given provider"""
+         return provider_type in self.__provider_datastores
+
+     def get_datastore_for_provider(self, provider_type: str) -> Any:
+         """
+         Get the provider datastore registered with this db
+
+         If none is registered yet, the provider class for this type is
+         instantiated first so that it can register one. Returns None when
+         there is still no datastore afterwards.
+         """
+         if provider_type not in self.__provider_datastores:
+             # Register this provider for usage now. The instance is only
+             # created for its side effect: a provider is expected to call
+             # `set_datastore_for_provider` while it is being constructed.
+             ProviderClass = get_crowd_provider_from_type(provider_type)
+             ProviderClass(self)
+         return self.__provider_datastores.get(provider_type)
+
+     def set_datastore_for_provider(self, provider_type: str, datastore: Any) -> None:
+         """Set the provider datastore registered with this db"""
+         self.__provider_datastores[provider_type] = datastore
+
+     def delete_qualification(self, qualification_name: str) -> None:
+         """
+         Remove this qualification from all workers that have it, then delete the qualification
+
+         After the database entry is removed via `_delete_qualification`, every
+         valid crowd provider is instantiated and asked to clean up its own
+         version of the qualification.
+         """
+         self._delete_qualification(qualification_name)
+         for crowd_provider_name in get_valid_provider_types():
+             ProviderClass = get_crowd_provider_from_type(crowd_provider_name)
+             provider = ProviderClass(self)
+             provider.cleanup_qualification(qualification_name)
+
+     @abstractmethod
+     def shutdown(self) -> None:
+         """Do whatever is required to shut this server off"""
+         raise NotImplementedError()
+
+     @abstractmethod
+     def init_tables(self) -> None:
+         """
+         Initialize any tables that may be required to run this database. If this is an expensive
+         operation, check to see if they already exist before trying to initialize
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_project(self, project_name: str) -> str:
+         """
+         Create a new project with the given project name. Raise EntryAlreadyExistsException if a project
+         with this name has already been created.
+
+         Project names are permanent, as changing directories later is painful.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_project(self, project_id: str) -> Mapping[str, Any]:
+         """
+         Return project's fields by the given project_id, raise EntryDoesNotExistException if no id exists
+         in projects
+
+         See Project for the expected returned mapping's fields
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_projects(self, project_name: Optional[str] = None) -> List[Project]:
+         """
+         Try to find any project that matches the above. When called with no arguments,
+         return all projects.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_task(
+         self,
+         task_name: str,
+         task_type: str,
+         project_id: Optional[str] = None,
+         parent_task_id: Optional[str] = None,
+     ) -> str:
+         """
+         Create a new task with the given task name. Raise EntryAlreadyExistsException if a task
+         with this name has already been created.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_task(self, task_id: str) -> Mapping[str, Any]:
+         """
+         Return task's fields by task_id, raise EntryDoesNotExistException if no id exists
+         in tasks
+
+         See Task for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_tasks(
+         self,
+         task_name: Optional[str] = None,
+         project_id: Optional[str] = None,
+         parent_task_id: Optional[str] = None,
+     ) -> List[Task]:
+         """
+         Try to find any task that matches the above. When called with no arguments,
+         return all tasks.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def update_task(
+         self,
+         task_id: str,
+         task_name: Optional[str] = None,
+         project_id: Optional[str] = None,
+     ) -> None:
+         """
+         Update the given task with the given parameters if possible, raise appropriate exception otherwise.
+
+         Should only be runnable if no runs have been created for this task
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_task_run(
+         self,
+         task_id: str,
+         requester_id: str,
+         init_params: str,
+         provider_type: str,
+         task_type: str,
+         sandbox: bool = True,
+     ) -> str:
+         """
+         Create a new task_run for the given task.
+
+         Once a run is created, it should no longer be altered. The assignments and
+         subassignments depend on the data set up within, as the launched task
+         cannot be replaced and the requester can not be swapped mid-run.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_task_run(self, task_run_id: str) -> Mapping[str, Any]:
+         """
+         Return the given task_run's fields by task_run_id, raise EntryDoesNotExistException if no id exists
+         in task_runs.
+
+         See TaskRun for the expected fields to populate in the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_task_runs(
+         self,
+         task_id: Optional[str] = None,
+         requester_id: Optional[str] = None,
+         is_completed: Optional[bool] = None,
+     ) -> List[TaskRun]:
+         """
+         Try to find any task_run that matches the above. When called with no arguments,
+         return all task_runs.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def update_task_run(self, task_run_id: str, is_completed: bool):
+         """
+         Update a task run. At the moment, can only update completion status
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_assignment(
+         self,
+         task_id: str,
+         task_run_id: str,
+         requester_id: str,
+         task_type: str,
+         provider_type: str,
+         sandbox: bool = True,
+     ) -> str:
+         """
+         Create a new assignment for the given task
+
+         Assignments should not be edited or altered once created
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_assignment(self, assignment_id: str) -> Mapping[str, Any]:
+         """
+         Return assignment's fields by assignment_id, raise EntryDoesNotExistException if
+         no id exists in assignments
+
+         See Assignment for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_assignments(
+         self,
+         task_run_id: Optional[str] = None,
+         task_id: Optional[str] = None,
+         requester_id: Optional[str] = None,
+         task_type: Optional[str] = None,
+         provider_type: Optional[str] = None,
+         sandbox: Optional[bool] = None,
+     ) -> List[Assignment]:
+         """
+         Try to find any assignment that matches the above. When called with no arguments,
+         return all assignments.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_unit(
+         self,
+         task_id: str,
+         task_run_id: str,
+         requester_id: str,
+         assignment_id: str,
+         unit_index: int,
+         pay_amount: float,
+         provider_type: str,
+         task_type: str,
+         sandbox: bool = True,
+     ) -> str:
+         """
+         Create a new unit with the given index. Raises EntryAlreadyExistsException
+         if there is already a unit for the given assignment with the given index.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_unit(self, unit_id: str) -> Mapping[str, Any]:
+         """
+         Return unit's fields by unit_id, raise EntryDoesNotExistException
+         if no id exists in units
+
+         See unit for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_units(
+         self,
+         task_id: Optional[str] = None,
+         task_run_id: Optional[str] = None,
+         requester_id: Optional[str] = None,
+         assignment_id: Optional[str] = None,
+         unit_index: Optional[int] = None,
+         provider_type: Optional[str] = None,
+         task_type: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         worker_id: Optional[str] = None,
+         sandbox: Optional[bool] = None,
+         status: Optional[str] = None,
+     ) -> List[Unit]:
+         """
+         Try to find any unit that matches the above. When called with no arguments,
+         return all units.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def clear_unit_agent_assignment(self, unit_id: str) -> None:
+         """
+         Update the given unit by removing the agent that is assigned to it, thus updating
+         the status to assignable.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def update_unit(
+         self, unit_id: str, agent_id: Optional[str] = None, status: Optional[str] = None
+     ) -> None:
+         """
+         Update the given unit with the given parameters if possible, raise appropriate exception otherwise.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_requester(self, requester_name: str, provider_type: str) -> str:
+         """
+         Create a new requester with the given name and provider type.
+         Raises EntryAlreadyExistsException
+         if there is already a requester with this name
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_requester(self, requester_id: str) -> Mapping[str, Any]:
+         """
+         Return requester's fields by requester_id, raise EntryDoesNotExistException
+         if no id exists in requesters
+
+         See requester for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_requesters(
+         self, requester_name: Optional[str] = None, provider_type: Optional[str] = None
+     ) -> List[Requester]:
+         """
+         Try to find any requester that matches the above. When called with no arguments,
+         return all requesters.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_worker(self, worker_name: str, provider_type: str) -> str:
+         """
+         Create a new worker with the given name and provider type.
+         Raises EntryAlreadyExistsException
+         if there is already a worker with this name
+
+         worker_name should be the unique identifier by which the crowd provider
+         is using to keep track of this worker
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_worker(self, worker_id: str) -> Mapping[str, Any]:
+         """
+         Return worker's fields by worker_id, raise EntryDoesNotExistException
+         if no id exists in workers
+
+         See worker for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_workers(
+         self, worker_name: Optional[str] = None, provider_type: Optional[str] = None
+     ) -> List[Worker]:
+         """
+         Try to find any worker that matches the above. When called with no arguments,
+         return all workers.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_agent(
+         self,
+         worker_id: str,
+         unit_id: str,
+         task_id: str,
+         task_run_id: str,
+         assignment_id: str,
+         task_type: str,
+         provider_type: str,
+     ) -> str:
+         """
+         Create a new agent for the given worker id to assign to the given unit
+         Raises EntryAlreadyExistsException
+
+         Should update the unit's status to ASSIGNED and the assigned agent to
+         this one.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_agent(self, agent_id: str) -> Mapping[str, Any]:
+         """
+         Return agent's fields by agent_id, raise EntryDoesNotExistException
+         if no id exists in agents
+
+         See Agent for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def update_agent(self, agent_id: str, status: Optional[str] = None) -> None:
+         """
+         Update the given agent with the given parameters if possible, raise appropriate exception otherwise.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_agents(
+         self,
+         status: Optional[str] = None,
+         unit_id: Optional[str] = None,
+         worker_id: Optional[str] = None,
+         task_id: Optional[str] = None,
+         task_run_id: Optional[str] = None,
+         assignment_id: Optional[str] = None,
+         task_type: Optional[str] = None,
+         provider_type: Optional[str] = None,
+     ) -> List[Agent]:
+         """
+         Try to find any agent that matches the above. When called with no arguments,
+         return all agents.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def new_onboarding_agent(
+         self, worker_id: str, task_id: str, task_run_id: str, task_type: str
+     ) -> str:
+         """
+         Create a new onboarding agent for the given worker id in the given
+         task run
+         Raises EntryAlreadyExistsException
+
+         NOTE(review): the previous text was copied from `new_agent` and
+         described updating a unit, but no unit is passed to this method.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_onboarding_agent(self, onboarding_agent_id: str) -> Mapping[str, Any]:
+         """
+         Return onboarding agent's fields by onboarding_agent_id, raise
+         EntryDoesNotExistException if no id exists in onboarding_agents
+
+         See OnboardingAgent for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def update_onboarding_agent(
+         self, onboarding_agent_id: str, status: Optional[str] = None
+     ) -> None:
+         """
+         Update the given onboarding agent with the given parameters if possible,
+         raise appropriate exception otherwise.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_onboarding_agents(
+         self,
+         status: Optional[str] = None,
+         worker_id: Optional[str] = None,
+         task_id: Optional[str] = None,
+         task_run_id: Optional[str] = None,
+         task_type: Optional[str] = None,
+     ) -> List[OnboardingAgent]:
+         """
+         Try to find any onboarding agent that matches the above. When called with no arguments,
+         return all onboarding agents.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def make_qualification(self, qualification_name: str) -> str:
+         """
+         Make a new qualification, throws an error if a qualification by the given name
+         already exists. Return the id for the qualification.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def find_qualifications(
+         self, qualification_name: Optional[str] = None
+     ) -> List[Qualification]:
+         """
+         Find a qualification. If no name is supplied, returns all qualifications.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_qualification(self, qualification_id: str) -> Mapping[str, Any]:
+         """
+         Return qualification's fields by qualification_id, raise
+         EntryDoesNotExistException if no id exists in qualifications
+
+         See Qualification for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def _delete_qualification(self, qualification_name: str) -> None:
+         """
+         Remove this qualification from all workers that have it, then delete the qualification
+
+         This is the database-side half of `delete_qualification`, which calls
+         it before asking the crowd providers to clean up.
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def grant_qualification(
+         self, qualification_id: str, worker_id: str, value: int = 1
+     ) -> None:
+         """
+         Grant a worker the given qualification. Update the qualification value if it
+         already exists
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def check_granted_qualifications(
+         self,
+         qualification_id: Optional[str] = None,
+         worker_id: Optional[str] = None,
+         value: Optional[int] = None,
+     ) -> List[GrantedQualification]:
+         """
+         Find granted qualifications that match the given specifications
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_granted_qualification(
+         self, qualification_id: Optional[str] = None, worker_id: Optional[str] = None
+     ) -> Mapping[str, Any]:
+         """
+         Return the granted qualification in the database between the given
+         worker and qualification id
+
+         See GrantedQualification for the expected fields for the returned mapping
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def revoke_qualification(self, qualification_id: str, worker_id: str) -> None:
+         """
+         Remove the given qualification from the given worker
+         """
+         raise NotImplementedError()
diff --git a/mephisto/abstractions/databases/__init__.py b/mephisto/abstractions/databases/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/databases/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/databases/local_database.py b/mephisto/abstractions/databases/local_database.py
new file mode 100644
index 000000000..42f9a8d29
--- /dev/null
+++ b/mephisto/abstractions/databases/local_database.py
@@ -0,0 +1,1369 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.database import (
+ MephistoDB,
+ MephistoDBException,
+ EntryAlreadyExistsException,
+ EntryDoesNotExistException,
+)
+from typing import Mapping, Optional, Any, List, Dict
+from mephisto.operations.utils import get_data_dir
+from mephisto.operations.registry import get_valid_provider_types
+from mephisto.data_model.agent import Agent, AgentState, OnboardingAgent
+from mephisto.data_model.assignment import Assignment, Unit, AssignmentState
+from mephisto.data_model.constants import NO_PROJECT_NAME
+from mephisto.data_model.project import Project
+from mephisto.data_model.requester import Requester
+from mephisto.data_model.task import Task
+from mephisto.data_model.task_run import TaskRun
+from mephisto.data_model.worker import Worker
+from mephisto.data_model.qualification import Qualification, GrantedQualification
+
+import sqlite3
+from sqlite3 import Connection, Cursor
+import threading
+
+from mephisto.operations.logger_core import get_logger
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+
+ def nonesafe_int(in_string: Optional[str]) -> Optional[int]:
+     """Convert the input to an int, passing None through untouched"""
+     return None if in_string is None else int(in_string)
+
+
+ def assert_valid_provider(provider_type: str) -> None:
+     """Raise a MephistoDBException if the given provider type is not a valid one"""
+     supported = get_valid_provider_types()
+     if provider_type in supported:
+         return
+     raise MephistoDBException(
+         f"Supplied provider {provider_type} is not in supported list of providers {supported}."
+     )
+
+
+ def is_key_failure(e: sqlite3.IntegrityError) -> bool:
+     """
+     Report whether the given integrity error is a foreign key
+     failure, meaning a write referred to something that was
+     expected to exist in the DB already but didn't.
+     """
+     message = str(e)
+     return message == "FOREIGN KEY constraint failed"
+
+
+ def is_unique_failure(e: sqlite3.IntegrityError) -> bool:
+     """
+     Report whether the given integrity error is a uniqueness
+     failure, recognised by its message starting with
+     "UNIQUE constraint".
+     """
+     message = str(e)
+     return message.startswith("UNIQUE constraint")
+
+
+ # Table creation statements. Each uses CREATE TABLE IF NOT EXISTS, so
+ # running one against a database that already has the table is harmless.
+
+ # projects: one row per project; project_name must be unique.
+ CREATE_PROJECTS_TABLE = """CREATE TABLE IF NOT EXISTS projects (
+ project_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_name TEXT NOT NULL UNIQUE,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+ );
+ """
+
+ # tasks: task_name must be unique; project_id and parent_task_id are
+ # nullable references to projects and to another task respectively.
+ CREATE_TASKS_TABLE = """CREATE TABLE IF NOT EXISTS tasks (
+ task_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ task_name TEXT NOT NULL UNIQUE,
+ task_type TEXT NOT NULL,
+ project_id INTEGER,
+ parent_task_id INTEGER,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (parent_task_id) REFERENCES tasks (task_id),
+ FOREIGN KEY (project_id) REFERENCES projects (project_id)
+ );
+ """
+
+ # requesters: requester_name must be unique.
+ CREATE_REQUESTERS_TABLE = """CREATE TABLE IF NOT EXISTS requesters (
+ requester_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ requester_name TEXT NOT NULL UNIQUE,
+ provider_type TEXT NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+ );
+ """
+
+ # task_runs: each run references a task and a requester.
+ CREATE_TASK_RUNS_TABLE = """
+ CREATE TABLE IF NOT EXISTS task_runs (
+ task_run_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ task_id INTEGER NOT NULL,
+ requester_id INTEGER NOT NULL,
+ init_params TEXT NOT NULL,
+ is_completed BOOLEAN NOT NULL,
+ provider_type TEXT NOT NULL,
+ task_type TEXT NOT NULL,
+ sandbox BOOLEAN NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (task_id) REFERENCES tasks (task_id),
+ FOREIGN KEY (requester_id) REFERENCES requesters (requester_id)
+ );
+ """
+
+ # assignments: each assignment references a task, a task run and a requester.
+ CREATE_ASSIGNMENTS_TABLE = """CREATE TABLE IF NOT EXISTS assignments (
+ assignment_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ task_id INTEGER NOT NULL,
+ task_run_id INTEGER NOT NULL,
+ requester_id INTEGER NOT NULL,
+ task_type TEXT NOT NULL,
+ provider_type TEXT NOT NULL,
+ sandbox BOOLEAN NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (task_id) REFERENCES tasks (task_id),
+ FOREIGN KEY (task_run_id) REFERENCES task_runs (task_run_id),
+ FOREIGN KEY (requester_id) REFERENCES requesters (requester_id)
+ );
+ """
+
+ # units: agent_id and worker_id are nullable; (assignment_id, unit_index)
+ # pairs must be unique. The agents and workers tables referenced here are
+ # declared further down in this file.
+ CREATE_UNITS_TABLE = """CREATE TABLE IF NOT EXISTS units (
+ unit_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ assignment_id INTEGER NOT NULL,
+ unit_index INTEGER NOT NULL,
+ pay_amount FLOAT NOT NULL,
+ provider_type TEXT NOT NULL,
+ status TEXT NOT NULL,
+ agent_id INTEGER,
+ worker_id INTEGER,
+ task_type TEXT NOT NULL,
+ task_id INTEGER NOT NULL,
+ task_run_id INTEGER NOT NULL,
+ sandbox BOOLEAN NOT NULL,
+ requester_id INTEGER NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (assignment_id) REFERENCES assignments (assignment_id),
+ FOREIGN KEY (agent_id) REFERENCES agents (agent_id),
+ FOREIGN KEY (task_run_id) REFERENCES task_runs (task_run_id),
+ FOREIGN KEY (task_id) REFERENCES tasks (task_id),
+ FOREIGN KEY (requester_id) REFERENCES requesters (requester_id),
+ FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
+ UNIQUE (assignment_id, unit_index)
+ );
+ """
+
+ # workers: worker_name must be unique.
+ CREATE_WORKERS_TABLE = """CREATE TABLE IF NOT EXISTS workers (
+ worker_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ worker_name TEXT NOT NULL UNIQUE,
+ provider_type TEXT NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+ );
+ """
+
+ # agents: only worker_id and unit_id are declared as foreign keys; the
+ # task, task run and assignment ids are stored as plain integers.
+ CREATE_AGENTS_TABLE = """CREATE TABLE IF NOT EXISTS agents (
+ agent_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ worker_id INTEGER NOT NULL,
+ unit_id INTEGER NOT NULL,
+ task_id INTEGER NOT NULL,
+ task_run_id INTEGER NOT NULL,
+ assignment_id INTEGER NOT NULL,
+ task_type TEXT NOT NULL,
+ provider_type TEXT NOT NULL,
+ status TEXT NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
+ FOREIGN KEY (unit_id) REFERENCES units (unit_id)
+ );
+ """
+
+ # onboarding_agents: references a worker and a task run; task_id is stored
+ # without a foreign key declaration.
+ CREATE_ONBOARDING_AGENTS_TABLE = """CREATE TABLE IF NOT EXISTS onboarding_agents (
+ onboarding_agent_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ worker_id INTEGER NOT NULL,
+ task_id INTEGER NOT NULL,
+ task_run_id INTEGER NOT NULL,
+ task_type TEXT NOT NULL,
+ status TEXT NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
+ FOREIGN KEY (task_run_id) REFERENCES task_runs (task_run_id)
+ );
+ """
+
+ # qualifications: qualification_name must be unique.
+ CREATE_QUALIFICATIONS_TABLE = """CREATE TABLE IF NOT EXISTS qualifications (
+ qualification_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ qualification_name TEXT NOT NULL UNIQUE,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+ );
+ """
+
+ # granted_qualifications: links a worker to a qualification with an integer
+ # value; a worker can hold a given qualification at most once.
+ CREATE_GRANTED_QUALIFICATIONS_TABLE = """
+ CREATE TABLE IF NOT EXISTS granted_qualifications (
+ granted_qualification_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ worker_id INTEGER NOT NULL,
+ qualification_id INTEGER NOT NULL,
+ value INTEGER NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+ FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
+ FOREIGN KEY (qualification_id) REFERENCES qualifications (qualification_id),
+ UNIQUE (worker_id, qualification_id)
+ );
+ """
+
+
+ class StringIDRow(sqlite3.Row):
+     """
+     Row type that returns the value of any column whose name ends in
+     "_id" as a string, leaving None and all other columns untouched.
+
+     Lookups by integer position apply the same rule using the name of
+     the column at that position. Slices are returned exactly as
+     sqlite3.Row produces them.
+     """
+
+     def __getitem__(self, key: Any) -> Any:
+         val = super().__getitem__(key)
+         # sqlite3.Row accepts positions and slices as well as names, so
+         # only string methods may be called once the key is known to be
+         # a string. A position is first mapped back to its column name.
+         if isinstance(key, int):
+             key = self.keys()[key]
+         if isinstance(key, str) and key.endswith("_id") and val is not None:
+             return str(val)
+         return val
+
+
+# TODO(101) find_x queries are pretty slow right now, as we query the same table once to get
+# all of the rows, but only select the ids, then we later construct them individually,
+# making a second set of requests.
+# It would be better to expose an init param for DB Objects that takes in the full row
+# and inits with that if provided, and queries the database if not.
+class LocalMephistoDB(MephistoDB):
+ """
+ Local database for core Mephisto data storage, the LocalMephistoDatabase handles
+ grounding all of the python interactions with the Mephisto architecture to
+ local files and a database.
+ """
+
+ def __init__(self, database_path=None):
+ """
+ Set up the per-thread connection store and the access lock, then
+ hand database_path to the MephistoDB initializer.
+ """
+ logger.debug(f"database path: {database_path}")
+ # One connection per thread, keyed by threading.get_ident()
+ self.conn: Dict[int, Connection] = {}
+ # Held by the methods of this class around their database access
+ self.table_access_condition = threading.Condition()
+ # NOTE(review): presumably the base initializer relies on the two
+ # attributes above already existing, which would be why it runs
+ # last - confirm against MephistoDB.__init__
+ super().__init__(database_path)
+
+ def _get_connection(self) -> Connection:
+     """
+     Return the database connection belonging to the calling thread,
+     opening it on first use.
+
+     Connections are cached in self.conn keyed by thread id. Every new
+     connection uses StringIDRow as its row factory and has foreign key
+     enforcement switched on.
+
+     Raises MephistoDBException if the connection cannot be set up.
+     """
+     # TODO(101) is there a problem with having just one db connection?
+     # Will this cause bugs with failed commits?
+     curr_thread = threading.get_ident()
+     if self.conn.get(curr_thread) is None:
+         try:
+             conn = sqlite3.connect(self.db_path)
+             conn.row_factory = StringIDRow
+             # SQLite's foreign_keys setting belongs to a single
+             # connection and is off by default, so it is enabled on
+             # every connection rather than only on the one that
+             # happens to run init_tables.
+             conn.execute("PRAGMA foreign_keys = 1")
+         except sqlite3.Error as e:
+             raise MephistoDBException(e) from e
+         self.conn[curr_thread] = conn
+     return self.conn[curr_thread]
+
+ def shutdown(self) -> None:
+     """
+     Close the calling thread's connection, if it has one.
+
+     Only the connection registered under the current thread id is
+     closed and forgotten; entries for other threads are left in place.
+     Calling this from a thread without a connection does nothing.
+     """
+     with self.table_access_condition:
+         # pop() with a default so that a thread which never connected,
+         # or has already shut down, does not hit a KeyError
+         conn = self.conn.pop(threading.get_ident(), None)
+         if conn is not None:
+             conn.close()
+
+ def init_tables(self) -> None:
+     """
+     Execute each table creation statement so that every expected table exists
+     """
+     # TODO(#93) maybe raise flag when the schema of existing tables isn't what we expect
+     # it to be?
+     # "How to know that schema changes?"
+     # logger.warning("some message")
+     creation_statements = (
+         CREATE_PROJECTS_TABLE,
+         CREATE_TASKS_TABLE,
+         CREATE_REQUESTERS_TABLE,
+         CREATE_TASK_RUNS_TABLE,
+         CREATE_ASSIGNMENTS_TABLE,
+         CREATE_UNITS_TABLE,
+         CREATE_WORKERS_TABLE,
+         CREATE_AGENTS_TABLE,
+         CREATE_QUALIFICATIONS_TABLE,
+         CREATE_GRANTED_QUALIFICATIONS_TABLE,
+         CREATE_ONBOARDING_AGENTS_TABLE,
+     )
+     with self.table_access_condition:
+         connection = self._get_connection()
+         connection.execute("PRAGMA foreign_keys = 1")
+         # The connection's context manager commits once all statements ran
+         with connection:
+             cursor = connection.cursor()
+             for statement in creation_statements:
+                 cursor.execute(statement)
+
+ def __get_one_by_id(
+     self, table_name: str, id_name: str, db_id: str
+ ) -> Mapping[str, Any]:
+     """
+     Fetch the row of table_name whose id_name column equals db_id,
+     raising EntryDoesNotExistException unless exactly one row matches
+     """
+     query = f"""
+         SELECT * FROM {table_name}
+         WHERE ({id_name} = ?)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         cursor.execute(query, (int(db_id),))
+         matches = cursor.fetchall()
+         if len(matches) == 1:
+             return matches[0]
+         raise EntryDoesNotExistException(
+             f"Table {table_name} has no {id_name} {db_id}"
+         )
+
+ def new_project(self, project_name: str) -> str:
+     """
+     Create a new project with the given project name and return its id
+     as a string.
+
+     Raises MephistoDBException if the name is empty or equal to
+     NO_PROJECT_NAME, and EntryAlreadyExistsException if a project with
+     this name has already been created.
+     """
+     if project_name in [NO_PROJECT_NAME, ""]:
+         # The rejected name is quoted on both sides in the message
+         raise MephistoDBException(f'Invalid project name "{project_name}"')
+     with self.table_access_condition, self._get_connection() as conn:
+         c = conn.cursor()
+         try:
+             c.execute(
+                 "INSERT INTO projects(project_name) VALUES (?);", (project_name,)
+             )
+             return str(c.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if is_key_failure(e):
+                 raise EntryDoesNotExistException()
+             elif is_unique_failure(e):
+                 raise EntryAlreadyExistsException(
+                     f"Project {project_name} already exists"
+                 )
+             raise MephistoDBException(e)
+
+ def get_project(self, project_id: str) -> Mapping[str, Any]:
+     """
+     Look up a project by project_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in projects
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(
+         table_name="projects", id_name="project_id", db_id=project_id
+     )
+
+ def find_projects(self, project_name: Optional[str] = None) -> List[Project]:
+     """
+     Return the projects with the given name, or every project when no
+     name is supplied.
+     """
+     query = """
+         SELECT * from projects
+         WHERE (?1 IS NULL OR project_name = ?1)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         cursor.execute(query, (project_name,))
+         return [
+             Project(self, str(found["project_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def new_task(
+     self,
+     task_name: str,
+     task_type: str,
+     project_id: Optional[str] = None,
+     parent_task_id: Optional[str] = None,
+ ) -> str:
+     """
+     Create a new task with the given task name and return its id as a
+     string.
+
+     Raises MephistoDBException if the name is empty,
+     EntryAlreadyExistsException if a task with this name has already
+     been created, and EntryDoesNotExistException on a foreign key
+     failure for project_id or parent_task_id.
+     """
+     if task_name in [""]:
+         # The rejected name is quoted on both sides in the message
+         raise MephistoDBException(f'Invalid task name "{task_name}"')
+     with self.table_access_condition, self._get_connection() as conn:
+         c = conn.cursor()
+         try:
+             c.execute(
+                 """INSERT INTO tasks(
+                     task_name,
+                     task_type,
+                     project_id,
+                     parent_task_id
+                 ) VALUES (?, ?, ?, ?);""",
+                 (
+                     task_name,
+                     task_type,
+                     nonesafe_int(project_id),
+                     nonesafe_int(parent_task_id),
+                 ),
+             )
+             return str(c.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if is_key_failure(e):
+                 raise EntryDoesNotExistException(e)
+             elif is_unique_failure(e):
+                 raise EntryAlreadyExistsException(e)
+             raise MephistoDBException(e)
+
+ def get_task(self, task_id: str) -> Mapping[str, Any]:
+     """
+     Look up a task by task_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in tasks
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(table_name="tasks", id_name="task_id", db_id=task_id)
+
+ def find_tasks(
+     self,
+     task_name: Optional[str] = None,
+     project_id: Optional[str] = None,
+     parent_task_id: Optional[str] = None,
+ ) -> List[Task]:
+     """
+     Return the tasks matching every supplied filter. Filters left as
+     None are ignored, so calling with no arguments returns all tasks.
+     """
+     query = """
+         SELECT * from tasks
+         WHERE (?1 IS NULL OR task_name = ?1)
+         AND (?2 IS NULL OR project_id = ?2)
+         AND (?3 IS NULL OR parent_task_id = ?3)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         filters = (task_name, nonesafe_int(project_id), nonesafe_int(parent_task_id))
+         cursor.execute(query, filters)
+         return [
+             Task(self, str(found["task_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def update_task(
+     self,
+     task_id: str,
+     task_name: Optional[str] = None,
+     project_id: Optional[str] = None,
+ ) -> None:
+     """
+     Update the name and/or project of the given task; arguments left as
+     None are not changed.
+
+     Tasks can only be updated if no runs exist for this task yet, otherwise there's too much state
+     and we shouldn't make changes.
+
+     Raises MephistoDBException if the task already has runs or the new
+     name is empty, EntryAlreadyExistsException if the new name is
+     already in use, and EntryDoesNotExistException on a foreign key
+     failure.
+     """
+     if len(self.find_task_runs(task_id=task_id)) != 0:
+         raise MephistoDBException(
+             "Cannot edit a task that has already been run, for risk of data corruption."
+         )
+     if task_name in [""]:
+         # The rejected name is quoted on both sides in the message
+         raise MephistoDBException(f'Invalid task name "{task_name}"')
+     with self.table_access_condition, self._get_connection() as conn:
+         c = conn.cursor()
+         try:
+             if task_name is not None:
+                 c.execute(
+                     """
+                     UPDATE tasks
+                     SET task_name = ?
+                     WHERE task_id = ?;
+                     """,
+                     (task_name, int(task_id)),
+                 )
+             if project_id is not None:
+                 c.execute(
+                     """
+                     UPDATE tasks
+                     SET project_id = ?
+                     WHERE task_id = ?;
+                     """,
+                     (int(project_id), int(task_id)),
+                 )
+         except sqlite3.IntegrityError as e:
+             if is_key_failure(e):
+                 raise EntryDoesNotExistException(e)
+             elif is_unique_failure(e):
+                 raise EntryAlreadyExistsException(
+                     f"Task name {task_name} is already in use"
+                 )
+             raise MephistoDBException(e)
+
+ def new_task_run(
+     self,
+     task_id: str,
+     requester_id: str,
+     init_params: str,
+     provider_type: str,
+     task_type: str,
+     sandbox: bool = True,
+ ) -> str:
+     """
+     Insert a task_run for the given task and return the new row's id as
+     a string. The run is stored with is_completed set to False.
+
+     Raises EntryDoesNotExistException on a foreign key failure and
+     MephistoDBException on any other integrity error.
+     """
+     insert_statement = """
+         INSERT INTO task_runs(
+             task_id,
+             requester_id,
+             init_params,
+             is_completed,
+             provider_type,
+             task_type,
+             sandbox
+         )
+         VALUES (?, ?, ?, ?, ?, ?, ?);"""
+     with self.table_access_condition, self._get_connection() as conn:
+         # Ensure given ids are valid
+         cursor = conn.cursor()
+         try:
+             values = (
+                 int(task_id),
+                 int(requester_id),
+                 init_params,
+                 False,
+                 provider_type,
+                 task_type,
+                 sandbox,
+             )
+             cursor.execute(insert_statement, values)
+             return str(cursor.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if not is_key_failure(e):
+                 raise MephistoDBException(e)
+             raise EntryDoesNotExistException(e)
+
+ def get_task_run(self, task_run_id: str) -> Mapping[str, Any]:
+     """
+     Look up a task_run by task_run_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in task_runs.
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(
+         table_name="task_runs", id_name="task_run_id", db_id=task_run_id
+     )
+
+ def find_task_runs(
+     self,
+     task_id: Optional[str] = None,
+     requester_id: Optional[str] = None,
+     is_completed: Optional[bool] = None,
+ ) -> List[TaskRun]:
+     """
+     Return the task_runs matching every supplied filter. Filters left as
+     None are ignored, so calling with no arguments returns all task_runs.
+     """
+     query = """
+         SELECT * from task_runs
+         WHERE (?1 IS NULL OR task_id = ?1)
+         AND (?2 IS NULL OR requester_id = ?2)
+         AND (?3 IS NULL OR is_completed = ?3)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         filters = (nonesafe_int(task_id), nonesafe_int(requester_id), is_completed)
+         cursor.execute(query, filters)
+         return [
+             TaskRun(self, str(found["task_run_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def update_task_run(self, task_run_id: str, is_completed: bool):
+     """
+     Store a new completion status for the given task run, which is the
+     only field that can be updated at the moment
+     """
+     update_statement = """
+         UPDATE task_runs
+         SET is_completed = ?
+         WHERE task_run_id = ?;
+         """
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         try:
+             cursor.execute(update_statement, (is_completed, int(task_run_id)))
+         except sqlite3.IntegrityError as e:
+             if not is_key_failure(e):
+                 raise MephistoDBException(e)
+             raise EntryDoesNotExistException(e)
+
+ def new_assignment(
+     self,
+     task_id: str,
+     task_run_id: str,
+     requester_id: str,
+     task_type: str,
+     provider_type: str,
+     sandbox: bool = True,
+ ) -> str:
+     """
+     Create a new assignment for the given task run and return its id as
+     a string.
+
+     Raises EntryDoesNotExistException if the task run does not exist or
+     the insert hits a foreign key failure, and MephistoDBException on
+     any other integrity error.
+     """
+     # Ensure task run exists
+     self.get_task_run(task_run_id)
+     with self.table_access_condition, self._get_connection() as conn:
+         c = conn.cursor()
+         try:
+             c.execute(
+                 """
+                 INSERT INTO assignments(
+                     task_id,
+                     task_run_id,
+                     requester_id,
+                     task_type,
+                     provider_type,
+                     sandbox
+                 ) VALUES (?, ?, ?, ?, ?, ?);""",
+                 (
+                     int(task_id),
+                     int(task_run_id),
+                     int(requester_id),
+                     task_type,
+                     provider_type,
+                     sandbox,
+                 ),
+             )
+             return str(c.lastrowid)
+         except sqlite3.IntegrityError as e:
+             # Translate integrity errors the same way the other
+             # creation methods do, so callers only see Mephisto errors
+             if is_key_failure(e):
+                 raise EntryDoesNotExistException(e)
+             raise MephistoDBException(e)
+
+ def get_assignment(self, assignment_id: str) -> Mapping[str, Any]:
+     """
+     Look up an assignment by assignment_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in assignments
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(
+         table_name="assignments", id_name="assignment_id", db_id=assignment_id
+     )
+
+ def find_assignments(
+     self,
+     task_run_id: Optional[str] = None,
+     task_id: Optional[str] = None,
+     requester_id: Optional[str] = None,
+     task_type: Optional[str] = None,
+     provider_type: Optional[str] = None,
+     sandbox: Optional[bool] = None,
+ ) -> List[Assignment]:
+     """
+     Return the assignments matching every supplied filter. Filters left
+     as None are ignored, so calling with no arguments returns all
+     assignments.
+     """
+     query = """
+         SELECT * from assignments
+         WHERE (?1 IS NULL OR task_run_id = ?1)
+         AND (?2 IS NULL OR task_id = ?2)
+         AND (?3 IS NULL OR requester_id = ?3)
+         AND (?4 IS NULL OR task_type = ?4)
+         AND (?5 IS NULL OR provider_type = ?5)
+         AND (?6 IS NULL OR sandbox = ?6)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         filters = (
+             nonesafe_int(task_run_id),
+             nonesafe_int(task_id),
+             nonesafe_int(requester_id),
+             task_type,
+             provider_type,
+             sandbox,
+         )
+         cursor.execute(query, filters)
+         return [
+             Assignment(self, str(found["assignment_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def new_unit(
+     self,
+     task_id: str,
+     task_run_id: str,
+     requester_id: str,
+     assignment_id: str,
+     unit_index: int,
+     pay_amount: float,
+     provider_type: str,
+     task_type: str,
+     sandbox: bool = True,
+ ) -> str:
+     """
+     Insert a unit with the given index, stored with status
+     AssignmentState.CREATED, and return its id as a string.
+
+     Raises EntryAlreadyExistsException if there is already a unit for
+     the given assignment with the given index, EntryDoesNotExistException
+     on a foreign key failure, and MephistoDBException on any other
+     integrity error.
+     """
+     insert_statement = """INSERT INTO units(
+             task_id,
+             task_run_id,
+             requester_id,
+             assignment_id,
+             unit_index,
+             pay_amount,
+             provider_type,
+             task_type,
+             sandbox,
+             status
+         ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         try:
+             values = (
+                 int(task_id),
+                 int(task_run_id),
+                 int(requester_id),
+                 int(assignment_id),
+                 unit_index,
+                 pay_amount,
+                 provider_type,
+                 task_type,
+                 sandbox,
+                 AssignmentState.CREATED,
+             )
+             cursor.execute(insert_statement, values)
+             return str(cursor.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if is_key_failure(e):
+                 raise EntryDoesNotExistException(e)
+             if is_unique_failure(e):
+                 raise EntryAlreadyExistsException(e)
+             raise MephistoDBException(e)
+
+ def get_unit(self, unit_id: str) -> Mapping[str, Any]:
+     """
+     Look up a unit by unit_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in units
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(table_name="units", id_name="unit_id", db_id=unit_id)
+
+ def find_units(
+     self,
+     task_id: Optional[str] = None,
+     task_run_id: Optional[str] = None,
+     requester_id: Optional[str] = None,
+     assignment_id: Optional[str] = None,
+     unit_index: Optional[int] = None,
+     provider_type: Optional[str] = None,
+     task_type: Optional[str] = None,
+     agent_id: Optional[str] = None,
+     worker_id: Optional[str] = None,
+     sandbox: Optional[bool] = None,
+     status: Optional[str] = None,
+ ) -> List[Unit]:
+     """
+     Return the units matching every supplied filter. Filters left as
+     None are ignored, so calling with no arguments returns all units.
+     """
+     query = """
+         SELECT * from units
+         WHERE (?1 IS NULL OR task_id = ?1)
+         AND (?2 IS NULL OR task_run_id = ?2)
+         AND (?3 IS NULL OR requester_id = ?3)
+         AND (?4 IS NULL OR assignment_id = ?4)
+         AND (?5 IS NULL OR unit_index = ?5)
+         AND (?6 IS NULL OR provider_type = ?6)
+         AND (?7 IS NULL OR task_type = ?7)
+         AND (?8 IS NULL OR agent_id = ?8)
+         AND (?9 IS NULL OR worker_id = ?9)
+         AND (?10 IS NULL OR sandbox = ?10)
+         AND (?11 IS NULL OR status = ?11)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         # Positions must line up with the numbered placeholders above
+         filters = (
+             nonesafe_int(task_id),
+             nonesafe_int(task_run_id),
+             nonesafe_int(requester_id),
+             nonesafe_int(assignment_id),
+             unit_index,
+             provider_type,
+             task_type,
+             nonesafe_int(agent_id),
+             nonesafe_int(worker_id),
+             sandbox,
+             status,
+         )
+         cursor.execute(query, filters)
+         return [
+             Unit(self, str(found["unit_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def clear_unit_agent_assignment(self, unit_id: str) -> None:
+     """
+     Detach the agent and worker from the given unit and set its status
+     to AssignmentState.LAUNCHED, making the unit assignable again.
+     """
+     reset_statement = """
+         UPDATE units
+         SET agent_id = ?, worker_id = ?, status = ?
+         WHERE unit_id = ?;
+         """
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         try:
+             cursor.execute(
+                 reset_statement,
+                 (None, None, AssignmentState.LAUNCHED, int(unit_id)),
+             )
+         except sqlite3.IntegrityError as e:
+             if not is_key_failure(e):
+                 raise MephistoDBException(e)
+             raise EntryDoesNotExistException(
+                 f"Given unit_id {unit_id} not found in the database"
+             )
+
+ def update_unit(
+     self, unit_id: str, agent_id: Optional[str] = None, status: Optional[str] = None
+ ) -> None:
+     """
+     Update the agent and/or status of the given unit; arguments left as
+     None are not changed.
+
+     Raises MephistoDBException if a status is supplied that is not in
+     AssignmentState.valid_unit(), and EntryDoesNotExistException on a
+     foreign key failure.
+     """
+     # Only a supplied status is validated: status defaults to None so
+     # that the agent can be updated on its own.
+     if status is not None and status not in AssignmentState.valid_unit():
+         raise MephistoDBException(f"Invalid status {status} for a unit")
+     with self.table_access_condition, self._get_connection() as conn:
+         c = conn.cursor()
+         try:
+             if agent_id is not None:
+                 c.execute(
+                     """
+                     UPDATE units
+                     SET agent_id = ?
+                     WHERE unit_id = ?;
+                     """,
+                     (int(agent_id), int(unit_id)),
+                 )
+             if status is not None:
+                 c.execute(
+                     """
+                     UPDATE units
+                     SET status = ?
+                     WHERE unit_id = ?;
+                     """,
+                     (status, int(unit_id)),
+                 )
+         except sqlite3.IntegrityError as e:
+             if is_key_failure(e):
+                 raise EntryDoesNotExistException(
+                     f"Given unit_id {unit_id} not found in the database"
+                 )
+             raise MephistoDBException(e)
+
+ def new_requester(self, requester_name: str, provider_type: str) -> str:
+     """
+     Insert a requester with the given name and provider type and return
+     its id as a string.
+
+     Raises MephistoDBException for an empty name or an invalid provider
+     type, and EntryAlreadyExistsException if there is already a
+     requester with this name
+     """
+     if requester_name == "":
+         raise MephistoDBException("Empty string is not a valid requester name")
+     assert_valid_provider(provider_type)
+     insert_statement = (
+         "INSERT INTO requesters(requester_name, provider_type) VALUES (?, ?);"
+     )
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         try:
+             cursor.execute(insert_statement, (requester_name, provider_type))
+             return str(cursor.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if not is_unique_failure(e):
+                 raise MephistoDBException(e)
+             raise EntryAlreadyExistsException()
+
+ def get_requester(self, requester_id: str) -> Mapping[str, Any]:
+     """
+     Look up a requester by requester_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in requesters
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(
+         table_name="requesters", id_name="requester_id", db_id=requester_id
+     )
+
+ def find_requesters(
+     self, requester_name: Optional[str] = None, provider_type: Optional[str] = None
+ ) -> List[Requester]:
+     """
+     Return the requesters matching every supplied filter. Filters left
+     as None are ignored, so calling with no arguments returns all
+     requesters.
+     """
+     query = """
+         SELECT * from requesters
+         WHERE (?1 IS NULL OR requester_name = ?1)
+         AND (?2 IS NULL OR provider_type = ?2)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         cursor.execute(query, (requester_name, provider_type))
+         return [
+             Requester(self, str(found["requester_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def new_worker(self, worker_name: str, provider_type: str) -> str:
+     """
+     Create a new worker with the given name and provider type and
+     return its id as a string.
+
+     worker_name should be the unique identifier by which the crowd provider
+     is using to keep track of this worker
+
+     Raises MephistoDBException for an empty name or an invalid provider
+     type, and EntryAlreadyExistsException if there is already a worker
+     with this name.
+     """
+     if worker_name == "":
+         # The message names the worker, which is what is being validated
+         raise MephistoDBException("Empty string is not a valid worker name")
+     assert_valid_provider(provider_type)
+     with self.table_access_condition, self._get_connection() as conn:
+         c = conn.cursor()
+         try:
+             c.execute(
+                 "INSERT INTO workers(worker_name, provider_type) VALUES (?, ?);",
+                 (worker_name, provider_type),
+             )
+             return str(c.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if is_unique_failure(e):
+                 raise EntryAlreadyExistsException()
+             raise MephistoDBException(e)
+
+ def get_worker(self, worker_id: str) -> Mapping[str, Any]:
+     """
+     Look up a worker by worker_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in workers
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(
+         table_name="workers", id_name="worker_id", db_id=worker_id
+     )
+
+ def find_workers(
+     self, worker_name: Optional[str] = None, provider_type: Optional[str] = None
+ ) -> List[Worker]:
+     """
+     Return the workers matching every supplied filter. Filters left as
+     None are ignored, so calling with no arguments returns all workers.
+     """
+     query = """
+         SELECT * from workers
+         WHERE (?1 IS NULL OR worker_name = ?1)
+         AND (?2 IS NULL OR provider_type = ?2)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         cursor.execute(query, (worker_name, provider_type))
+         return [
+             Worker(self, str(found["worker_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def new_agent(
+     self,
+     worker_id: str,
+     unit_id: str,
+     task_id: str,
+     task_run_id: str,
+     assignment_id: str,
+     task_type: str,
+     provider_type: str,
+ ) -> str:
+     """
+     Insert an agent for the given worker and unit, stored with status
+     AgentState.STATUS_NONE, then mark the unit as
+     AssignmentState.ASSIGNED to that agent and worker. Returns the new
+     agent's id as a string.
+
+     Raises MephistoDBException for an invalid provider type,
+     EntryDoesNotExistException on a foreign key failure, and
+     MephistoDBException on any other integrity error.
+     """
+     assert_valid_provider(provider_type)
+     insert_statement = """INSERT INTO agents(
+             worker_id,
+             unit_id,
+             task_id,
+             task_run_id,
+             assignment_id,
+             task_type,
+             provider_type,
+             status
+         ) VALUES (?, ?, ?, ?, ?, ?, ?, ?);"""
+     assign_statement = """
+         UPDATE units
+         SET status = ?, agent_id = ?, worker_id = ?
+         WHERE unit_id = ?;
+         """
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         try:
+             agent_values = (
+                 int(worker_id),
+                 int(unit_id),
+                 int(task_id),
+                 int(task_run_id),
+                 int(assignment_id),
+                 task_type,
+                 provider_type,
+                 AgentState.STATUS_NONE,
+             )
+             cursor.execute(insert_statement, agent_values)
+             # Read the id before the second statement runs on this cursor
+             agent_id = str(cursor.lastrowid)
+             unit_values = (
+                 AssignmentState.ASSIGNED,
+                 int(agent_id),
+                 int(worker_id),
+                 int(unit_id),
+             )
+             cursor.execute(assign_statement, unit_values)
+             return agent_id
+         except sqlite3.IntegrityError as e:
+             if not is_key_failure(e):
+                 raise MephistoDBException(e)
+             raise EntryDoesNotExistException(e)
+
+ def get_agent(self, agent_id: str) -> Mapping[str, Any]:
+     """
+     Look up an agent by agent_id and return its fields; raises
+     EntryDoesNotExistException when the id is not present in agents
+
+     The result is a SQLite Row object with the expected fields
+     """
+     return self.__get_one_by_id(table_name="agents", id_name="agent_id", db_id=agent_id)
+
+ def update_agent(self, agent_id: str, status: Optional[str] = None) -> None:
+     """
+     Store a new status for the given agent. Raises MephistoDBException
+     when the status is not one of AgentState.valid().
+     """
+     allowed_statuses = AgentState.valid()
+     if status not in allowed_statuses:
+         raise MephistoDBException(f"Invalid status {status} for an agent")
+
+     update_statement = """
+         UPDATE agents
+         SET status = ?
+         WHERE agent_id = ?;
+         """
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         cursor.execute(update_statement, (status, int(agent_id)))
+
+ def find_agents(
+     self,
+     status: Optional[str] = None,
+     unit_id: Optional[str] = None,
+     worker_id: Optional[str] = None,
+     task_id: Optional[str] = None,
+     task_run_id: Optional[str] = None,
+     assignment_id: Optional[str] = None,
+     task_type: Optional[str] = None,
+     provider_type: Optional[str] = None,
+ ) -> List[Agent]:
+     """
+     Return the agents matching every supplied filter. Filters left as
+     None are ignored, so calling with no arguments returns all agents.
+     """
+     query = """
+         SELECT * from agents
+         WHERE (?1 IS NULL OR status = ?1)
+         AND (?2 IS NULL OR unit_id = ?2)
+         AND (?3 IS NULL OR worker_id = ?3)
+         AND (?4 IS NULL OR task_id = ?4)
+         AND (?5 IS NULL OR task_run_id = ?5)
+         AND (?6 IS NULL OR assignment_id = ?6)
+         AND (?7 IS NULL OR task_type = ?7)
+         AND (?8 IS NULL OR provider_type = ?8)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         # Positions must line up with the numbered placeholders above
+         filters = (
+             status,
+             nonesafe_int(unit_id),
+             nonesafe_int(worker_id),
+             nonesafe_int(task_id),
+             nonesafe_int(task_run_id),
+             nonesafe_int(assignment_id),
+             task_type,
+             provider_type,
+         )
+         cursor.execute(query, filters)
+         return [
+             Agent(self, str(found["agent_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+ def make_qualification(self, qualification_name: str) -> str:
+     """
+     Insert a qualification with the given name and return its id as a
+     string.
+
+     Raises MephistoDBException for an empty name and
+     EntryAlreadyExistsException if a qualification by the given name
+     already exists.
+     """
+     if qualification_name == "":
+         raise MephistoDBException("Empty string is not a valid qualification name")
+     insert_statement = "INSERT INTO qualifications(qualification_name) VALUES (?);"
+     with self.table_access_condition, self._get_connection() as conn:
+         cursor = conn.cursor()
+         try:
+             cursor.execute(insert_statement, (qualification_name,))
+             return str(cursor.lastrowid)
+         except sqlite3.IntegrityError as e:
+             if not is_unique_failure(e):
+                 raise MephistoDBException(e)
+             raise EntryAlreadyExistsException()
+
+ def find_qualifications(
+     self, qualification_name: Optional[str] = None
+ ) -> List[Qualification]:
+     """
+     Return the qualifications with the given name, or every
+     qualification when no name is supplied.
+     """
+     query = """
+         SELECT * from qualifications
+         WHERE (?1 IS NULL OR qualification_name = ?1)
+         """
+     with self.table_access_condition:
+         cursor = self._get_connection().cursor()
+         cursor.execute(query, (qualification_name,))
+         return [
+             Qualification(self, str(found["qualification_id"]), row=found)
+             for found in cursor.fetchall()
+         ]
+
+     def get_qualification(self, qualification_id: str) -> Mapping[str, Any]:
+         """
+         Return the fields of the qualification with the given id.
+
+         The lookup is delegated to the shared get-one-by-id helper against the
+         qualifications table; that helper is expected to raise
+         EntryDoesNotExistException when the id is unknown.
+
+         See Qualification for the expected fields for the returned mapping
+         """
+         table_name, id_column = "qualifications", "qualification_id"
+         return self.__get_one_by_id(table_name, id_column, qualification_id)
+
+     def _delete_qualification(self, qualification_name: str) -> None:
+         """
+         Delete the named qualification together with every grant of it.
+
+         Raises EntryDoesNotExistException if no qualification has this name.
+         """
+         matches = self.find_qualifications(qualification_name=qualification_name)
+         if not matches:
+             raise EntryDoesNotExistException(
+                 f"No qualification found by name {qualification_name}"
+             )
+         target = matches[0]
+         with self.table_access_condition, self._get_connection() as conn:
+             cursor = conn.cursor()
+             # Grants are keyed by qualification id, so remove them first...
+             cursor.execute(
+                 "DELETE FROM granted_qualifications WHERE qualification_id = ?1;",
+                 (int(target.db_id),),
+             )
+             # ...then remove the qualification entry itself, matched by name.
+             cursor.execute(
+                 "DELETE FROM qualifications WHERE qualification_name = ?1;",
+                 (qualification_name,),
+             )
+
+     def grant_qualification(
+         self, qualification_id: str, worker_id: str, value: int = 1
+     ) -> None:
+         """
+         Grant a worker the given qualification. If the worker already holds it,
+         update the stored value instead (nothing is written when the value is
+         unchanged).
+
+         Raises EntryAlreadyExistsException if the insert fails with an integrity
+         error that is_unique_failure() recognizes, and MephistoDBException for
+         any other sqlite3.IntegrityError.
+         """
+         # Note that better syntax exists for python 3.8+, as described in PR #223
+         try:
+             existing = self.get_granted_qualification(qualification_id, worker_id)
+         except EntryDoesNotExistException:
+             existing = None
+
+         # Used as a context manager, a sqlite3 connection commits on success and
+         # rolls back on error, so neither branch needs an explicit commit.
+         # NOTE(review): assumes _get_connection() returns a sqlite3.Connection,
+         # as the sqlite3 error handling below suggests.
+         with self.table_access_condition, self._get_connection() as conn:
+             if existing is not None:
+                 if value != existing["value"]:
+                     conn.cursor().execute(
+                         """
+                         UPDATE granted_qualifications
+                         SET value = ?
+                         WHERE (qualification_id = ?)
+                         AND (worker_id = ?);
+                         """,
+                         (value, int(qualification_id), int(worker_id)),
+                     )
+                 return None
+             try:
+                 conn.cursor().execute(
+                     """
+                     INSERT INTO granted_qualifications(
+                         qualification_id,
+                         worker_id,
+                         value
+                     ) VALUES (?, ?, ?);
+                     """,
+                     (int(qualification_id), int(worker_id), value),
+                 )
+             except sqlite3.IntegrityError as e:
+                 # Chain the sqlite error so the root cause stays in the traceback
+                 if is_unique_failure(e):
+                     raise EntryAlreadyExistsException() from e
+                 raise MephistoDBException(e) from e
+         return None
+
+     def check_granted_qualifications(
+         self,
+         qualification_id: Optional[str] = None,
+         worker_id: Optional[str] = None,
+         value: Optional[int] = None,
+     ) -> List[GrantedQualification]:
+         """
+         Return the granted qualifications matching every filter that is not
+         None; with no filters, all granted qualifications are returned.
+         """
+         query = """
+             SELECT * from granted_qualifications
+             WHERE (?1 IS NULL OR qualification_id = ?1)
+             AND (?2 IS NULL OR worker_id = ?2)
+             AND (?3 IS NULL OR value = ?3)
+             """
+         with self.table_access_condition:
+             cursor = self._get_connection().cursor()
+             cursor.execute(query, (qualification_id, worker_id, value))
+             granted = []
+             for match in cursor.fetchall():
+                 # Only the two ids are passed on; the fetched row is not reused
+                 granted.append(
+                     GrantedQualification(
+                         self, str(match["qualification_id"]), str(match["worker_id"])
+                     )
+                 )
+             return granted
+
+ # TODO(101) these should not be optional
+     def get_granted_qualification(
+         self, qualification_id: Optional[str] = None, worker_id: Optional[str] = None
+     ) -> Mapping[str, Any]:
+         """
+         Return the granted_qualifications row linking the given worker and
+         qualification.
+
+         Raises EntryDoesNotExistException unless exactly one row matches.
+
+         See GrantedQualification for the expected fields for the returned mapping
+         """
+         lookup_sql = """
+             SELECT * FROM granted_qualifications
+             WHERE (qualification_id = ?1)
+             AND (worker_id = ?2);
+             """
+         with self.table_access_condition:
+             cursor = self._get_connection().cursor()
+             cursor.execute(
+                 lookup_sql,
+                 (nonesafe_int(qualification_id), nonesafe_int(worker_id)),
+             )
+             matches = cursor.fetchall()
+             if len(matches) == 1:
+                 return matches[0]
+             raise EntryDoesNotExistException(
+                 f"No such granted qualification {qualification_id}, {worker_id}"
+             )
+
+     def revoke_qualification(self, qualification_id: str, worker_id: str) -> None:
+         """
+         Delete the grant of the given qualification for the given worker.
+         No check is made that such a grant existed beforehand.
+         """
+         delete_sql = """DELETE FROM granted_qualifications
+             WHERE (qualification_id = ?1)
+             AND (worker_id = ?2);
+             """
+         with self.table_access_condition, self._get_connection() as conn:
+             cursor = conn.cursor()
+             cursor.execute(delete_sql, (int(qualification_id), int(worker_id)))
+
+     def new_onboarding_agent(
+         self, worker_id: str, task_id: str, task_run_id: str, task_type: str
+     ) -> str:
+         """
+         Create an onboarding agent for the given worker, task and task run,
+         starting in AgentState.STATUS_NONE, and return its id as a string.
+
+         If the insert fails with a sqlite3.IntegrityError, raises
+         EntryDoesNotExistException when is_key_failure() recognizes the error,
+         MephistoDBException otherwise.
+         """
+         insert_sql = """INSERT INTO onboarding_agents(
+                     worker_id,
+                     task_id,
+                     task_run_id,
+                     task_type,
+                     status
+                 ) VALUES (?, ?, ?, ?, ?);"""
+         with self.table_access_condition, self._get_connection() as conn:
+             cursor = conn.cursor()
+             try:
+                 new_row = (
+                     int(worker_id),
+                     int(task_id),
+                     int(task_run_id),
+                     task_type,
+                     AgentState.STATUS_NONE,
+                 )
+                 cursor.execute(insert_sql, new_row)
+                 return str(cursor.lastrowid)
+             except sqlite3.IntegrityError as e:
+                 if not is_key_failure(e):
+                     raise MephistoDBException(e)
+                 raise EntryDoesNotExistException(e)
+
+     def get_onboarding_agent(self, onboarding_agent_id: str) -> Mapping[str, Any]:
+         """
+         Return the fields of the onboarding agent with the given id.
+
+         The lookup is delegated to the shared get-one-by-id helper against the
+         onboarding_agents table; that helper is expected to raise
+         EntryDoesNotExistException when the id is unknown.
+
+         Returns a SQLite Row object with the expected fields
+         """
+         table_name, id_column = "onboarding_agents", "onboarding_agent_id"
+         return self.__get_one_by_id(table_name, id_column, onboarding_agent_id)
+
+     def update_onboarding_agent(
+         self, onboarding_agent_id: str, status: Optional[str] = None
+     ) -> None:
+         """
+         Update the given onboarding agent's status.
+
+         A status of None (the default) means "leave the status unchanged", so
+         the call returns without touching the database. Any other value must
+         be listed in AgentState.valid(); otherwise MephistoDBException is raised.
+         """
+         if status is None:
+             # Status is the only field this method can update, so there is
+             # nothing to write. Previously None fell through to the validity
+             # check below and always raised, contradicting the default.
+             return None
+         if status not in AgentState.valid():
+             raise MephistoDBException(f"Invalid status {status} for an agent")
+         with self.table_access_condition, self._get_connection() as conn:
+             c = conn.cursor()
+             c.execute(
+                 """
+                 UPDATE onboarding_agents
+                 SET status = ?
+                 WHERE onboarding_agent_id = ?;
+                 """,
+                 (status, int(onboarding_agent_id)),
+             )
+         return None
+
+     def find_onboarding_agents(
+         self,
+         status: Optional[str] = None,
+         worker_id: Optional[str] = None,
+         task_id: Optional[str] = None,
+         task_run_id: Optional[str] = None,
+         task_type: Optional[str] = None,
+     ) -> List[OnboardingAgent]:
+         """
+         Return the onboarding agents matching every filter that is not None.
+         Called with no arguments, returns all onboarding agents.
+         """
+         query = """
+             SELECT * from onboarding_agents
+             WHERE (?1 IS NULL OR status = ?1)
+             AND (?2 IS NULL OR worker_id = ?2)
+             AND (?3 IS NULL OR task_id = ?3)
+             AND (?4 IS NULL OR task_run_id = ?4)
+             AND (?5 IS NULL OR task_type = ?5)
+             """
+         with self.table_access_condition:
+             cursor = self._get_connection().cursor()
+             # Id filters go through nonesafe_int before binding
+             # (presumably None stays None, anything else becomes an int)
+             filters = (
+                 status,
+                 nonesafe_int(worker_id),
+                 nonesafe_int(task_id),
+                 nonesafe_int(task_run_id),
+                 task_type,
+             )
+             cursor.execute(query, filters)
+             agents = []
+             for found in cursor.fetchall():
+                 agent_id = str(found["onboarding_agent_id"])
+                 agents.append(OnboardingAgent(self, agent_id, row=found))
+             return agents
diff --git a/mephisto/providers/README.md b/mephisto/abstractions/providers/README.md
similarity index 100%
rename from mephisto/providers/README.md
rename to mephisto/abstractions/providers/README.md
diff --git a/mephisto/abstractions/providers/__init__.py b/mephisto/abstractions/providers/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/providers/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/providers/mock/__init__.py b/mephisto/abstractions/providers/mock/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/providers/mock/mock_agent.py b/mephisto/abstractions/providers/mock/mock_agent.py
new file mode 100644
index 000000000..76282e355
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/mock_agent.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.agent import Agent
+from mephisto.abstractions.blueprint import AgentState
+from mephisto.abstractions.providers.mock.provider_type import PROVIDER_TYPE
+
+from typing import List, Optional, Tuple, Dict, Mapping, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.assignment import Unit
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.packet import Packet
+ from mephisto.abstractions.providers.mock.mock_datastore import MockDatastore
+
+
+ class MockAgent(Agent):
+     """
+     Agent for the mock provider. Everything the agent observes, the acts
+     queued for it, and the acts it has returned are recorded in the mock
+     datastore's `agent_data` dict, keyed by the agent's db id.
+     """
+
+     def __init__(
+         self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+     ):
+         super().__init__(db, db_id, row=row)
+         self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
+         records = self.datastore.agent_data
+         # Only seed the record the first time this agent id is seen, so an
+         # existing record for the same id is left untouched.
+         if db_id not in records:
+             records[db_id] = {"observed": [], "pending_acts": [], "acts": []}
+
+     def observe(self, packet: "Packet") -> None:
+         """Record the packet in this agent's observed list, then observe as usual"""
+         observed = self.datastore.agent_data[self.db_id]["observed"]
+         observed.append(packet)
+         super().observe(packet)
+
+     def act(self, timeout=None) -> Optional["Packet"]:
+         """
+         Return the oldest queued act if one is pending (for use by tests
+         and other mock purposes); otherwise request a regular act from the
+         parent class (for use in manual testing). Any non-None act is
+         appended to this agent's act history before being returned.
+         """
+         queued = self.datastore.agent_data[self.db_id]["pending_acts"]
+         if len(queued) > 0:
+             act = queued.pop(0)
+         else:
+             act = super().act(timeout=timeout)
+
+         if act is None:
+             return act
+         self.datastore.agent_data[self.db_id]["acts"].append(act)
+         return act
+
+     def approve_work(self) -> None:
+         """Mark this agent's work as approved by moving it to STATUS_APPROVED"""
+         self.update_status(AgentState.STATUS_APPROVED)
+
+     def reject_work(self, reason) -> None:
+         """
+         Mark this agent's work as rejected by moving it to STATUS_REJECTED.
+         The reason is accepted but not used by the mock.
+         """
+         self.update_status(AgentState.STATUS_REJECTED)
+
+     def mark_done(self) -> None:
+         """
+         Move the agent to STATUS_COMPLETED in the database, unless its
+         current status is already one of AgentState.complete().
+         """
+         if self.get_status() in AgentState.complete():
+             return
+         self.db.update_agent(agent_id=self.db_id, status=AgentState.STATUS_COMPLETED)
+
+     def mark_disconnected(self) -> None:
+         """Set this mock agent's status in the database to STATUS_DISCONNECT"""
+         self.db.update_agent(agent_id=self.db_id, status=AgentState.STATUS_DISCONNECT)
+
+     @staticmethod
+     def new(db: "MephistoDB", worker: "Worker", unit: "Unit") -> "Agent":
+         """Register a mock-provider agent for this worker on the given Unit."""
+         return MockAgent._register_agent(db, worker, unit, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mock/mock_datastore.py b/mephisto/abstractions/providers/mock/mock_datastore.py
new file mode 100644
index 000000000..ae4f910cf
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/mock_datastore.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import boto3
+import sqlite3
+import os
+import threading
+
+from datetime import datetime
+
+
+from botocore.exceptions import ClientError
+from botocore.exceptions import ProfileNotFound
+
+from typing import Dict, Any, Optional
+
+ # NOTE(review): not referenced anywhere else in this module; presumably
+ # carried over from the MTurk datastore - confirm before removing.
+ MTURK_REGION_NAME = "us-east-1"
+
+ # One row per requester id, with a flag for whether it has been registered.
+ CREATE_REQUESTERS_TABLE = """CREATE TABLE IF NOT EXISTS requesters (
+ requester_id TEXT PRIMARY KEY UNIQUE,
+ is_registered BOOLEAN
+ );
+ """
+
+ # One row per unit id, with a flag for whether the unit has been expired.
+ CREATE_UNITS_TABLE = """CREATE TABLE IF NOT EXISTS units (
+ unit_id TEXT PRIMARY KEY UNIQUE,
+ is_expired BOOLEAN
+ );
+ """
+
+ # One row per worker id, with a flag for whether the worker is blocked.
+ CREATE_WORKERS_TABLE = """CREATE TABLE IF NOT EXISTS workers (
+ worker_id TEXT PRIMARY KEY UNIQUE,
+ is_blocked BOOLEAN
+ );
+ """
+
+
+ class MockDatastore:
+     """
+     Storage backing the mock provider, for use in unit testing and manual
+     experimentation.
+
+     Per-agent observations and acts live in the in-memory `agent_data` dict,
+     while requester/worker/unit flags are kept in a sqlite database at
+     `<datastore_root>/mock.db`.
+     """
+
+     # (table, id column, flag column) for each boolean flag tracked in sqlite.
+     # These must match the CREATE_*_TABLE statements at the top of the module.
+     _REQUESTER_REGISTERED = ("requesters", "requester_id", "is_registered")
+     _WORKER_BLOCKED = ("workers", "worker_id", "is_blocked")
+     _UNIT_EXPIRED = ("units", "unit_id", "is_expired")
+
+     def __init__(self, datastore_root: str):
+         """Initialize local storage of active agents, connect to the database"""
+         self.agent_data: Dict[str, Dict[str, Any]] = {}
+         self.table_access_condition = threading.Condition()
+         # One sqlite connection per thread, keyed by thread ident
+         self.conn: Dict[int, sqlite3.Connection] = {}
+         # Set every attribute before touching the database so the instance is
+         # fully formed by the time init_tables() runs.
+         self.datastore_root = datastore_root
+         self.db_path = os.path.join(datastore_root, "mock.db")
+         self.init_tables()
+
+     def _get_connection(self) -> sqlite3.Connection:
+         """Returns a singular database connection to be shared amongst all
+         calls for a given thread.
+         """
+         curr_thread = threading.get_ident()
+         conn = self.conn.get(curr_thread)
+         if conn is None:
+             conn = sqlite3.connect(self.db_path)
+             # Rows are addressed by column name throughout this class
+             conn.row_factory = sqlite3.Row
+             self.conn[curr_thread] = conn
+         return conn
+
+     def init_tables(self) -> None:
+         """
+         Run all the table creation SQL queries to ensure the expected tables exist
+         """
+         with self.table_access_condition:
+             conn = self._get_connection()
+             conn.execute("PRAGMA foreign_keys = 1")
+             c = conn.cursor()
+             c.execute(CREATE_REQUESTERS_TABLE)
+             c.execute(CREATE_UNITS_TABLE)
+             c.execute(CREATE_WORKERS_TABLE)
+             conn.commit()
+
+     # The helpers below interpolate table/column names into SQL. Those names
+     # only ever come from the class-level tuples above, never from callers;
+     # all caller-supplied values are bound as parameters.
+
+     def _ensure_flag_row(self, spec: Tuple[str, str, str], row_id: str) -> None:
+         """Insert a row for row_id with its flag set to False, unless one exists"""
+         table, id_column, flag_column = spec
+         with self.table_access_condition:
+             conn = self._get_connection()
+             conn.execute(
+                 f"INSERT OR IGNORE INTO {table}({id_column}, {flag_column}) "
+                 "VALUES (?, ?);",
+                 (row_id, False),
+             )
+             conn.commit()
+
+     def _set_flag(self, spec: Tuple[str, str, str], row_id: str, val: bool) -> None:
+         """Set the flag for row_id, creating the row first if needed"""
+         table, id_column, flag_column = spec
+         self._ensure_flag_row(spec, row_id)
+         with self.table_access_condition:
+             conn = self._get_connection()
+             conn.execute(
+                 f"UPDATE {table} SET {flag_column} = ? WHERE {id_column} = ?",
+                 (val, row_id),
+             )
+             conn.commit()
+
+     def _get_flag(self, spec: Tuple[str, str, str], row_id: str) -> bool:
+         """Read the flag for row_id, creating the row (as False) if needed"""
+         table, id_column, flag_column = spec
+         self._ensure_flag_row(spec, row_id)
+         with self.table_access_condition:
+             conn = self._get_connection()
+             row = conn.execute(
+                 f"SELECT {flag_column} from {table} WHERE {id_column} = ?",
+                 (row_id,),
+             ).fetchone()
+             # The row always exists here: _ensure_flag_row just created it
+             return bool(row[flag_column])
+
+     def ensure_requester_exists(self, requester_id: str) -> None:
+         """Create a record of this requester if it doesn't exist"""
+         self._ensure_flag_row(self._REQUESTER_REGISTERED, requester_id)
+
+     def set_requester_registered(self, requester_id: str, val: bool) -> None:
+         """Set the requester registration status for the given id"""
+         self._set_flag(self._REQUESTER_REGISTERED, requester_id, val)
+
+     def get_requester_registered(self, requester_id: str) -> bool:
+         """Get the registration status of a requester"""
+         return self._get_flag(self._REQUESTER_REGISTERED, requester_id)
+
+     def ensure_worker_exists(self, worker_id: str) -> None:
+         """Create a record of this worker if it doesn't exist"""
+         self._ensure_flag_row(self._WORKER_BLOCKED, worker_id)
+
+     def set_worker_blocked(self, worker_id: str, val: bool) -> None:
+         """Set the blocked status for the given worker id"""
+         self._set_flag(self._WORKER_BLOCKED, worker_id, val)
+
+     def get_worker_blocked(self, worker_id: str) -> bool:
+         """Get the blocked status of a worker"""
+         return self._get_flag(self._WORKER_BLOCKED, worker_id)
+
+     def ensure_unit_exists(self, unit_id: str) -> None:
+         """Create a record of this unit if it doesn't exist"""
+         self._ensure_flag_row(self._UNIT_EXPIRED, unit_id)
+
+     def set_unit_expired(self, unit_id: str, val: bool) -> None:
+         """Set the expired status for the given unit id"""
+         self._set_flag(self._UNIT_EXPIRED, unit_id, val)
+
+     def get_unit_expired(self, unit_id: str) -> bool:
+         """Get the expired status of a unit"""
+         return self._get_flag(self._UNIT_EXPIRED, unit_id)
diff --git a/mephisto/abstractions/providers/mock/mock_provider.py b/mephisto/abstractions/providers/mock/mock_provider.py
new file mode 100644
index 000000000..86a003825
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/mock_provider.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.crowd_provider import CrowdProvider, ProviderArgs
+from mephisto.abstractions.providers.mock.mock_agent import MockAgent
+from mephisto.abstractions.providers.mock.mock_requester import MockRequester
+from mephisto.abstractions.providers.mock.mock_unit import MockUnit
+from mephisto.abstractions.providers.mock.mock_worker import MockWorker
+from mephisto.abstractions.providers.mock.mock_datastore import MockDatastore
+from mephisto.abstractions.providers.mock.provider_type import PROVIDER_TYPE
+from mephisto.data_model.requester import RequesterArgs
+from mephisto.operations.registry import register_mephisto_abstraction
+from dataclasses import dataclass, field
+
+from typing import ClassVar, Dict, Any, Optional, Type, List, TYPE_CHECKING
+
+import os
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Unit
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.requester import Requester
+ from mephisto.data_model.agent import Agent
+ from mephisto.abstractions.blueprint import SharedTaskState
+ from omegaconf import DictConfig
+
+
+ @dataclass
+ class MockProviderArgs(ProviderArgs):
+     """Arguments to configure the mock crowd provider"""
+
+     # Fixes the provider type of these args to the mock provider's identifier
+     _provider_type: str = PROVIDER_TYPE
+
+
+ @register_mephisto_abstraction()
+ class MockProvider(CrowdProvider):
+     """
+     CrowdProvider for the mock provider type. It ties together the mock
+     unit, requester, worker and agent classes and backs them with a
+     MockDatastore, for use in tests.
+     """
+
+     PROVIDER_TYPE = PROVIDER_TYPE
+
+     SUPPORTED_TASK_TYPES: ClassVar[List[str]] = ["mock"]
+
+     ArgsClass = MockProviderArgs
+
+     # Concrete data-model classes used for this provider
+     UnitClass: ClassVar[Type["Unit"]] = MockUnit
+
+     RequesterClass: ClassVar[Type["Requester"]] = MockRequester
+
+     WorkerClass: ClassVar[Type["Worker"]] = MockWorker
+
+     AgentClass: ClassVar[Type["Agent"]] = MockAgent
+
+     curr_db_location: ClassVar[str]
+
+     def initialize_provider_datastore(self, storage_path: str) -> Any:
+         """Create the MockDatastore rooted at the given storage path"""
+         datastore = MockDatastore(datastore_root=storage_path)
+         return datastore
+
+     def setup_resources_for_task_run(
+         self,
+         task_run: "TaskRun",
+         args: "DictConfig",
+         shared_state: "SharedTaskState",
+         server_url: str,
+     ) -> None:
+         """Mocks have no per-run resources to set up, so this does nothing"""
+         return
+
+     def cleanup_resources_from_task_run(
+         self, task_run: "TaskRun", server_url: str
+     ) -> None:
+         """Mocks have no per-run resources to clean up, so this does nothing"""
+         return
+
+     @classmethod
+     def get_wrapper_js_path(cls):
+         """
+         Return the path to the `wrap_crowd_source.js` file for this
+         provider, which sits in the same directory as this module
+         """
+         provider_dir = os.path.dirname(__file__)
+         return os.path.join(provider_dir, "wrap_crowd_source.js")
diff --git a/mephisto/abstractions/providers/mock/mock_requester.py b/mephisto/abstractions/providers/mock/mock_requester.py
new file mode 100644
index 000000000..1cd0dbd25
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/mock_requester.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass, field
+from mephisto.data_model.requester import Requester, RequesterArgs
+from mephisto.abstractions.providers.mock.provider_type import PROVIDER_TYPE
+
+from typing import Optional, Dict, List, Mapping, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.providers.mock.mock_datastore import MockDatastore
+ from argparse import _ArgumentGroup as ArgumentGroup
+ from omegaconf import DictConfig
+
+ # Fixed budget reported by MockRequester.get_available_budget()
+ MOCK_BUDGET = 100000.0
+
+
+ # Arguments for registering a mock requester: a display name plus a switch
+ # that makes MockRequester.register() raise, to exercise failure handling.
+ @dataclass
+ class MockRequesterArgs(RequesterArgs):
+     name: str = field(
+         default="MOCK_REQUESTER",
+         metadata={
+             "help": "Name for the requester in the Mephisto DB.",
+             "required": True,
+         },
+     )
+     # When True, MockRequester.register() raises instead of registering
+     force_fail: bool = field(
+         default=False, metadata={"help": "Trigger a failed registration"}
+     )
+
+
+ class MockRequester(Requester):
+     """
+     Requester for the mock provider. No real credentials are involved: the
+     registration flag is simply stored in the mock datastore, and the
+     available budget is a fixed constant.
+     """
+
+     ArgsClass = MockRequesterArgs
+
+     def __init__(
+         self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+     ):
+         super().__init__(db, db_id, row=row)
+         self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
+
+     def register(self, args: Optional["DictConfig"] = None) -> None:
+         """
+         Mark this requester as registered in the mock datastore; mock
+         requesters don't actually register credentials.
+
+         Raises a plain Exception, without registering, when args is given and
+         its "force_fail" entry is True (used to test failure handling).
+         """
+         if args is not None and args.get("force_fail") is True:
+             raise Exception("Forced failure test exception was set")
+         # Register on every non-failing call. The earlier if/else form left one
+         # non-failing path (args present or args absent, depending on how the
+         # else was paired) returning without recording the registration.
+         self.datastore.set_requester_registered(self.db_id, True)
+
+     def is_registered(self) -> bool:
+         """Return the registration status"""
+         return self.datastore.get_requester_registered(self.db_id)
+
+     def get_available_budget(self) -> float:
+         """MockRequesters have $100000 to spend"""
+         return MOCK_BUDGET
+
+     def is_sandbox(self) -> bool:
+         """MockRequesters are for testing only, and are thus treated as sandbox"""
+         return True
+
+     @staticmethod
+     def new(db: "MephistoDB", requester_name: str) -> "Requester":
+         """Register a mock-provider requester under the given name"""
+         return MockRequester._register_requester(db, requester_name, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mock/mock_unit.py b/mephisto/abstractions/providers/mock/mock_unit.py
new file mode 100644
index 000000000..cfe1b58d7
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/mock_unit.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.assignment import Unit
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.abstractions.blueprint import AgentState
+
+from mephisto.abstractions.providers.mock.provider_type import PROVIDER_TYPE
+from typing import List, Optional, Tuple, Dict, Mapping, Any, Type, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.assignment import Assignment
+ from mephisto.abstractions.providers.mock.mock_datastore import MockDatastore
+
+from mephisto.operations.logger_core import get_logger
+
+ # Module-level logger; MockUnit.launch() logs the mock task URLs through it
+ logger = get_logger(name=__name__, verbose=True, level="info")
+
+
+ class MockUnit(Unit):
+     """
+     Unit for the mock provider. Launching only updates the unit's status and
+     reports where the task can be opened; expiry is tracked in the mock
+     datastore rather than with any external vendor.
+     """
+
+     def __init__(
+         self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+     ):
+         super().__init__(db, db_id, row=row)
+         self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
+
+     def launch(self, task_url: str) -> None:
+         """
+         Mark this unit as LAUNCHED and print/log the localhost URLs at which
+         the mock task can be previewed and worked on.
+
+         The port is taken from task_url, which may or may not carry a scheme
+         ("localhost:3000/" and "http://localhost:3000/" both yield 3000). If
+         no valid port is present, the port in the message is left empty.
+         """
+         from urllib.parse import urlsplit
+
+         self.db.update_unit(self.db_id, status=AssignmentState.LAUNCHED)
+
+         # TODO(OWN) get this link to the frontend
+         # urlsplit only recognizes the host:port part after a "//", so add one
+         # for scheme-less urls. Splitting on ":" by hand picked up the host
+         # instead of the port whenever the url started with a scheme.
+         netloc_url = task_url if "://" in task_url else f"//{task_url}"
+         try:
+             parsed_port = urlsplit(netloc_url).port
+         except ValueError:
+             # Raised when the port part is not a valid port number
+             parsed_port = None
+         port = "" if parsed_port is None else str(parsed_port)
+
+         print(task_url)
+         print(
+             f"Mock task launched: localhost:{port} for preview, "
+             f"localhost:{port}/?worker_id=x&assignment_id={self.db_id}"
+         )
+         logger.info(
+             f"Mock task launched: localhost:{port} for preview, "
+             f"localhost:{port}/?worker_id=x&assignment_id={self.db_id} for assignment {self.assignment_id}"
+         )
+
+         return None
+
+     def expire(self) -> float:
+         """
+         Expire this unit immediately: set its status to EXPIRED, flag it as
+         expired in the mock datastore, and return 0.0.
+         """
+         self.db.update_unit(self.db_id, status=AssignmentState.EXPIRED)
+         self.datastore.set_unit_expired(self.db_id, True)
+         return 0.0
+
+     def is_expired(self) -> bool:
+         """Return the expired flag stored for this unit in the mock datastore."""
+         return self.datastore.get_unit_expired(self.db_id)
+
+     @staticmethod
+     def new(
+         db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
+     ) -> "Unit":
+         """Create a Unit for the given assignment"""
+         return MockUnit._register_unit(db, assignment, index, pay_amount, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mock/mock_worker.py b/mephisto/abstractions/providers/mock/mock_worker.py
new file mode 100644
index 000000000..efbd08657
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/mock_worker.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.worker import Worker
+from mephisto.abstractions.providers.mock.provider_type import PROVIDER_TYPE
+from typing import List, Optional, Tuple, Dict, Mapping, Type, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Unit, Agent
+ from mephisto.data_model.requester import Requester
+ from mephisto.abstractions.providers.mock.mock_datastore import MockDatastore
+
+
+ class MockWorker(Worker):
+     """
+     Worker for the mock provider. Bonuses always succeed without doing
+     anything, and the blocked flag is stored in the mock datastore.
+     """
+
+     def __init__(
+         self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+     ):
+         super().__init__(db, db_id, row=row)
+         self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
+
+     def bonus_worker(
+         self, amount: float, reason: str, unit: Optional["Unit"] = None
+     ) -> Tuple[bool, str]:
+         """Pretend to bonus this worker; always reports success with no message"""
+         outcome = (True, "")
+         return outcome
+
+     def block_worker(
+         self,
+         reason: str,
+         unit: Optional["Unit"] = None,
+         requester: Optional["Requester"] = None,
+     ) -> Tuple[bool, str]:
+         """Flag this worker as blocked in the mock datastore and report success"""
+         self.datastore.set_worker_blocked(self.db_id, True)
+         outcome = (True, "")
+         return outcome
+
+     def unblock_worker(self, reason: str, requester: "Requester") -> bool:
+         """Clear this worker's blocked flag in the mock datastore and report success"""
+         self.datastore.set_worker_blocked(self.db_id, False)
+         return True
+
+     def is_blocked(self, requester: "Requester") -> bool:
+         """Return the blocked flag stored for this worker (the requester is not consulted)"""
+         blocked = self.datastore.get_worker_blocked(self.db_id)
+         return blocked
+
+     def is_eligible(self, task_run: "TaskRun") -> bool:
+         """Mock workers are eligible for every task run"""
+         return True
+
+     @staticmethod
+     def new(db: "MephistoDB", worker_id: str) -> "Worker":
+         """Register a mock-provider worker under the given worker id"""
+         return MockWorker._register_worker(db, worker_id, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mock/provider_type.py b/mephisto/abstractions/providers/mock/provider_type.py
new file mode 100644
index 000000000..d044e6891
--- /dev/null
+++ b/mephisto/abstractions/providers/mock/provider_type.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+ # Identifier the mock provider's classes register themselves under
+ PROVIDER_TYPE = "mock"
diff --git a/mephisto/providers/mock/wrap_crowd_source.js b/mephisto/abstractions/providers/mock/wrap_crowd_source.js
similarity index 100%
rename from mephisto/providers/mock/wrap_crowd_source.js
rename to mephisto/abstractions/providers/mock/wrap_crowd_source.js
diff --git a/mephisto/abstractions/providers/mturk/__init__.py b/mephisto/abstractions/providers/mturk/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/providers/mturk/mturk_agent.py b/mephisto/abstractions/providers/mturk/mturk_agent.py
new file mode 100644
index 000000000..c85617fcc
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_agent.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.agent import Agent
+from mephisto.abstractions.blueprint import AgentState
+from mephisto.abstractions.providers.mturk.provider_type import PROVIDER_TYPE
+from mephisto.abstractions.providers.mturk.mturk_utils import (
+ approve_work,
+ reject_work,
+ get_assignment,
+)
+
+from typing import List, Optional, Tuple, Dict, Mapping, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.assignment import Unit
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.packet import Packet
+ from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+ from mephisto.abstractions.providers.mturk.mturk_unit import MTurkUnit
+ from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+
+
+class MTurkAgent(Agent):
+ """
+ This class encompasses a worker as they are working on an individual assignment.
+ It maintains details for the current task at hand such as start and end time,
+ connection status, etc.
+ """
+
+ # Ensure inherited methods use this level's provider type
+ PROVIDER_TYPE = PROVIDER_TYPE
+
+ def __init__(
+ self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+ ):
+ super().__init__(db, db_id, row=row)
+ self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
+ self.PROVIDER_TYPE
+ )
+ unit: "MTurkUnit" = self.get_unit()
+ self.mturk_assignment_id = unit.get_mturk_assignment_id()
+ # TODO(#97) any additional init as is necessary once
+ # a mock DB exists
+
+ def _get_mturk_assignment_id(self):
+ if self.mturk_assignment_id is None:
+ self.mturk_assignment_id = self.get_unit().get_mturk_assignment_id()
+ return self.mturk_assignment_id
+
+ def _get_client(self) -> Any:
+ """
+ Get an mturk client for usage with mturk_utils for this agent
+ """
+ unit = self.get_unit()
+ requester: "MTurkRequester" = unit.get_requester()
+ return self.datastore.get_client_for_requester(requester._requester_name)
+
+ @classmethod
+ def new_from_provider_data(
+ cls,
+ db: "MephistoDB",
+ worker: "Worker",
+ unit: "Unit",
+ provider_data: Dict[str, Any],
+ ) -> "Agent":
+ """
+ Wrapper around the new method that allows registering additional
+ bookkeeping information from a crowd provider for this agent
+ """
+ datastore: "MTurkDatastore" = db.get_datastore_for_provider(cls.PROVIDER_TYPE)
+ datastore.register_assignment_to_hit(
+ provider_data["hit_id"], unit.db_id, provider_data["assignment_id"]
+ )
+ return super().new_from_provider_data(db, worker, unit, provider_data)
+
+ # Required functions for Agent Interface
+
+ def approve_work(self) -> None:
+ """Approve the work done on this specific Unit"""
+ client = self._get_client()
+ approve_work(client, self._get_mturk_assignment_id(), override_rejection=True)
+ self.update_status(AgentState.STATUS_APPROVED)
+
+ def reject_work(self, reason) -> None:
+ """Reject the work done on this specific Unit"""
+ client = self._get_client()
+ reject_work(client, self._get_mturk_assignment_id(), reason)
+ self.update_status(AgentState.STATUS_REJECTED)
+
+ def mark_done(self) -> None:
+ """
+ MTurk agents are marked as done on the side of MTurk, so if this agent
+ is marked as done there's nothing else we need to do as the task has been
+ submitted.
+ """
+ if self.get_status() != AgentState.STATUS_DISCONNECT:
+ self.db.update_agent(
+ agent_id=self.db_id, status=AgentState.STATUS_COMPLETED
+ )
+
+ @staticmethod
+ def new(db: "MephistoDB", worker: "Worker", unit: "Unit") -> "Agent":
+ """Create an agent for this worker to be used for work on the given Unit."""
+ return MTurkAgent._register_agent(db, worker, unit, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mturk/mturk_datastore.py b/mephisto/abstractions/providers/mturk/mturk_datastore.py
new file mode 100644
index 000000000..971b06918
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_datastore.py
@@ -0,0 +1,289 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import boto3
+import sqlite3
+import os
+import threading
+
+from datetime import datetime
+
+
+from botocore.exceptions import ClientError
+from botocore.exceptions import ProfileNotFound
+
+from typing import Dict, Any, Optional
+
+MTURK_REGION_NAME = "us-east-1"
+
+CREATE_HITS_TABLE = """CREATE TABLE IF NOT EXISTS hits (
+ hit_id TEXT PRIMARY KEY UNIQUE,
+ unit_id TEXT,
+ assignment_id TEXT,
+ link TEXT,
+ assignment_time_in_seconds INTEGER NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+"""
+
+CREATE_RUN_MAP_TABLE = """CREATE TABLE IF NOT EXISTS run_mappings (
+ hit_id TEXT,
+ run_id TEXT
+);
+"""
+
+CREATE_RUNS_TABLE = """CREATE TABLE IF NOT EXISTS runs (
+ run_id TEXT PRIMARY KEY UNIQUE,
+ arn_id TEXT,
+ hit_type_id TEXT NOT NULL,
+ hit_config_path TEXT NOT NULL,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+"""
+
+CREATE_QUALIFICATIONS_TABLE = """CREATE TABLE IF NOT EXISTS qualifications (
+ qualification_name TEXT PRIMARY KEY UNIQUE,
+ requester_id TEXT,
+ mturk_qualification_name TEXT,
+ mturk_qualification_id TEXT,
+ creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+"""
+
+
+class MTurkDatastore:
+ """
+ Handles storing multiple sessions for different requesters
+ across a single mephisto thread (locked to a MephistoDB).
+ Also creates the relevant tables for mapping between MTurk
+ and mephisto.
+ """
+
+ def __init__(self, datastore_root: str):
+ """Initialize the session storage to empty, initialize tables if needed"""
+ self.session_storage: Dict[str, boto3.Session] = {}
+ self.table_access_condition = threading.Condition()
+ self.conn: Dict[int, sqlite3.Connection] = {}
+ self.db_path = os.path.join(datastore_root, "mturk.db")
+ self.init_tables()
+ self.datastore_root = datastore_root
+
+ def _get_connection(self) -> sqlite3.Connection:
+ """Returns a singular database connection to be shared amongst all
+ calls for a given thread.
+ """
+ # TODO(#101) is there a problem with having just one db connection?
+ # Will this cause bugs with failed commits?
+ curr_thread = threading.get_ident()
+ if curr_thread not in self.conn or self.conn[curr_thread] is None:
+ conn = sqlite3.connect(self.db_path)
+ conn.row_factory = sqlite3.Row
+ self.conn[curr_thread] = conn
+ return self.conn[curr_thread]
+
+ def init_tables(self) -> None:
+ """
+ Run all the table creation SQL queries to ensure the expected tables exist
+ """
+ with self.table_access_condition:
+ conn = self._get_connection()
+ conn.execute("PRAGMA foreign_keys = 1")
+ with conn:
+ c = conn.cursor()
+ c.execute(CREATE_HITS_TABLE)
+ c.execute(CREATE_RUNS_TABLE)
+ c.execute(CREATE_RUN_MAP_TABLE)
+ c.execute(CREATE_QUALIFICATIONS_TABLE)
+
+ def new_hit(self, hit_id: str, hit_link: str, duration: int, run_id: str) -> None:
+ """Register a new HIT mapping in the table"""
+ with self.table_access_condition, self._get_connection() as conn:
+ c = conn.cursor()
+ c.execute(
+ """INSERT INTO hits(
+ hit_id,
+ link,
+ assignment_time_in_seconds
+ ) VALUES (?, ?, ?);""",
+ (hit_id, hit_link, duration),
+ )
+ c.execute(
+ """INSERT INTO run_mappings(
+ hit_id,
+ run_id
+ ) VALUES (?, ?);""",
+ (hit_id, run_id),
+ )
+
+ def get_unassigned_hit_ids(self, run_id: str):
+ """
+ Return a list of all HIT ids that haven't been assigned
+ """
+ with self.table_access_condition:
+ conn = self._get_connection()
+ c = conn.cursor()
+ c.execute(
+ """
+ SELECT
+ hit_id,
+ unit_id,
+ run_id
+ FROM
+ hits
+ INNER JOIN run_mappings
+ USING (hit_id)
+ WHERE unit_id IS NULL
+ AND run_id = ?;
+ """,
+ (run_id,),
+ )
+ results = c.fetchall()
+ return [r["hit_id"] for r in results]
+
+ def register_assignment_to_hit(
+ self,
+ hit_id: str,
+ unit_id: Optional[str] = None,
+ assignment_id: Optional[str] = None,
+ ) -> None:
+ """
+ Register a specific assignment and hit to the given unit,
+ or clear the assignment after a return
+ """
+ with self.table_access_condition, self._get_connection() as conn:
+ c = conn.cursor()
+ c.execute(
+ """UPDATE hits
+ SET assignment_id = ?, unit_id = ?
+ WHERE hit_id = ?
+ """,
+ (assignment_id, unit_id, hit_id),
+ )
+
+ def get_hit_mapping(self, unit_id: str) -> sqlite3.Row:
+ """Get the mapping between Mephisto IDs and MTurk ids"""
+ with self.table_access_condition:
+ conn = self._get_connection()
+ c = conn.cursor()
+ c.execute(
+ """
+ SELECT * from hits
+ WHERE unit_id = ?
+ """,
+ (unit_id,),
+ )
+ results = c.fetchall()
+ return results[0]
+
+ def register_run(
+ self, run_id: str, arn_id: str, hit_type_id: str, hit_config_path: str
+ ) -> None:
+ """Register a new task run in the mturk table"""
+ with self.table_access_condition, self._get_connection() as conn:
+ c = conn.cursor()
+ c.execute(
+ """INSERT INTO runs(
+ run_id,
+ arn_id,
+ hit_type_id,
+ hit_config_path
+ ) VALUES (?, ?, ?, ?);""",
+ (run_id, arn_id, hit_type_id, hit_config_path),
+ )
+
+ def get_run(self, run_id: str) -> sqlite3.Row:
+ """Get the details for a run by task_run_id"""
+ with self.table_access_condition:
+ conn = self._get_connection()
+ c = conn.cursor()
+ c.execute(
+ """
+ SELECT * from runs
+ WHERE run_id = ?
+ """,
+ (run_id,),
+ )
+ results = c.fetchall()
+ return results[0]
+
+ def create_qualification_mapping(
+ self,
+ qualification_name: str,
+ requester_id: str,
+ mturk_qualification_name: str,
+ mturk_qualification_id: str,
+ ) -> None:
+ """
+ Create a mapping between mephisto qualification name and mturk
+ qualification details in the local datastore
+ """
+ with self.table_access_condition, self._get_connection() as conn:
+ c = conn.cursor()
+ c.execute(
+ """INSERT INTO qualifications(
+ qualification_name,
+ requester_id,
+ mturk_qualification_name,
+ mturk_qualification_id
+ ) VALUES (?, ?, ?, ?);""",
+ (
+ qualification_name,
+ requester_id,
+ mturk_qualification_name,
+ mturk_qualification_id,
+ ),
+ )
+ return None
+
+ def get_qualification_mapping(
+ self, qualification_name: str
+ ) -> Optional[sqlite3.Row]:
+ """Get the mapping between Mephisto qualifications and MTurk qualifications"""
+ with self.table_access_condition:
+ conn = self._get_connection()
+ c = conn.cursor()
+ c.execute(
+ """
+ SELECT * from qualifications
+ WHERE qualification_name = ?
+ """,
+ (qualification_name,),
+ )
+ results = c.fetchall()
+ if len(results) == 0:
+ return None
+ return results[0]
+
+ def get_session_for_requester(self, requester_name: str) -> boto3.Session:
+ """
+ Either create a new session for the given requester or return
+ the existing one if it has already been created
+ """
+ if requester_name not in self.session_storage:
+ session = boto3.Session(
+ profile_name=requester_name, region_name=MTURK_REGION_NAME
+ )
+ self.session_storage[requester_name] = session
+
+ return self.session_storage[requester_name]
+
+ def get_client_for_requester(self, requester_name: str) -> Any:
+ """
+ Return the client for the given requester, which should allow
+ direct calls to the mturk surface
+ """
+ return self.get_session_for_requester(requester_name).client("mturk")
+
+ def get_sandbox_client_for_requester(self, requester_name: str) -> Any:
+ """
+ Return the sandbox client for the given requester, which should allow
+ direct calls to the mturk sandbox surface
+ """
+ return self.get_session_for_requester(requester_name).client(
+ service_name="mturk",
+ region_name="us-east-1",
+ endpoint_url="https://mturk-requester-sandbox.us-east-1.amazonaws.com",
+ )
diff --git a/mephisto/abstractions/providers/mturk/mturk_provider.py b/mephisto/abstractions/providers/mturk/mturk_provider.py
new file mode 100644
index 000000000..4979192c7
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_provider.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+from mephisto.data_model.task_config import TaskConfig
+from mephisto.abstractions.providers.mturk.provider_type import PROVIDER_TYPE
+from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+from mephisto.abstractions.crowd_provider import CrowdProvider, ProviderArgs
+from mephisto.data_model.requester import RequesterArgs
+from mephisto.abstractions.providers.mturk.mturk_agent import MTurkAgent
+from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+from mephisto.abstractions.providers.mturk.mturk_unit import MTurkUnit
+from mephisto.abstractions.providers.mturk.mturk_worker import MTurkWorker
+from mephisto.abstractions.providers.mturk.mturk_utils import (
+ create_hit_type,
+ create_hit_config,
+ setup_sns_topic,
+ delete_sns_topic,
+ delete_qualification,
+)
+from mephisto.operations.registry import register_mephisto_abstraction
+from dataclasses import dataclass, field
+
+from typing import ClassVar, Dict, Any, Optional, Type, List, cast, TYPE_CHECKING
+
+from mephisto.data_model.requester import Requester
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.blueprint import SharedTaskState
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Unit
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.agent import Agent
+ from omegaconf import DictConfig
+
+
+@dataclass
+class MTurkProviderArgs(ProviderArgs):
+ """Provider args for an MTurk provider"""
+
+ _provider_type: str = PROVIDER_TYPE
+
+
+@register_mephisto_abstraction()
+class MTurkProvider(CrowdProvider):
+ """
+ Implementation of a crowdprovider that interfaces with MTurk
+ """
+
+ # Ensure inherited methods use this level's provider type
+ PROVIDER_TYPE = PROVIDER_TYPE
+
+ UnitClass: ClassVar[Type["Unit"]] = MTurkUnit
+
+ RequesterClass: ClassVar[Type["Requester"]] = MTurkRequester
+
+ WorkerClass: ClassVar[Type["Worker"]] = MTurkWorker
+
+ AgentClass: ClassVar[Type["Agent"]] = MTurkAgent
+
+ ArgsClass = MTurkProviderArgs
+
+ SUPPORTED_TASK_TYPES: ClassVar[List[str]] = [
+ # TODO
+ ]
+
+ def initialize_provider_datastore(self, storage_path: str) -> Any:
+ """
+ MTurk itself is the source of truth for most data required to run
+ tasks on MTurk. The datastore holds sessions to connect with
+ MTurk as well as mappings between MTurk ids and Mephisto ids
+ """
+ return MTurkDatastore(datastore_root=storage_path)
+
+ def _get_client(self, requester_name: str) -> Any:
+ """
+ Get an mturk client for usage with mturk_utils
+ """
+ return self.datastore.get_client_for_requester(requester_name)
+
+ def setup_resources_for_task_run(
+ self,
+ task_run: "TaskRun",
+ args: "DictConfig",
+ shared_state: "SharedTaskState",
+ server_url: str,
+ ) -> None:
+ """
+ Set up SNS queue to receive agent events from MTurk, and produce the
+ HIT type for this task run.
+ """
+ requester = cast("MTurkRequester", task_run.get_requester())
+ session = self.datastore.get_session_for_requester(requester._requester_name)
+ task_config = task_run.get_task_config()
+
+ # Set up SNS queue
+ # TODO(OWN) implement arn?
+ task_run_id = task_run.db_id
+ # task_name = task_run.get_task().task_name
+ # arn_id = setup_sns_topic(session, task_name, server_url, task_run_id)
+ arn_id = "TEST"
+
+ # Set up HIT config
+ config_dir = os.path.join(self.datastore.datastore_root, task_run_id)
+ task_config = TaskConfig(task_run)
+
+ # Find or create relevant qualifications
+ qualifications = []
+ for qualification in shared_state.qualifications:
+ applicable_providers = qualification["applicable_providers"]
+ if (
+ applicable_providers is None
+ or self.PROVIDER_TYPE in applicable_providers
+ ):
+ qualifications.append(qualification)
+ for qualification in qualifications:
+ qualification_name = qualification["qualification_name"]
+ if requester.PROVIDER_TYPE == "mturk_sandbox":
+ qualification_name += "_sandbox"
+ if self.datastore.get_qualification_mapping(qualification_name) is None:
+ qualification[
+ "QualificationTypeId"
+ ] = requester._create_new_mturk_qualification(qualification_name)
+
+ if hasattr(shared_state, "mturk_specific_qualifications"):
+ qualifications += shared_state.mturk_specific_qualifications
+
+ # Set up HIT type
+ client = self._get_client(requester._requester_name)
+ hit_type_id = create_hit_type(client, task_config, qualifications)
+ self.datastore.register_run(task_run_id, arn_id, hit_type_id, config_dir)
+
+ def cleanup_resources_from_task_run(
+ self, task_run: "TaskRun", server_url: str
+ ) -> None:
+ """Shut down the SNS queue for this task."""
+ requester = cast("MTurkRequester", task_run.get_requester())
+ session = self.datastore.get_session_for_requester(requester._requester_name)
+ run_row = self.datastore.get_run(task_run.db_id)
+ delete_sns_topic(session, run_row["arn_id"])
+
+ @classmethod
+ def get_wrapper_js_path(cls):
+ """
+ Return the path to the `wrap_crowd_source.js` file for this
+ provider to be deployed to the server
+ """
+ return os.path.join(os.path.dirname(__file__), "wrap_crowd_source.js")
+
+ def cleanup_qualification(self, qualification_name: str) -> None:
+ """Remove the qualification from MTurk, if a local mapping for it exists"""
+ mapping = self.datastore.get_qualification_mapping(qualification_name)
+ if mapping is None:
+ return None
+
+ requester_id = mapping["requester_id"]
+ requester = Requester(self.db, requester_id)
+ assert isinstance(requester, MTurkRequester), "Must be an mturk requester"
+ client = requester._get_client(requester._requester_name)
+ delete_qualification(client, mapping["mturk_qualification_id"])
diff --git a/mephisto/abstractions/providers/mturk/mturk_requester.py b/mephisto/abstractions/providers/mturk/mturk_requester.py
new file mode 100644
index 000000000..26b3e70ec
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_requester.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from uuid import uuid4
+import time
+import random
+
+from dataclasses import dataclass, field
+from omegaconf import MISSING, DictConfig
+from mephisto.data_model.requester import Requester, RequesterArgs
+from mephisto.abstractions.providers.mturk.mturk_utils import (
+ setup_aws_credentials,
+ get_requester_balance,
+ check_aws_credentials,
+ find_or_create_qualification,
+)
+from mephisto.abstractions.providers.mturk.provider_type import PROVIDER_TYPE
+
+from typing import List, Optional, Mapping, Dict, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+ from argparse import _ArgumentGroup as ArgumentGroup
+
+
+MAX_QUALIFICATION_ATTEMPTS = 300
+
+
+@dataclass
+class MTurkRequesterArgs(RequesterArgs):
+ _group: str = field(
+ default="MTurkRequester",
+ metadata={
+ "help": (
+ "AWS is required to create a new Requester. "
+ "Please create an IAM user with programmatic access and "
+ "AmazonMechanicalTurkFullAccess policy at "
+ 'https://console.aws.amazon.com/iam/ (On the "Set permissions" '
+ 'page, choose "Attach existing policies directly" and then select '
+ '"AmazonMechanicalTurkFullAccess" policy). After creating '
+ "the IAM user, you should get an Access Key ID "
+ "and Secret Access Key. "
+ )
+ },
+ )
+ access_key_id: str = field(
+ default=MISSING, metadata={"required": True, "help": "IAM Access Key ID"}
+ )
+ secret_access_key: str = field(
+ default=MISSING, metadata={"required": True, "help": "IAM Secret Access Key"}
+ )
+
+
+class MTurkRequester(Requester):
+ """
+ Wrapper for requester behavior as provided by MTurk. Makes
+ all requests directly to MTurk through boto3.
+ """
+
+ # Ensure inherited methods use this level's provider type
+ PROVIDER_TYPE = PROVIDER_TYPE
+ ArgsClass = MTurkRequesterArgs
+
+ def __init__(
+ self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+ ):
+ super().__init__(db, db_id, row=row)
+ self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
+ self.PROVIDER_TYPE
+ )
+ # Use _requester_name to preserve sandbox behavior which
+ # utilizes a different requester_name
+ self._requester_name = self.requester_name
+
+ def _get_client(self, requester_name: str) -> Any:
+ """
+ Get an mturk client for usage with mturk_utils
+ """
+ return self.datastore.get_client_for_requester(requester_name)
+
+ # Required functions for a Requester implementation
+
+ def register(self, args: Optional[DictConfig] = None) -> None:
+ """
+ Register this requester with the crowd provider by providing any required credentials
+ or such. If no args are provided, assume the registration is already made and try
+ to assert it as such.
+ """
+ for req_field in ["access_key_id", "secret_access_key"]:
+ if args is not None and req_field not in args:
+ raise Exception(
+ f'Missing IAM "{req_field}" in requester registration args'
+ )
+ setup_aws_credentials(self._requester_name, args)
+
+ def is_registered(self) -> bool:
+ """Return whether or not this requester has registered yet"""
+ return check_aws_credentials(self._requester_name)
+
+ def get_available_budget(self) -> float:
+ """Get the available budget from MTurk"""
+ client = self._get_client(self._requester_name)
+ return get_requester_balance(client)
+
+ def _create_new_mturk_qualification(self, qualification_name: str) -> str:
+ """
+ Create a new qualification on MTurk owned by the requester provided
+ """
+ client = self._get_client(self._requester_name)
+ qualification_desc = f"Equivalent qualification for {qualification_name}."
+ use_qualification_name = qualification_name
+ qualification_id = find_or_create_qualification(
+ client, qualification_name, qualification_desc, must_be_owned=True
+ )
+ if qualification_id is None:
+ # Try to append time to make the qualification unique
+ use_qualification_name = f"{qualification_name}_{time.time()}"
+ qualification_id = find_or_create_qualification(
+ client, use_qualification_name, qualification_desc, must_be_owned=True
+ )
+ attempts = 0
+ while qualification_id is None:
+ # Append something somewhat random
+ use_qualification_name = f"{qualification_name}_{str(uuid4())}"
+ qualification_id = find_or_create_qualification(
+ client,
+ use_qualification_name,
+ qualification_desc,
+ must_be_owned=True,
+ )
+ attempts += 1
+ if attempts > MAX_QUALIFICATION_ATTEMPTS:
+ raise Exception(
+ "Something has gone extremely wrong with creating qualification "
+ f"{qualification_name} for requester {self.requester_name}"
+ )
+ # Store the new qualification in the datastore
+ self.datastore.create_qualification_mapping(
+ qualification_name, self.db_id, use_qualification_name, qualification_id
+ )
+ return qualification_id
+
+ @staticmethod
+ def new(db: "MephistoDB", requester_name: str) -> "Requester":
+ return MTurkRequester._register_requester(db, requester_name, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mturk/mturk_unit.py b/mephisto/abstractions/providers/mturk/mturk_unit.py
new file mode 100644
index 000000000..631984d3d
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_unit.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from datetime import datetime
+
+from mephisto.data_model.assignment import Unit
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.abstractions.blueprint import AgentState
+from mephisto.abstractions.providers.mturk.mturk_utils import (
+ expire_hit,
+ get_hit,
+ create_hit_with_hit_type,
+)
+from mephisto.abstractions.providers.mturk.provider_type import PROVIDER_TYPE
+from typing import List, Optional, Tuple, Mapping, Dict, Any, Type, cast, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.assignment import Assignment
+ from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+ from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+
+from mephisto.operations.logger_core import get_logger
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+
+class MTurkUnit(Unit):
+ """
+ This class tracks the status of an individual worker's contribution to a
+ higher level assignment. It is the smallest 'unit' of work to complete
+ the assignment, and this class is only responsible for checking
+ the status of that work itself being done.
+ """
+
+ # Ensure inherited methods use this level's provider type
+ PROVIDER_TYPE = PROVIDER_TYPE
+
+ def __init__(
+ self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+ ):
+ super().__init__(db, db_id, row=row)
+ self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
+ self.PROVIDER_TYPE
+ )
+ self.hit_id: Optional[str] = None
+ self._sync_hit_mapping()
+ self.__requester: Optional["MTurkRequester"] = None
+
+ def _get_client(self, requester_name: str) -> Any:
+ """
+ Get an mturk client for usage with mturk_utils
+ """
+ return self.datastore.get_client_for_requester(requester_name)
+
+ def _sync_hit_mapping(self) -> None:
+ """Sync with the datastore to see if any mappings have updated"""
+ try:
+ mapping = dict(self.datastore.get_hit_mapping(self.db_id))
+ self.hit_id = mapping["hit_id"]
+ self.mturk_assignment_id = mapping.get("assignment_id")
+ self.assignment_time_in_seconds = mapping.get("assignment_time_in_seconds")
+ except IndexError:
+ # HIT does not appear to exist
+ self.hit_id = None
+ self.mturk_assignment_id = None
+ self.assignment_time_in_seconds = -1
+
+ def get_mturk_assignment_id(self) -> Optional[str]:
+ """
+ Return the MTurk assignment id associated with this unit
+ """
+ if self.mturk_assignment_id is None:
+ self._sync_hit_mapping()
+ return self.mturk_assignment_id
+
+ def get_mturk_hit_id(self) -> Optional[str]:
+ """
+ Return the MTurk hit id associated with this unit
+ """
+ if self.hit_id is None:
+ self._sync_hit_mapping()
+ return self.hit_id
+
+ def get_requester(self) -> "MTurkRequester":
+ """Wrapper around regular Requester as this will be MTurkRequesters"""
+ if self.__requester is None:
+ self.__requester = cast("MTurkRequester", super().get_requester())
+ return self.__requester
+
+ def clear_assigned_agent(self) -> None:
+ """
+ In addition to clearing the agent, we also need to dissociate the
+ hit_id from this unit in the MTurkDatastore
+ """
+ super().clear_assigned_agent()
+ mturk_hit_id = self.get_mturk_hit_id()
+ if mturk_hit_id is not None:
+ self.datastore.register_assignment_to_hit(mturk_hit_id)
+ self._sync_hit_mapping()
+
+ # Required Unit functions
+
+ def get_status(self) -> str:
+ """Get status for this unit directly from MTurk, fall back on local info"""
+ if self.db_status in [
+ AssignmentState.CREATED,
+ AssignmentState.ACCEPTED,
+ AssignmentState.EXPIRED,
+ AssignmentState.SOFT_REJECTED,
+ ]:
+ # These statuses don't change with a get_status call
+ return self.db_status
+
+ if self.db_status in [AssignmentState.COMPLETED, AssignmentState.REJECTED]:
+ # These statuses only change on agent dependent changes
+ agent = self.get_assigned_agent()
+ found_status = self.db_status
+ if agent is not None:
+ agent_status = agent.get_status()
+ if agent_status == AgentState.STATUS_APPROVED:
+ found_status = AssignmentState.ACCEPTED
+ elif agent_status == AgentState.STATUS_REJECTED:
+ found_status = AssignmentState.REJECTED
+ elif agent_status == AgentState.STATUS_SOFT_REJECTED:
+ found_status = AssignmentState.SOFT_REJECTED
+ else:
+ logger.warning("Agent is None")
+ if found_status != self.db_status:
+ self.set_db_status(found_status)
+ return self.db_status
+
+ # Remaining statuses are tracking a live HIT
+
+ mturk_hit_id = self.get_mturk_hit_id()
+ if mturk_hit_id is None:
+ # Can't determine anything if there is no HIT on this assignment
+ return self.db_status
+
+ requester = self.get_requester()
+ client = self._get_client(requester._requester_name)
+ hit = get_hit(client, mturk_hit_id)
+ hit_data = hit["HIT"]
+
+ local_status = self.db_status
+ external_status = self.db_status
+
+ if hit_data["HITStatus"] == "Assignable":
+ external_status = AssignmentState.LAUNCHED
+ elif hit_data["HITStatus"] == "Unassignable":
+ external_status = AssignmentState.ASSIGNED
+ elif hit_data["HITStatus"] in ["Reviewable", "Reviewing"]:
+ external_status = AssignmentState.COMPLETED
+ if hit_data["NumberOfAssignmentsAvailable"] != 0:
+ external_status = AssignmentState.EXPIRED
+ elif hit_data["HITStatus"] == "Disposed":
+ # The HIT was deleted, must rely on what we have
+ external_status = local_status
+ else:
+ raise Exception(f"Unexpected HIT status {hit_data['HITStatus']}")
+
+ if external_status != local_status:
+ if (
+ local_status == AssignmentState.ASSIGNED
+ and external_status == AssignmentState.LAUNCHED
+ ):
+ # Treat this as a return event, this hit is now doable by someone else
+ agent = self.get_assigned_agent()
+ if agent is not None:
+ # mark the agent as having returned the HIT, to
+ # free any running tasks and have Blueprint decide on cleanup.
+ agent.update_status(AgentState.STATUS_RETURNED)
+ self.set_db_status(external_status)
+
+ return self.db_status
+
+ def launch(self, task_url: str) -> None:
+ """Create this HIT on MTurk (making it available) and register the ids in the local db"""
+ task_run = self.get_assignment().get_task_run()
+ duration = task_run.get_task_config().assignment_duration_in_seconds
+ run_id = task_run.db_id
+ hit_type_id = self.datastore.get_run(run_id)["hit_type_id"]
+ requester = self.get_requester()
+ client = self._get_client(requester._requester_name)
+ frame_height = 650
+ hit_link, hit_id, response = create_hit_with_hit_type(
+ client, frame_height, task_url, hit_type_id
+ )
+ # TODO(OWN) get this link to the frontend
+ print(hit_link)
+
+ # We create a hit for this unit, but note that this unit may not
+ # necessarily match with the same HIT that was launched for it.
+ self.datastore.new_hit(hit_id, hit_link, duration, run_id)
+ self.set_db_status(AssignmentState.LAUNCHED)
+ return None
+
+ def expire(self) -> float:
+ """
+ Send a request to expire the HIT. Returns 0 unless this unit is currently
+ assigned, in which case the stored assignment duration in seconds is returned
+ """
+ delay = 0
+ if self.get_status() == AssignmentState.ASSIGNED:
+ # The assignment is currently being worked on,
+ # so we will set the wait time to be the
+ # amount of time we granted for working on this assignment
+ if self.assignment_time_in_seconds is not None:
+ delay = self.assignment_time_in_seconds
+ mturk_hit_id = self.get_mturk_hit_id()
+ requester = self.get_requester()
+ client = self._get_client(requester._requester_name)
+ if mturk_hit_id is not None:
+ expire_hit(client, mturk_hit_id)
+ return delay
+ else:
+ unassigned_hit_ids = self.datastore.get_unassigned_hit_ids(self.task_run_id)
+
+ if len(unassigned_hit_ids) == 0:
+ logger.warning(
+ "Number of unassigned hit IDs is 0; Potential RACE CONDITION"
+ )
+ return delay
+ hit_id = unassigned_hit_ids[0]
+ expire_hit(client, hit_id)
+ self.datastore.register_assignment_to_hit(hit_id, self.db_id)
+ self.set_db_status(AssignmentState.EXPIRED)
+ return delay
+
+ def is_expired(self) -> bool:
+ """
+ Determine if this unit is expired according to the vendor.
+
+ In this case, we keep track of the expiration locally by refreshing
+ the hit's status and seeing if we've expired.
+ """
+ return self.get_status() == AssignmentState.EXPIRED
+
+ @staticmethod
+ def new(
+ db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
+ ) -> "Unit":
+ """Create a Unit for the given assignment"""
+ return MTurkUnit._register_unit(
+ db, assignment, index, pay_amount, PROVIDER_TYPE
+ )
diff --git a/mephisto/abstractions/providers/mturk/mturk_utils.py b/mephisto/abstractions/providers/mturk/mturk_utils.py
new file mode 100644
index 000000000..8576af911
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_utils.py
@@ -0,0 +1,704 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import boto3
+import os
+import json
+import re
+from typing import Dict, Optional, Tuple, List, Any, TYPE_CHECKING
+from datetime import datetime
+
+from botocore import client
+from botocore.exceptions import ClientError
+from botocore.exceptions import ProfileNotFound
+from botocore.config import Config
+from omegaconf import DictConfig
+
+from mephisto.operations.logger_core import get_logger
+from mephisto.operations.config_handler import get_config_arg
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_config import TaskConfig
+
# Fee rates used by the calculate_mturk_*_fee helpers (0.2 == 20%).
MTURK_TASK_FEE = 0.2
MTURK_BONUS_FEE = 0.2
# A client whose endpoint URL equals this value is treated as a sandbox client.
SANDBOX_ENDPOINT = "https://mturk-requester-sandbox.us-east-1.amazonaws.com"

# Alias used in annotations only; it places no constraint on the client object.
MTurkClient = Any

# QualificationTypeId that create_hit_type treats as the locale requirement.
MTURK_LOCALE_REQUIREMENT = "00000000000000000071"

# Shared botocore configuration: allow up to 10 attempts per request.
botoconfig = Config(retries=dict(max_attempts=10))
+
+
def client_is_sandbox(client: MTurkClient) -> bool:
    """
    Return True when the given client talks to the sandbox, i.e. when its
    endpoint URL equals SANDBOX_ENDPOINT; False means the live server.
    """
    endpoint_url = client.meta.endpoint_url
    return endpoint_url == SANDBOX_ENDPOINT
+
+
def check_aws_credentials(profile_name: str) -> bool:
    """
    Return True if a boto3 session can be created for ``profile_name``,
    False if boto reports that the profile does not exist.
    """
    try:
        boto3.Session(profile_name=profile_name)
    except ProfileNotFound:
        return False
    return True
+
+
def setup_aws_credentials(
    profile_name: str, register_args: Optional[DictConfig] = None
) -> bool:
    """
    Make sure AWS credentials exist for ``profile_name``; always returns True.

    If boto3 already knows the profile nothing is written. Otherwise the key
    pair is read from ``register_args`` (``access_key_id`` and
    ``secret_access_key``) or, when that is None, prompted for on stdin, and
    a new profile section is appended to ``~/.aws/credentials``.
    """
    try:
        boto3.Session(profile_name=profile_name)
        return True
    except ProfileNotFound:
        # The profile is unknown, so gather a key pair to register it with.
        if register_args is None:
            print(
                f"AWS credentials for {profile_name} not found. Please create "
                "an IAM user with "
                "programmatic access and AdministratorAccess policy at "
                'https://console.aws.amazon.com/iam/ (On the "Set permissions" '
                'page, choose "Attach existing policies directly" and then select '
                '"AdministratorAccess" policy). After creating the IAM user, '
                "please enter the user's Access Key ID and Secret Access "
                "Key below:"
            )
            aws_access_key_id = input("Access Key ID: ")
            aws_secret_access_key = input("Secret Access Key: ")
        else:
            aws_access_key_id = register_args.access_key_id
            aws_secret_access_key = register_args.secret_access_key

        aws_directory = os.path.expanduser("~/.aws/")
        if not os.path.exists(aws_directory):
            os.makedirs(aws_directory)

        aws_credentials_file_path = "~/.aws/credentials"
        expanded_path = os.path.expanduser(aws_credentials_file_path)
        existing_contents = None
        if os.path.exists(expanded_path):
            with open(expanded_path, "r") as existing_file:
                existing_contents = existing_file.read()

        with open(expanded_path, "a+") as credentials_file:
            # Pad existing content so the new section follows a blank line.
            if existing_contents:
                if existing_contents.endswith("\n\n"):
                    padding = ""
                elif existing_contents.endswith("\n"):
                    padding = "\n"
                else:
                    padding = "\n\n"
                if padding:
                    credentials_file.write(padding)
            # Append the new profile section.
            credentials_file.write(f"[{profile_name}]\n")
            credentials_file.write(f"aws_access_key_id={aws_access_key_id}\n")
            credentials_file.write(
                f"aws_secret_access_key={aws_secret_access_key}\n"
            )

        print(
            f"AWS credentials successfully saved in {aws_credentials_file_path} file.\n"
        )
        return True
+
+
def calculate_mturk_task_fee(task_amount: float) -> float:
    """
    Return the MTurk fee owed on a task reward of ``task_amount``.

    MTurk Pricing: https://requester.mturk.com/pricing
    The fee is MTURK_TASK_FEE (20%) of the amount paid to Workers.
    """
    task_fee = MTURK_TASK_FEE * task_amount
    return task_fee
+
+
def calculate_mturk_bonus_fee(bonus_amount: float) -> float:
    """
    Return the MTurk fee owed on a bonus of ``bonus_amount``.

    MTurk Pricing: https://requester.mturk.com/pricing
    The fee is MTURK_BONUS_FEE (20%) of the bonus paid to Workers.
    """
    # Use the bonus rate, not the task rate: the two constants happen to be
    # equal today but are configured independently.
    return MTURK_BONUS_FEE * bonus_amount
+
+
def get_requester_balance(client: MTurkClient) -> float:
    """Return the ``AvailableBalance`` reported for this client's account, as a float"""
    balance_info = client.get_account_balance()
    return float(balance_info["AvailableBalance"])
+
+
def check_mturk_balance(client: MTurkClient, balance_needed: float):
    """
    Return True when the requester account holds at least ``balance_needed``,
    otherwise print a top-up hint and return False.

    A ClientError with code "RequestError" prints account-linking
    instructions and exits the process; any other ClientError is re-raised.
    """
    # Fetching the balance doubles as a connectivity test for the API.
    # In Sandbox this always returns $10,000
    try:
        account_info = client.get_account_balance()
        user_balance = float(account_info["AvailableBalance"])
    except ClientError as e:
        if e.response["Error"]["Code"] != "RequestError":
            raise
        print(
            "ERROR: To use the MTurk API, you will need an Amazon Web "
            "Services (AWS) Account. Your AWS account must be linked to "
            "your Amazon Mechanical Turk Account. Visit "
            "https://requestersandbox.mturk.com/developer to get started. "
            "(Note: if you have recently linked your account, please wait "
            "for a couple minutes before trying again.)\n"
        )
        quit()

    if not user_balance < balance_needed:
        return True

    print(
        "You might not have enough money in your MTurk account. Please go "
        "to https://requester.mturk.com/account and increase your balance "
        f"to at least ${balance_needed}, and then try again."
    )
    return False
+
+
def create_hit_config(
    opt: Dict[str, Any], task_description: str, unique_worker: bool, is_sandbox: bool
) -> None:
    """
    Write the HIT configuration as JSON to ``hit_config.json`` inside
    ``opt["tmp_dir"]``, replacing any file already there.
    """
    mturk_submit_url = (
        "https://workersandbox.mturk.com/mturk/externalSubmit"
        if is_sandbox
        else "https://www.mturk.com/mturk/externalSubmit"
    )
    # The chat pane title comes from chat_title, then hit_title, and only
    # then falls back to a generic label.
    chat_title = opt.get("chat_title", opt.get("hit_title", "Live Chat"))
    hit_config = {
        "task_description": task_description,
        "is_sandbox": is_sandbox,
        "mturk_submit_url": mturk_submit_url,
        "unique_worker": unique_worker,
        "frame_height": opt.get("frame_height", 650),
        "allow_reviews": opt.get("allow_reviews", False),
        "block_mobile": opt.get("block_mobile", True),
        "chat_title": chat_title,
        "template_type": opt.get("frontend_template_type", "default"),
    }

    config_path = os.path.join(opt["tmp_dir"], "hit_config.json")
    if os.path.exists(config_path):
        os.remove(config_path)
    with open(config_path, "w") as config_file:
        json.dump(hit_config, config_file)
+
+
def delete_qualification(client: MTurkClient, qualification_id: str) -> None:
    """Delete the qualification type with the given id through the client"""
    request = {"QualificationTypeId": qualification_id}
    client.delete_qualification_type(**request)
+
+
def find_qualification(
    client: MTurkClient, qualification_name: str, must_be_owned: bool = True
) -> Tuple[bool, Optional[str]]:
    """
    Look up a qualification by exact name and return
    ``(meets_owner_constraint, qual_id)``.

    Qualifications owned by the caller are searched first and always satisfy
    the constraint. If the name is only found among qualifications that were
    not restricted to the caller, the id is still returned, but the flag is
    False when ``must_be_owned`` is set. ``(True, None)`` means no
    qualification with that name exists.
    """

    def _first_exact_match(owned_by_caller: bool) -> Optional[Dict[str, Any]]:
        # The query may return near matches, so compare names exactly.
        response = client.list_qualification_types(
            Query=qualification_name,
            MustBeRequestable=True,
            MustBeOwnedByCaller=owned_by_caller,
        )
        return next(
            (
                candidate
                for candidate in response["QualificationTypes"]
                if candidate["Name"] == qualification_name
            ),
            None,
        )

    owned_match = _first_exact_match(True)
    if owned_match is not None:
        return (True, owned_match["QualificationTypeId"])

    # Not ours; see whether someone else already holds the name.
    foreign_match = _first_exact_match(False)
    if foreign_match is None:
        return (True, None)
    if must_be_owned:
        return (False, foreign_match["QualificationTypeId"])
    return (True, foreign_match["QualificationTypeId"])
+
+
def find_or_create_qualification(
    client: MTurkClient,
    qualification_name: str,
    description: str,
    must_be_owned: bool = True,
) -> Optional[str]:
    """
    Return the id of the qualification named ``qualification_name``,
    creating it as an Active qualification type if it does not exist yet.

    Returns None when the qualification exists but fails the ownership
    constraint reported by find_qualification.
    """
    qual_usable, existing_id = find_qualification(
        client, qualification_name, must_be_owned=must_be_owned
    )
    if qual_usable is False:
        return None
    if existing_id is not None:
        return existing_id

    # Nothing by that name exists yet, so register it.
    created = client.create_qualification_type(
        Name=qualification_name,
        Description=description,
        QualificationTypeStatus="Active",
    )
    return created["QualificationType"]["QualificationTypeId"]
+
+
def give_worker_qualification(
    client: MTurkClient,
    worker_id: str,
    qualification_id: str,
    value: Optional[int] = None,
) -> None:
    """
    Associate the qualification with the worker, without notifying them.

    The qualification's integer value is ``value``, or 1 when no value is given.
    """
    integer_value = 1 if value is None else value
    client.associate_qualification_with_worker(
        QualificationTypeId=qualification_id,
        WorkerId=worker_id,
        IntegerValue=integer_value,
        SendNotification=False,
    )
+
+
def remove_worker_qualification(
    client: MTurkClient, worker_id: str, qualification_id: str, reason: str = ""
) -> None:
    """Remove the qualification from the given worker, passing the reason along"""
    request = {
        "QualificationTypeId": qualification_id,
        "WorkerId": worker_id,
        "Reason": reason,
    }
    client.disassociate_qualification_from_worker(**request)
+
+
def convert_mephisto_qualifications(
    client: MTurkClient, qualifications: List[Dict[str, Any]]
):
    """
    Convert qualifications from mephisto's format to MTurk's.

    MTurk-style keys already present on a qualification are kept. A missing
    QualificationTypeId is resolved (or created) from ``qualification_name``,
    with "_sandbox" appended for sandbox clients; a missing Comparator comes
    from ``comparator``; and when no MTurk value key is set, ``value`` fills
    IntegerValues (list) or IntegerValue (int). Returns the converted list.
    """
    mturk_keys = (
        "QualificationTypeId",
        "Comparator",
        "IntegerValue",
        "IntegerValues",
        "LocaleValues",
        "ActionsGuarded",
    )
    converted_qualifications = []
    for qualification in qualifications:
        converted = {key: qualification.get(key) for key in mturk_keys}

        if converted["QualificationTypeId"] is None:
            qualification_name = qualification["qualification_name"]
            if client_is_sandbox(client):
                qualification_name += "_sandbox"
            qual_id = find_or_create_qualification(
                client,
                qualification_name,
                "Qualification required for Mephisto-launched tasks",
                False,
            )
            if qual_id is None:
                # TODO log more loudly that this qualification could not be resolved?
                print(
                    f"Qualification name {qualification_name} can not be found or created on MTurk"
                )
            converted["QualificationTypeId"] = qual_id

        if converted["Comparator"] is None:
            converted["Comparator"] = qualification["comparator"]

        # With no MTurk value key set, fall back to the mephisto "value".
        mturk_values = (
            converted["IntegerValue"],
            converted["IntegerValues"],
            converted["LocaleValues"],
        )
        if all(entry is None for entry in mturk_values):
            value = qualification["value"]
            if isinstance(value, list):
                converted["IntegerValues"] = value
            elif isinstance(value, int):
                converted["IntegerValue"] = value

        # IntegerValue is deprecated: fold it into IntegerValues and drop it.
        single_value = converted.pop("IntegerValue")
        if single_value is not None:
            converted["IntegerValues"] = [single_value]

        # Unset value keys are removed rather than sent as None.
        for value_key in ("IntegerValues", "LocaleValues"):
            if converted[value_key] is None:
                del converted[value_key]

        if converted["ActionsGuarded"] is None:
            converted["ActionsGuarded"] = "DiscoverPreviewAndAccept"

        converted_qualifications.append(converted)
    return converted_qualifications
+
+
def create_hit_type(
    client: MTurkClient,
    task_config: "TaskConfig",
    qualifications: List[Dict[str, Any]],
    auto_approve_delay: Optional[int] = 7 * 24 * 3600,  # default 1 week
) -> str:
    """
    Create a HIT type from the task config and qualifications and return
    its HITTypeId.

    When none of the converted qualifications is a locale requirement and
    the client is not a sandbox client, a locale requirement is appended. It
    uses the "mturk"/"allowed_locales" config value, defaulting to US, CA,
    GB, AU and NZ.
    """
    hit_title = task_config.task_title
    hit_description = task_config.task_description
    hit_keywords = ",".join(task_config.task_tags)
    hit_reward = task_config.task_reward
    assignment_duration_in_seconds = task_config.assignment_duration_in_seconds
    existing_qualifications = convert_mephisto_qualifications(client, qualifications)

    # Start from the caller's qualifications and note whether they already
    # restrict location.
    requirements: List[Any] = []
    has_locale_qual = False
    if existing_qualifications is not None:
        has_locale_qual = any(
            [
                qual["QualificationTypeId"] == MTURK_LOCALE_REQUIREMENT
                for qual in existing_qualifications
            ]
        )
        requirements += existing_qualifications

    # Without an explicit location qualification, live HITs are restricted
    # to some english-speaking countries.
    if not has_locale_qual and not client_is_sandbox(client):
        allowed_locales = get_config_arg("mturk", "allowed_locales")
        if allowed_locales is None:
            allowed_locales = [
                {"Country": country} for country in ("US", "CA", "GB", "AU", "NZ")
            ]
        requirements.append(
            {
                "QualificationTypeId": MTURK_LOCALE_REQUIREMENT,
                "Comparator": "In",
                "LocaleValues": allowed_locales,
                "ActionsGuarded": "DiscoverPreviewAndAccept",
            }
        )

    response = client.create_hit_type(
        AutoApprovalDelayInSeconds=auto_approve_delay,
        AssignmentDurationInSeconds=assignment_duration_in_seconds,
        Reward=str(hit_reward),
        Title=hit_title,
        Keywords=hit_keywords,
        Description=hit_description,
        QualificationRequirements=requirements,
    )
    return response["HITTypeId"]
+
+
def create_hit_with_hit_type(
    client: MTurkClient,
    frame_height: int,
    page_url: str,
    hit_type_id: str,
    num_assignments: int = 1,
) -> Tuple[str, str, Dict[str, Any]]:
    """
    Create a HIT of the given type that sends workers to ``page_url``.

    The question is an ExternalQuestion document pointing at ``page_url``
    and rendered in a frame ``frame_height`` pixels tall. The HIT accepts
    ``num_assignments`` assignments and has a lifetime of 31 days.

    Returns ``(hit_link, hit_id, response)``: the preview link for the HIT's
    group on the sandbox or live worker site, the new HIT's id, and the raw
    create response.
    """
    # The URL is embedded in XML, so ampersands must be entity-escaped.
    page_url = page_url.replace("&", "&amp;")
    amazon_ext_url = (
        "http://mechanicalturk.amazonaws.com/"
        "AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd"
    )
    # Honor the caller's frame_height; it used to be ignored in favor of a
    # hard-coded 650.
    question_data_structure = (
        '<ExternalQuestion xmlns="{}">'
        "<ExternalURL>{}</ExternalURL>"
        "<FrameHeight>{}</FrameHeight>"
        "</ExternalQuestion>"
    ).format(amazon_ext_url, page_url, frame_height)

    is_sandbox = client_is_sandbox(client)

    response = client.create_hit_with_hit_type(
        HITTypeId=hit_type_id,
        MaxAssignments=num_assignments,
        LifetimeInSeconds=60 * 60 * 24 * 31,
        Question=question_data_structure,
    )

    # The preview link is built from the ids MTurk echoes back.
    hit_type_id = response["HIT"]["HITTypeId"]
    hit_id = response["HIT"]["HITId"]

    url_target = "workersandbox" if is_sandbox else "www"
    hit_link = "https://{}.mturk.com/mturk/preview?groupId={}".format(
        url_target, hit_type_id
    )
    return hit_link, hit_id, response
+
+
def expire_hit(client: MTurkClient, hit_id: str):
    """Expire the HIT right away by moving its expiration to a time in the past"""
    already_elapsed = datetime(year=2015, month=1, day=1)
    client.update_expiration_for_hit(HITId=hit_id, ExpireAt=already_elapsed)
+
+
def setup_sns_topic(
    session: boto3.Session, task_name: str, server_url: str, task_run_id: str
) -> str:
    """
    Create an SNS topic for a task run and return its ARN.

    The topic is named after ``task_name`` with every character outside
    ``[a-zA-Z0-9_-]`` removed. An HTTPS subscription is registered for
    ``{server_url}/sns_posts?task_run_id={task_run_id}`` so the server
    receives notifications, and the topic policy is set to allow
    ``mturk-requester.amazonaws.com`` to publish to the topic.
    """
    sns_client = session.client("sns", region_name="us-east-1", config=botoconfig)
    topic_name = re.sub("[^a-zA-Z0-9_-]+", "", task_name)
    arn = sns_client.create_topic(Name=topic_name)["TopicArn"]

    subscription_endpoint = "{}/sns_posts?task_run_id={}".format(
        server_url, task_run_id
    )
    sns_client.subscribe(
        TopicArn=arn, Protocol="https", Endpoint=subscription_endpoint
    )
    # The attributes are requested but their contents are not used here.
    sns_client.get_topic_attributes(TopicArn=arn)

    policy_json = """{{
    "Version": "2008-10-17",
    "Id": "{}/MTurkOnlyPolicy",
    "Statement": [
        {{
            "Sid": "MTurkOnlyPolicy",
            "Effect": "Allow",
            "Principal": {{
                "Service": "mturk-requester.amazonaws.com"
            }},
            "Action": "SNS:Publish",
            "Resource": "{}"
        }}
    ]}}""".format(
        arn, arn
    )
    sns_client.set_topic_attributes(
        TopicArn=arn, AttributeName="Policy", AttributeValue=policy_json
    )
    return arn
+
+
def subscribe_to_hits(client: MTurkClient, hit_type_id: str, sns_arn: str) -> None:
    """
    Activate SNS notifications to ``sns_arn`` for the given hit type,
    covering abandoned, returned and submitted assignments.
    """
    notification = {
        "Destination": sns_arn,
        "Transport": "SNS",
        "Version": "2006-05-05",
        "EventTypes": [
            "AssignmentAbandoned",
            "AssignmentReturned",
            "AssignmentSubmitted",
        ],
    }
    client.update_notification_settings(
        HITTypeId=hit_type_id, Notification=notification, Active=True
    )
+
+
def send_test_notif(client: MTurkClient, topic_arn: str, event_type: str) -> None:
    """
    Ask MTurk to send a test notification of ``event_type`` to the SNS
    destination identified by ``topic_arn``.
    """
    notification = {
        "Destination": topic_arn,
        "Transport": "SNS",
        "Version": "2006-05-05",
        "EventTypes": [
            "AssignmentAbandoned",
            "AssignmentReturned",
            "AssignmentSubmitted",
        ],
    }
    client.send_test_event_notification(
        Notification=notification, TestEventType=event_type
    )
+
+
def delete_sns_topic(session: boto3.Session, topic_arn: str) -> None:
    """Delete the SNS topic identified by ``topic_arn``"""
    sns_client = session.client("sns", region_name="us-east-1", config=botoconfig)
    sns_client.delete_topic(TopicArn=topic_arn)
+
+
def get_hit(client: MTurkClient, hit_id: str) -> Dict[str, Any]:
    """Fetch the HIT with the given hit_id from mturk and return the response"""
    hit_response = client.get_hit(HITId=hit_id)
    return hit_response
+
+
def get_assignment(client: MTurkClient, assignment_id: str) -> Dict[str, Any]:
    """
    Fetch an assignment from mturk by assignment_id and return the response.
    Only works if the assignment is in a completed state.
    """
    assignment_response = client.get_assignment(AssignmentId=assignment_id)
    return assignment_response
+
+
def get_assignments_for_hit(client: MTurkClient, hit_id: str) -> List[Dict[str, Any]]:
    """
    Return the "Assignments" list from a single list_assignments_for_hit
    call for this hit, or an empty list when the response has none.
    """
    response = client.list_assignments_for_hit(HITId=hit_id)
    return response.get("Assignments", [])
+
+
def approve_work(
    client: MTurkClient, assignment_id: str, override_rejection: bool = False
) -> None:
    """
    Approve the given assignment through the mturk client.

    Any failure is logged with its traceback and otherwise ignored.
    """
    request = {
        "AssignmentId": assignment_id,
        "OverrideRejection": override_rejection,
    }
    try:
        client.approve_assignment(**request)
    except Exception as e:
        # TODO(#93) Break down this error to the many reasons why approve may fail,
        # only silently pass on approving an already approved assignment
        failure_message = f"Approving MTurk assignment failed, likely because it has auto-approved. Details: {e}"
        logger.exception(failure_message, exc_info=True)
+
+
def reject_work(client: MTurkClient, assignment_id: str, reason: str) -> None:
    """
    Reject the given assignment through the mturk client, passing ``reason``
    as the requester feedback.

    Any failure is logged with its traceback and otherwise ignored.
    """
    request = {"AssignmentId": assignment_id, "RequesterFeedback": reason}
    try:
        client.reject_assignment(**request)
    except Exception as e:
        # TODO(#93) Break down this error to the many reasons why reject may fail,
        # only silently pass on rejecting an assignment that can no longer be rejected
        failure_message = f"Rejecting MTurk assignment failed, likely because it has auto-approved. Details:{e}"
        logger.exception(failure_message, exc_info=True)
+
+
def approve_assignments_for_hit(
    client: MTurkClient, hit_id: str, override_rejection: bool = False
):
    """
    Approve every assignment that get_assignments_for_hit returns for the
    given hit, through the mturk client.
    """
    for assignment in get_assignments_for_hit(client, hit_id):
        client.approve_assignment(
            AssignmentId=assignment["AssignmentId"],
            OverrideRejection=override_rejection,
        )
+
+
def block_worker(client: MTurkClient, worker_id: str, reason: str) -> None:
    """Create a block on the worker with the given id, passing the reason along"""
    request = {"WorkerId": worker_id, "Reason": reason}
    client.create_worker_block(**request)
+
+
def unblock_worker(client: MTurkClient, worker_id: str, reason: str) -> None:
    """Delete the block on the given worker, passing the reason along"""
    request = {"WorkerId": worker_id, "Reason": reason}
    client.delete_worker_block(**request)
+
+
def is_worker_blocked(client: MTurkClient, worker_id: str) -> bool:
    """
    Determine if the given worker is blocked by this client.

    Block listings are fetched 100 at a time and every page is checked, so a
    worker is found even when the account holds more than 100 blocks.
    Returns True as soon as a matching ``WorkerId`` is seen.
    """
    request: Dict[str, Any] = {"MaxResults": 100}
    while True:
        response = client.list_worker_blocks(**request)
        blocks = response["WorkerBlocks"]
        if any(block["WorkerId"] == worker_id for block in blocks):
            return True
        next_token = response.get("NextToken")
        # Stop on an empty page or when no continuation token is offered.
        if not blocks or not next_token:
            return False
        request["NextToken"] = next_token
+
+
def pay_bonus(
    client: MTurkClient,
    worker_id: str,
    bonus_amount: float,
    assignment_id: str,
    reason: str,
    unique_request_token: str,
) -> bool:
    """
    Send a bonus to a Turker for the given assignment.

    The bonus plus its MTurk fee must be covered by the account balance;
    if it is not, nothing is sent and False is returned. Returns True once
    the bonus request has been sent.
    """
    bonus_fee = calculate_mturk_bonus_fee(bonus_amount)
    total_cost = bonus_amount + bonus_fee
    can_afford = check_mturk_balance(client, balance_needed=total_cost)
    if not can_afford:
        print("Cannot pay bonus. Reason: Insufficient funds in your MTurk account.")
        return False

    client.send_bonus(
        WorkerId=worker_id,
        BonusAmount=str(bonus_amount),
        AssignmentId=assignment_id,
        Reason=reason,
        UniqueRequestToken=unique_request_token,
    )
    return True
+
+
def email_worker(
    client: MTurkClient, worker_id: str, subject: str, message_text: str
) -> Tuple[bool, str]:
    """
    Send an email to a single worker through the mturk client.

    Returns ``(True, "")`` when no failure is reported, otherwise
    ``(False, message)`` with the first reported failure message.
    """
    response = client.notify_workers(
        Subject=subject, MessageText=message_text, WorkerIds=[worker_id]
    )
    failures = response["NotifyWorkersFailureStatuses"]
    if len(failures) == 0:
        return (True, "")
    first_failure = failures[0]
    return (False, first_failure["NotifyWorkersFailureMessage"])
+
+
def get_outstanding_hits(client: MTurkClient) -> Dict[str, List[Dict[str, Any]]]:
    """
    Return the HITs that are still on the MTurk Server, grouped by HITTypeId.

    HITs are listed 100 at a time. Paging continues while a page contains
    HITs and carries a ``NextToken``; a page without a token ends the
    listing instead of raising ``KeyError``.
    """
    hit_by_type: Dict[str, List[Dict[str, Any]]] = {}
    request: Dict[str, Any] = {"MaxResults": 100}
    while True:
        page = client.list_hits(**request)
        page_hits = page["HITs"]
        for hit in page_hits:
            hit_by_type.setdefault(hit["HITTypeId"], []).append(hit)
        next_token = page.get("NextToken")
        # An empty page or a missing continuation token means we are done.
        if not page_hits or next_token is None:
            break
        request["NextToken"] = next_token
    return hit_by_type
+
+
def expire_and_dispose_hits(
    client: MTurkClient, hits: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """
    Try to delete every HIT in ``hits``; a HIT that cannot be deleted is
    expired instead.

    Returns any HITs that could not be disposed of.
    """
    non_disposed_hits = []
    for hit in hits:
        hit_id = hit["HITId"]
        try:
            client.delete_hit(HITId=hit_id)
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C and
            # interpreter shutdown still propagate. The HIT is expired so it
            # stops accepting work, and reported back to the caller.
            client.update_expiration_for_hit(
                HITId=hit_id, ExpireAt=datetime(2015, 1, 1)
            )
            non_disposed_hits.append(hit)
    return non_disposed_hits
diff --git a/mephisto/abstractions/providers/mturk/mturk_worker.py b/mephisto/abstractions/providers/mturk/mturk_worker.py
new file mode 100644
index 000000000..243577fd6
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/mturk_worker.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.worker import Worker
+from mephisto.data_model.requester import Requester
+from mephisto.abstractions.providers.mturk.provider_type import PROVIDER_TYPE
+from mephisto.abstractions.providers.mturk.mturk_utils import (
+ pay_bonus,
+ block_worker,
+ unblock_worker,
+ is_worker_blocked,
+ give_worker_qualification,
+ remove_worker_qualification,
+)
+from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+
+from uuid import uuid4
+
+from typing import List, Optional, Tuple, Dict, Mapping, Any, cast, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.assignment import Unit
+ from mephisto.abstractions.providers.mturk.mturk_unit import MTurkUnit
+ from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+
+from mephisto.operations.logger_core import get_logger
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+
class MTurkWorker(Worker):
    """
    This class represents an individual - namely a person. It maintains
    components of ongoing identity for a user, and exposes the MTurk
    operations that target that person: granting and revoking
    qualifications, bonusing, and blocking.
    """

    # Ensure inherited methods use this level's provider type
    PROVIDER_TYPE = PROVIDER_TYPE

    def __init__(
        self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
    ):
        super().__init__(db, db_id, row=row)
        self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
            self.PROVIDER_TYPE
        )
        self._worker_name = self.worker_name  # sandbox workers use a different name

    @classmethod
    def get_from_mturk_worker_id(
        cls, db: "MephistoDB", mturk_worker_id: str
    ) -> Optional["MTurkWorker"]:
        """
        Get the MTurkWorker from the given worker_id, or None when the db
        has no worker by that name for this provider type.
        """
        if cls.PROVIDER_TYPE != PROVIDER_TYPE:
            # Subclasses with another provider type look workers up under a
            # "_sandbox"-suffixed name.
            mturk_worker_id += "_sandbox"
        workers = db.find_workers(
            worker_name=mturk_worker_id, provider_type=cls.PROVIDER_TYPE
        )
        if len(workers) == 0:
            # TODO warn?
            return None
        return workers[0]

    def get_mturk_worker_id(self) -> str:
        """Return the worker name used to address this worker on MTurk"""
        return self._worker_name

    def _get_client(self, requester_name: str) -> Any:
        """
        Get an mturk client for usage with mturk_utils
        """
        return self.datastore.get_client_for_requester(requester_name)

    def grant_crowd_qualification(
        self, qualification_name: str, value: int = 1
    ) -> None:
        """
        Grant a qualification by the given name to this worker. Check the local
        MTurk db to find the matching MTurk qualification to grant, and pass
        that. If no qualification exists, try to create one.

        In creating a new qualification, Mephisto resolves the ambiguity over which
        requester to associate that qualification with by using the FIRST requester
        of the given account type (either `mturk` or `mturk_sandbox`)
        """
        mturk_qual_details = self.datastore.get_qualification_mapping(
            qualification_name
        )
        if mturk_qual_details is not None:
            requester = Requester(self.db, mturk_qual_details["requester_id"])
            qualification_id = mturk_qual_details["mturk_qualification_id"]
        else:
            # No local mapping yet: create the qualification under the first
            # requester of the matching account type.
            target_type = (
                "mturk_sandbox" if qualification_name.endswith("sandbox") else "mturk"
            )
            requester = self.db.find_requesters(provider_type=target_type)[0]
            assert isinstance(
                requester, MTurkRequester
            ), "find_requesters must return mturk requester for given provider types"
            qualification_id = requester._create_new_mturk_qualification(
                qualification_name
            )
        assert isinstance(
            requester, MTurkRequester
        ), "Must be an MTurk requester for MTurk quals"
        client = self._get_client(requester._requester_name)
        give_worker_qualification(
            client, self.get_mturk_worker_id(), qualification_id, value
        )
        return None

    def revoke_crowd_qualification(self, qualification_name: str) -> None:
        """
        Revoke the qualification by the given name from this worker. Check the local
        MTurk db to find the matching MTurk qualification to revoke, pass if
        no such qualification exists.
        """
        mturk_qual_details = self.datastore.get_qualification_mapping(
            qualification_name
        )
        if mturk_qual_details is None:
            logger.error(
                f"No locally stored MTurk qualification to revoke for name {qualification_name}"
            )
            return None

        requester = Requester(self.db, mturk_qual_details["requester_id"])
        assert isinstance(
            requester, MTurkRequester
        ), "Must be an MTurk requester from MTurk quals"
        client = self._get_client(requester._requester_name)
        qualification_id = mturk_qual_details["mturk_qualification_id"]
        remove_worker_qualification(
            client, self.get_mturk_worker_id(), qualification_id
        )
        return None

    def bonus_worker(
        self, amount: float, reason: str, unit: Optional["Unit"] = None
    ) -> Tuple[bool, str]:
        """
        Bonus this worker for work any reason. Return tuple of success and
        failure message.

        A unit is required, since the bonus is sent against that unit's MTurk
        assignment. The call fails when pay_bonus reports that the bonus
        was not sent.
        """
        if unit is None:
            # TODO(WISH) implement
            return False, "bonusing via compensation tasks not yet available"

        unit = cast("MTurkUnit", unit)
        requester = unit.get_assignment().get_task_run().get_requester()
        client = self._get_client(requester._requester_name)
        mturk_assignment_id = unit.get_mturk_assignment_id()
        assert mturk_assignment_id is not None, "Cannot bonus for a unit with no agent"
        bonus_sent = pay_bonus(
            client, self._worker_name, amount, mturk_assignment_id, reason, str(uuid4())
        )
        if not bonus_sent:
            # pay_bonus returns False only when the balance check fails, so
            # do not report a bonus that was never sent.
            return False, "Insufficient funds in the MTurk account to pay this bonus"
        return True, ""

    def block_worker(
        self,
        reason: str,
        unit: Optional["Unit"] = None,
        requester: Optional["Requester"] = None,
    ) -> Tuple[bool, str]:
        """
        Block this worker for a specified reason. Return success of block.

        The block is created for ``requester``; when only ``unit`` is given,
        the requester of that unit's task run is used.
        """
        if unit is None and requester is None:
            # TODO(WISH) soft block from all requesters? Maybe have the master
            # requester soft block?
            return (
                False,
                "Blocking without a unit or requester not yet supported for MTurkWorkers",
            )
        elif unit is not None and requester is None:
            requester = unit.get_assignment().get_task_run().get_requester()
        requester = cast("MTurkRequester", requester)
        client = self._get_client(requester._requester_name)
        block_worker(client, self._worker_name, reason)
        return True, ""

    def unblock_worker(self, reason: str, requester: "Requester") -> bool:
        """unblock a blocked worker for the specified reason. Return success of unblock"""
        requester = cast("MTurkRequester", requester)
        client = self._get_client(requester._requester_name)
        unblock_worker(client, self._worker_name, reason)
        return True

    def is_blocked(self, requester: "Requester") -> bool:
        """Determine if a worker is blocked by the given requester"""
        requester = cast("MTurkRequester", requester)
        client = self._get_client(requester._requester_name)
        return is_worker_blocked(client, self._worker_name)

    def is_eligible(self, task_run: "TaskRun") -> bool:
        """
        Qualifications are handled primarily by MTurk, so if a worker is able to get
        through to be able to access the task, they should be eligible
        """
        return True

    @staticmethod
    def new(db: "MephistoDB", worker_id: str) -> "Worker":
        """Register a new worker with the given id under this provider type"""
        return MTurkWorker._register_worker(db, worker_id, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mturk/provider_type.py b/mephisto/abstractions/providers/mturk/provider_type.py
new file mode 100644
index 000000000..240b3ffa5
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/provider_type.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+PROVIDER_TYPE = "mturk"
diff --git a/mephisto/abstractions/providers/mturk/utils/__init__.py b/mephisto/abstractions/providers/mturk/utils/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk/utils/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/providers/mturk/utils/script_utils.py b/mephisto/abstractions/providers/mturk/utils/script_utils.py
similarity index 94%
rename from mephisto/providers/mturk/utils/script_utils.py
rename to mephisto/abstractions/providers/mturk/utils/script_utils.py
index 81733399d..63e24d6ce 100644
--- a/mephisto/providers/mturk/utils/script_utils.py
+++ b/mephisto/abstractions/providers/mturk/utils/script_utils.py
@@ -6,12 +6,12 @@
from typing import List, Optional, TYPE_CHECKING, Dict
-from mephisto.providers.mturk.mturk_utils import give_worker_qualification
+from mephisto.abstractions.providers.mturk.mturk_utils import give_worker_qualification
from mephisto.data_model.requester import Requester
from mephisto.data_model.assignment import Unit
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
+ from mephisto.abstractions.database import MephistoDB
def direct_soft_block_mturk_workers(
diff --git a/mephisto/providers/mturk/wrap_crowd_source.js b/mephisto/abstractions/providers/mturk/wrap_crowd_source.js
similarity index 100%
rename from mephisto/providers/mturk/wrap_crowd_source.js
rename to mephisto/abstractions/providers/mturk/wrap_crowd_source.js
diff --git a/mephisto/abstractions/providers/mturk_sandbox/__init__.py b/mephisto/abstractions/providers/mturk_sandbox/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/abstractions/providers/mturk_sandbox/provider_type.py b/mephisto/abstractions/providers/mturk_sandbox/provider_type.py
new file mode 100644
index 000000000..2c944bfe9
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/provider_type.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+PROVIDER_TYPE = "mturk_sandbox"
diff --git a/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_agent.py b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_agent.py
new file mode 100644
index 000000000..8bf47096d
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_agent.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
+from mephisto.abstractions.providers.mturk.mturk_agent import MTurkAgent
+
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+    from mephisto.data_model.assignment import Unit
+    from mephisto.data_model.agent import Agent
+    from mephisto.abstractions.database import MephistoDB
+    from mephisto.data_model.worker import Worker
+
+
+class SandboxMTurkAgent(MTurkAgent):
+    """
+    Wrapper for a regular MTurk agent that will only communicate with sandbox.
+
+    Clients are fetched through the datastore's sandbox accessor, and new
+    agents are registered under the sandbox PROVIDER_TYPE.
+    """
+
+    # Ensure inherited methods use this level's provider type
+    PROVIDER_TYPE = PROVIDER_TYPE
+
+    def _get_client(self) -> Any:
+        """
+        Get an mturk client for usage with mturk_utils for this agent.
+
+        The client is looked up by the `_requester_name` of the requester
+        returned by this agent's unit.
+        """
+        unit = self.get_unit()
+        requester: "MTurkRequester" = unit.get_requester()
+        return self.datastore.get_sandbox_client_for_requester(
+            requester._requester_name
+        )
+
+    @staticmethod
+    def new(db: "MephistoDB", worker: "Worker", unit: "Unit") -> "Agent":
+        """Create an agent for this worker to be used for work on the given Unit."""
+        return SandboxMTurkAgent._register_agent(db, worker, unit, PROVIDER_TYPE)
diff --git a/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_provider.py b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_provider.py
new file mode 100644
index 000000000..b9c007d77
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_provider.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
+from mephisto.abstractions.providers.mturk.mturk_provider import (
+    MTurkProvider,
+    MTurkProviderArgs,
+)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_agent import (
+    SandboxMTurkAgent,
+)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_requester import (
+    SandboxMTurkRequester,
+)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_unit import (
+    SandboxMTurkUnit,
+)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_worker import (
+    SandboxMTurkWorker,
+)
+from mephisto.operations.registry import register_mephisto_abstraction
+
+import os
+from dataclasses import dataclass
+
+from typing import Any, ClassVar, Type, List, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mephisto.data_model.assignment import Unit
+    from mephisto.data_model.worker import Worker
+    from mephisto.data_model.requester import Requester
+    from mephisto.data_model.agent import Agent
+
+
+@dataclass
+class SandboxMTurkProviderArgs(MTurkProviderArgs):
+    """Provider args for a sandbox MTurk provider"""
+
+    # Default provider type for these args is the sandbox PROVIDER_TYPE
+    _provider_type: str = PROVIDER_TYPE
+
+
+@register_mephisto_abstraction()
+class SandboxMTurkProvider(MTurkProvider):
+    """
+    CrowdProvider for the MTurk sandbox. Builds on the regular MTurk
+    provider, swapping in the sandbox unit, requester, worker, and agent
+    classes and fetching clients through the datastore's sandbox accessor.
+    """
+
+    # Ensure inherited methods use this level's provider type
+    PROVIDER_TYPE = PROVIDER_TYPE
+
+    # Sandbox-specific implementations used for each data model class
+    UnitClass: ClassVar[Type["Unit"]] = SandboxMTurkUnit
+
+    RequesterClass: ClassVar[Type["Requester"]] = SandboxMTurkRequester
+
+    WorkerClass: ClassVar[Type["Worker"]] = SandboxMTurkWorker
+
+    AgentClass: ClassVar[Type["Agent"]] = SandboxMTurkAgent
+
+    ArgsClass = SandboxMTurkProviderArgs
+
+    SUPPORTED_TASK_TYPES: ClassVar[List[str]] = [
+        # TODO
+    ]
+
+    def _get_client(self, requester_name: str) -> Any:
+        """
+        Get a sandbox mturk client for usage with mturk_utils, looked up
+        from the datastore by the given requester name
+        """
+        return self.datastore.get_sandbox_client_for_requester(requester_name)
+
+    @classmethod
+    def get_wrapper_js_path(cls):
+        """
+        Return the path to the `wrap_crowd_source.js` file for this
+        provider to be deployed to the server
+        """
+        return os.path.join(os.path.dirname(__file__), "wrap_crowd_source.js")
+
+    def cleanup_qualification(self, qualification_name: str) -> None:
+        """
+        Remove the qualification from the sandbox server.
+
+        The name is suffixed with "_sandbox" before delegating to the
+        parent implementation.
+        """
+        return super().cleanup_qualification(f"{qualification_name}_sandbox")
diff --git a/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_requester.py b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_requester.py
new file mode 100644
index 000000000..14e53899b
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_requester.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mturk.mturk_requester import MTurkRequester
+from mephisto.abstractions.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
+
+from typing import Any, Optional, Mapping, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mephisto.abstractions.database import MephistoDB
+    from mephisto.data_model.requester import Requester
+    from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+
+# Suffix carried by every sandbox requester name
+_SANDBOX_SUFFIX = "_sandbox"
+
+
+class SandboxMTurkRequester(MTurkRequester):
+    """Wrapper around regular requester that handles removing the appended "sandbox" name"""
+
+    # Ensure inherited methods use this level's provider type
+    PROVIDER_TYPE = PROVIDER_TYPE
+
+    def __init__(
+        self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+    ):
+        """
+        Load the requester via the parent initializer, then fetch the
+        datastore for this provider type and record the requester name
+        with its trailing sandbox suffix removed as `_requester_name`.
+        """
+        super().__init__(db, db_id, row=row)
+        sandbox_datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
+            self.PROVIDER_TYPE
+        )
+        self.datastore = sandbox_datastore
+        # Use _requester_name to preserve sandbox behavior which
+        # utilizes a different requester_name
+        full_name = self.requester_name
+        assert full_name.endswith(
+            _SANDBOX_SUFFIX
+        ), f"{self.requester_name} is not a sandbox requester"
+        self._requester_name = full_name[: -len(_SANDBOX_SUFFIX)]
+
+    def _get_client(self, requester_name: str) -> Any:
+        """
+        Get an mturk client for usage with mturk_utils
+        """
+        fetch_client = self.datastore.get_sandbox_client_for_requester
+        return fetch_client(requester_name)
+
+    def is_sandbox(self) -> bool:
+        """
+        Determine if this is a requester on sandbox
+        """
+        return True
+
+    # Required functions for a Requester implementation
+
+    @staticmethod
+    def new(db: "MephistoDB", requester_name: str) -> "Requester":
+        """
+        Register a sandbox requester, appending the "_sandbox" suffix to
+        requester_name unless it is already present.
+        """
+        if requester_name.endswith(_SANDBOX_SUFFIX):
+            sandbox_name = requester_name
+        else:
+            sandbox_name = requester_name + _SANDBOX_SUFFIX
+        return SandboxMTurkRequester._register_requester(
+            db, sandbox_name, PROVIDER_TYPE
+        )
diff --git a/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_unit.py b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_unit.py
new file mode 100644
index 000000000..2b21f5433
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_unit.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from datetime import datetime
+
+from mephisto.abstractions.providers.mturk.mturk_unit import MTurkUnit
+from mephisto.abstractions.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mephisto.data_model.assignment import Unit
+    from mephisto.abstractions.database import MephistoDB
+    from mephisto.data_model.assignment import Assignment
+
+
+class SandboxMTurkUnit(MTurkUnit):
+    """
+    Sandbox counterpart of MTurkUnit. A unit tracks the status of an
+    individual worker's contribution to a higher level assignment; this
+    variant obtains its clients through the datastore's sandbox accessor
+    and registers new units under the sandbox PROVIDER_TYPE.
+    """
+
+    # Ensure inherited methods use this level's provider type
+    PROVIDER_TYPE = PROVIDER_TYPE
+
+    def _get_client(self, requester_name: str) -> Any:
+        """
+        Get a sandbox mturk client for usage with mturk_utils, looked up
+        from the datastore by the given requester name
+        """
+        fetch_client = self.datastore.get_sandbox_client_for_requester
+        return fetch_client(requester_name)
+
+    @staticmethod
+    def new(
+        db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
+    ) -> "Unit":
+        """Create a Unit for the given assignment"""
+        registered = SandboxMTurkUnit._register_unit(
+            db, assignment, index, pay_amount, PROVIDER_TYPE
+        )
+        return registered
diff --git a/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_worker.py b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_worker.py
new file mode 100644
index 000000000..9ee4f87bb
--- /dev/null
+++ b/mephisto/abstractions/providers/mturk_sandbox/sandbox_mturk_worker.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mturk.mturk_worker import MTurkWorker
+from mephisto.abstractions.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
+
+from typing import Any, Mapping, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mephisto.abstractions.providers.mturk.mturk_datastore import MTurkDatastore
+    from mephisto.data_model.worker import Worker
+    from mephisto.abstractions.database import MephistoDB
+
+# Suffix appended to worker and qualification names on the sandbox
+_SANDBOX_SUFFIX = "_sandbox"
+
+
+class SandboxMTurkWorker(MTurkWorker):
+    """
+    This class represents an individual - namely a person. It maintains components of ongoing identity for a user.
+
+    Sandbox variant: worker and qualification names carry a "_sandbox"
+    suffix, and clients come from the datastore's sandbox accessor.
+    """
+
+    # Ensure inherited methods use this level's provider type
+    PROVIDER_TYPE = PROVIDER_TYPE
+
+    def __init__(
+        self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+    ):
+        """
+        Load the worker via the parent initializer, then fetch the datastore
+        for this provider type and record the name without its sandbox suffix.
+        """
+        super().__init__(db, db_id, row=row)
+        self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
+            self.PROVIDER_TYPE
+        )
+        # sandbox workers use a different name; only strip the suffix when it
+        # is actually present so other names are never truncated
+        sandbox_name = self.worker_name
+        if sandbox_name.endswith(_SANDBOX_SUFFIX):
+            sandbox_name = sandbox_name[: -len(_SANDBOX_SUFFIX)]
+        self._worker_name = sandbox_name
+
+    def grant_crowd_qualification(
+        self, qualification_name: str, value: int = 1
+    ) -> None:
+        """
+        Grant a qualification by the given name to this worker. Check the local
+        MTurk db to find the matching MTurk qualification to grant, and pass
+        that. If no qualification exists, try to create one.
+        """
+        return super().grant_crowd_qualification(
+            qualification_name + _SANDBOX_SUFFIX, value
+        )
+
+    def revoke_crowd_qualification(self, qualification_name: str) -> None:
+        """
+        Revoke the qualification by the given name from this worker. Check the local
+        MTurk db to find the matching MTurk qualification to revoke, pass if
+        no such qualification exists.
+        """
+        return super().revoke_crowd_qualification(
+            qualification_name + _SANDBOX_SUFFIX
+        )
+
+    def _get_client(self, requester_name: str) -> Any:
+        """
+        Get an mturk client for usage with mturk_utils
+        """
+        return self.datastore.get_sandbox_client_for_requester(requester_name)
+
+    @staticmethod
+    def new(db: "MephistoDB", worker_id: str) -> "Worker":
+        """
+        Register a new sandbox worker, stored under worker_id with the
+        "_sandbox" suffix appended.
+        """
+        return SandboxMTurkWorker._register_worker(
+            db, worker_id + _SANDBOX_SUFFIX, PROVIDER_TYPE
+        )
diff --git a/mephisto/providers/mturk_sandbox/wrap_crowd_source.js b/mephisto/abstractions/providers/mturk_sandbox/wrap_crowd_source.js
similarity index 100%
rename from mephisto/providers/mturk_sandbox/wrap_crowd_source.js
rename to mephisto/abstractions/providers/mturk_sandbox/wrap_crowd_source.js
diff --git a/mephisto/client/api.py b/mephisto/client/api.py
index 371c111ca..1be0fcf54 100644
--- a/mephisto/client/api.py
+++ b/mephisto/client/api.py
@@ -6,12 +6,12 @@
from flask import Blueprint, jsonify, request
from flask import current_app as app
-from mephisto.data_model.database import EntryAlreadyExistsException
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.data_model.task import TaskRun
+from mephisto.abstractions.database import EntryAlreadyExistsException
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.assignment import Assignment, Unit
from mephisto.core.argparse_parser import get_extra_argument_dicts, parse_arg_dict
-from mephisto.core.registry import (
+from mephisto.operations.registry import (
get_blueprint_from_type,
get_crowd_provider_from_type,
get_architect_from_type,
diff --git a/mephisto/client/cli.py b/mephisto/client/cli.py
index b2cdf11ba..c486c6020 100644
--- a/mephisto/client/cli.py
+++ b/mephisto/client/cli.py
@@ -48,8 +48,8 @@ def review(review_app_dir, port, output, output_method, csv_headers, json, debug
@cli.command("check")
def check():
"""Checks that mephisto is setup correctly"""
- from mephisto.core.local_database import LocalMephistoDB
- from mephisto.core.utils import get_mock_requester
+ from mephisto.abstractions.databases.local_database import LocalMephistoDB
+ from mephisto.operations.utils import get_mock_requester
try:
db = LocalMephistoDB()
@@ -64,7 +64,7 @@ def check():
@cli.command("requesters")
def list_requesters():
"""Lists all registered requesters"""
- from mephisto.core.local_database import LocalMephistoDB
+ from mephisto.abstractions.databases.local_database import LocalMephistoDB
from tabulate import tabulate
db = LocalMephistoDB()
@@ -81,8 +81,8 @@ def register_provider(args):
click.echo("Usage: mephisto register arg1=value arg2=value")
return
- from mephisto.core.local_database import LocalMephistoDB
- from mephisto.core.registry import get_crowd_provider_from_type
+ from mephisto.abstractions.databases.local_database import LocalMephistoDB
+ from mephisto.operations.registry import get_crowd_provider_from_type
from mephisto.core.argparse_parser import parse_arg_dict, get_extra_argument_dicts
provider_type, requester_args = args[0], args[1:]
@@ -130,7 +130,7 @@ def get_help_arguments(args):
)
return
- from mephisto.core.registry import (
+ from mephisto.operations.registry import (
get_blueprint_from_type,
get_crowd_provider_from_type,
get_architect_from_type,
diff --git a/mephisto/client/server.py b/mephisto/client/full/server.py
similarity index 86%
rename from mephisto/client/server.py
rename to mephisto/client/full/server.py
index 42211a8e0..d44564df3 100644
--- a/mephisto/client/server.py
+++ b/mephisto/client/full/server.py
@@ -7,14 +7,14 @@
from flask import Flask, send_file, jsonify
from mephisto.client.config import Config
from mephisto.client.api import api
-from mephisto.core.operator import Operator
-from mephisto.core.local_database import LocalMephistoDB
+from mephisto.operations.operator import Operator
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
import os
import atexit
import signal
-app = Flask(__name__, static_url_path="/static", static_folder="../webapp/build/static")
+app = Flask(__name__, static_url_path="/static", static_folder="webapp/build/static")
app.config.from_object(Config)
app.register_blueprint(api, url_prefix="/api/v1")
@@ -31,7 +31,7 @@
@app.route("/", defaults={"path": "index.html"})
@app.route("/")
def index(path):
- return send_file(os.path.join("..", "webapp", "build", "index.html"))
+ return send_file(os.path.join("webapp", "build", "index.html"))
@app.after_request
diff --git a/mephisto/webapp/.gitattributes b/mephisto/client/full/webapp/.gitattributes
similarity index 100%
rename from mephisto/webapp/.gitattributes
rename to mephisto/client/full/webapp/.gitattributes
diff --git a/mephisto/webapp/.gitignore b/mephisto/client/full/webapp/.gitignore
similarity index 100%
rename from mephisto/webapp/.gitignore
rename to mephisto/client/full/webapp/.gitignore
diff --git a/mephisto/webapp/README.md b/mephisto/client/full/webapp/README.md
similarity index 100%
rename from mephisto/webapp/README.md
rename to mephisto/client/full/webapp/README.md
diff --git a/mephisto/webapp/build/asset-manifest.json b/mephisto/client/full/webapp/build/asset-manifest.json
similarity index 100%
rename from mephisto/webapp/build/asset-manifest.json
rename to mephisto/client/full/webapp/build/asset-manifest.json
diff --git a/mephisto/webapp/build/index.html b/mephisto/client/full/webapp/build/index.html
similarity index 100%
rename from mephisto/webapp/build/index.html
rename to mephisto/client/full/webapp/build/index.html
diff --git a/mephisto/webapp/build/precache-manifest.6a3f8be9c9b4eda91b48c53412c4ac04.js b/mephisto/client/full/webapp/build/precache-manifest.6a3f8be9c9b4eda91b48c53412c4ac04.js
similarity index 100%
rename from mephisto/webapp/build/precache-manifest.6a3f8be9c9b4eda91b48c53412c4ac04.js
rename to mephisto/client/full/webapp/build/precache-manifest.6a3f8be9c9b4eda91b48c53412c4ac04.js
diff --git a/mephisto/client/full/webapp/build/precache-manifest.9f9697fc33c2a769605084eb8123c827.js b/mephisto/client/full/webapp/build/precache-manifest.9f9697fc33c2a769605084eb8123c827.js
new file mode 100644
index 000000000..2987066ca
--- /dev/null
+++ b/mephisto/client/full/webapp/build/precache-manifest.9f9697fc33c2a769605084eb8123c827.js
@@ -0,0 +1,34 @@
+self.__precacheManifest = (self.__precacheManifest || []).concat([
+ {
+ "revision": "413929c22f5eb0b865838e3cf75e0cb2",
+ "url": "/index.html"
+ },
+ {
+ "revision": "e60a7159900a72987567",
+ "url": "/static/css/main.f511f1d3.chunk.css"
+ },
+ {
+ "revision": "54446ba46b7001c87af6",
+ "url": "/static/js/2.c380e4ee.chunk.js"
+ },
+ {
+ "revision": "53b91fb6912e14c9689b5be87306d93a",
+ "url": "/static/js/2.c380e4ee.chunk.js.LICENSE"
+ },
+ {
+ "revision": "e60a7159900a72987567",
+ "url": "/static/js/main.d1c9df08.chunk.js"
+ },
+ {
+ "revision": "e0acb5af05f59d50df01",
+ "url": "/static/js/runtime-main.3d6e97dd.js"
+ },
+ {
+ "revision": "cf23121829070fedf98e5caa6585792f",
+ "url": "/static/media/M.cf231218.svg"
+ },
+ {
+ "revision": "44730b5bd732ae067bd4f9f8abe88757",
+ "url": "/static/media/sample.44730b5b.png"
+ }
+]);
\ No newline at end of file
diff --git a/mephisto/webapp/build/robots.txt b/mephisto/client/full/webapp/build/robots.txt
similarity index 100%
rename from mephisto/webapp/build/robots.txt
rename to mephisto/client/full/webapp/build/robots.txt
diff --git a/mephisto/webapp/build/service-worker.js b/mephisto/client/full/webapp/build/service-worker.js
similarity index 100%
rename from mephisto/webapp/build/service-worker.js
rename to mephisto/client/full/webapp/build/service-worker.js
diff --git a/mephisto/webapp/build/static/css/2.d6c513eb.chunk.css b/mephisto/client/full/webapp/build/static/css/2.d6c513eb.chunk.css
similarity index 100%
rename from mephisto/webapp/build/static/css/2.d6c513eb.chunk.css
rename to mephisto/client/full/webapp/build/static/css/2.d6c513eb.chunk.css
diff --git a/mephisto/webapp/build/static/css/2.d6c513eb.chunk.css.map b/mephisto/client/full/webapp/build/static/css/2.d6c513eb.chunk.css.map
similarity index 100%
rename from mephisto/webapp/build/static/css/2.d6c513eb.chunk.css.map
rename to mephisto/client/full/webapp/build/static/css/2.d6c513eb.chunk.css.map
diff --git a/mephisto/webapp/build/static/css/main.ec5c0f52.chunk.css b/mephisto/client/full/webapp/build/static/css/main.ec5c0f52.chunk.css
similarity index 100%
rename from mephisto/webapp/build/static/css/main.ec5c0f52.chunk.css
rename to mephisto/client/full/webapp/build/static/css/main.ec5c0f52.chunk.css
diff --git a/mephisto/webapp/build/static/css/main.ec5c0f52.chunk.css.map b/mephisto/client/full/webapp/build/static/css/main.ec5c0f52.chunk.css.map
similarity index 100%
rename from mephisto/webapp/build/static/css/main.ec5c0f52.chunk.css.map
rename to mephisto/client/full/webapp/build/static/css/main.ec5c0f52.chunk.css.map
diff --git a/mephisto/client/full/webapp/build/static/css/main.f511f1d3.chunk.css b/mephisto/client/full/webapp/build/static/css/main.f511f1d3.chunk.css
new file mode 100644
index 000000000..36b8c556a
--- /dev/null
+++ b/mephisto/client/full/webapp/build/static/css/main.f511f1d3.chunk.css
@@ -0,0 +1,2 @@
+body{margin:0;padding:0;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.App{text-align:center;background-color:#eaf3fd;display:flex;min-height:100vh;flex-direction:column}*{box-sizing:border-box}.App-header{background-color:#fff;display:flex;font-size:30px;color:#345;height:80px;margin-bottom:30px;border-bottom:1px solid #c7ddf3}.App-header,.App-header a{flex-direction:row;align-items:center;justify-content:center}.App-header a{text-decoration:none;color:inherit;display:inline-flex}.App-header a:hover{color:#5f9ea0}.App-header a:hover .logo{position:relative;-webkit-animation:emphasizeLogo .3s cubic-bezier(.5,0,0,1);animation:emphasizeLogo .3s cubic-bezier(.5,0,0,1);-webkit-animation-fill-mode:forwards;animation-fill-mode:forwards}.logo{margin-right:12px}@-webkit-keyframes emphasizeLogo{0%{-webkit-transform:scale(1) rotate(0);transform:scale(1) rotate(0)}to{-webkit-transform:scale(1.2) rotate(-15deg);transform:scale(1.2) rotate(-15deg)}}@keyframes emphasizeLogo{0%{-webkit-transform:scale(1) rotate(0);transform:scale(1) rotate(0)}to{-webkit-transform:scale(1.2) rotate(-15deg);transform:scale(1.2) rotate(-15deg)}}p{line-height:1.4em;margin:0}.card.create-task,.card.export-data{font-size:14px}.card.create-task p,.card.export-data p,.card p{margin-bottom:20px}.App-link{display:block;margin-top:5px;font-size:14px;font-weight:700}.page-body{flex:1 1;flex-direction:column;align-items:center;justify-content:center;overflow:auto}a{color:#1a809c}.card-container{display:flex;flex-direction:row;width:100%;align-items:flex-start}.card-outer-container{display:inline-flex}.card{background-color:#fff;box-shadow:0 15px 25px #cee1f5;padding:20px;border-radius:20px;font-size:16px;text-align:left;margin:15px;min-width:200px;max-width:300px;color:#345}.card 
h3{font-weight:700;margin:0 0 20px;color:#8b9db1;font-size:18px}.card h3 em{font-style:normal;color:#345}span.badge{border-radius:20px;background-color:#345;width:20px;color:#fff;display:inline-flex;text-align:center;font-size:10px;height:20px;align-items:center;justify-content:center;vertical-align:text-bottom;margin-right:7px}ul{padding-left:20px}ul li{margin-bottom:5px}.live-task{padding:10px 20px;margin:0 -20px 10px;border-top:1px solid #d6ecd7;border-bottom:1px solid #fff;background-image:-webkit-gradient(linear,left top,left bottom,from(#f0f9f7),to(rgba(240,249,247,0)));background-image:linear-gradient(180deg,#f0f9f7,rgba(240,249,247,0))}.live-task.interactive{cursor:pointer}.live-task.interactive:hover{background-color:#e0f6ff;border-bottom:1px solid #3ec9ff;background-image:none;border-top:1px solid #3ec9ff;-webkit-transition:border-color .3s,background .3s;transition:border-color .3s,background .3s}.live-task.interactive:hover .title{color:#0dadea;-webkit-transition:color .3s;transition:color .3s}.live-task .title{font-weight:700;color:#4fa254}.hyperparameters{color:#888;font-size:12px;word-break:break-word;font-style:italic;margin-bottom:4px}.hyperparameter{margin-right:5px;white-space:nowrap;display:inline-block}.hyperparameter .value{font-weight:700;color:#666}.hyperparameter .key,.hyperparameter .value{white-space:nowrap}.details{font-size:13px;margin-top:10px}.btn{padding:7px;cursor:pointer;font-size:14px;background-color:#334355;border:1px solid #080808;color:#fff;font-weight:700;margin-top:9px;width:100%;text-decoration:none}.bar{background-color:#b8d7ff;height:14px;margin-bottom:2px}.metrics{display:flex;flex-direction:row;justify-content:center}.metric{margin:0;text-align:center;padding:10px;font-size:16px;color:#333;flex:1 1}.metrics.highlight-first .metric:first-child{font-weight:700}.metric label{display:block;font-size:12px}.metrics.anticipate-double label{margin-top:10px}p.warning,p 
a{font-weight:700}p.warning{background-color:#ffffd1;padding:5px;border:1px solid #e2d108;margin:5px 0}code{white-space:nowrap}.card{-webkit-animation:fadeIn .3s cubic-bezier(.5,0,0,1);animation:fadeIn .3s cubic-bezier(.5,0,0,1);-webkit-animation-fill-mode:forwards;animation-fill-mode:forwards;opacity:0}.card:first-child{-webkit-animation-delay:0s;animation-delay:0s}.card:nth-child(2){-webkit-animation-delay:.1s;animation-delay:.1s}.card:nth-child(3){-webkit-animation-delay:.2s;animation-delay:.2s}.card:nth-child(4){-webkit-animation-delay:.3s;animation-delay:.3s}@-webkit-keyframes fadeIn{0%{opacity:0;-webkit-transform:scale(.9) translateY(80px);transform:scale(.9) translateY(80px)}60%{-webkit-transform:translateY(-2px);transform:translateY(-2px)}to{opacity:1;-webkit-transform:scale(1) translateY(0);transform:scale(1) translateY(0)}}@keyframes fadeIn{0%{opacity:0;-webkit-transform:scale(.9) translateY(80px);transform:scale(.9) translateY(80px)}60%{-webkit-transform:translateY(-2px);transform:translateY(-2px)}to{opacity:1;-webkit-transform:scale(1) translateY(0);transform:scale(1) translateY(0)}}.sandbox{font-size:10px;text-transform:uppercase;background-color:#f3d9b7;color:#694903;padding:2px;margin-left:3px;vertical-align:initial;float:right}a.unstyled{text-decoration:inherit;color:inherit}.btn.rounded{width:auto;border-radius:20px;border:none;padding:10px 20px}.user_info{padding:10px 20px;margin:0 -20px 10px}.user_info .title{color:#6e91b3;font-size:16px;font-weight:700}.task-list{display:flex;flex-direction:column;border-right:1px solid #cee1f5;margin:-20px 0 -20px -20px;padding:20px 0;align-content:flex-start}.task-list-item{display:inline-block;padding:15px 30px;max-width:400px;min-width:200px;text-align:left}.task-list-item.selected,.task-list-item.selected:hover{color:#fff;background-color:#345}.task-list-item:hover{background-color:#ecf0f3;cursor:pointer}.name{font-weight:700}.tags 
span{margin-right:7px;display:inline-block;font-size:13px}.search{margin:10px 20px;padding:10px;border-radius:10px;border:none;background-color:#eff3f7;font-size:inherit;font-weight:700;color:inherit}.search::-webkit-input-placeholder{color:#a4b5c7;font-weight:400}.search::-moz-placeholder{color:#a4b5c7;font-weight:400}.search::-ms-input-placeholder{color:#a4b5c7;font-weight:400}.search::placeholder{color:#a4b5c7;font-weight:400}.search:focus{outline:none;box-shadow:inset 0 0 2px 1px #c8d8f5;background-color:#f7f7f7}.text-container,textarea{margin:0;padding:10px;border-radius:10px;border:none;background-color:#eff3f7;font-size:inherit;font-weight:700;color:inherit;font-family:monospace;width:100%}textarea:focus{outline:none;box-shadow:inset 0 0 2px 1px #c8d8f5}.task-description{padding:0 30px;flex:1 1}.task-description h1{margin:10px 0 5px}.card .task-description p{color:#68879e;font-size:21px}.task-description h3{margin-bottom:10px}.task-description h3 code{background-color:#f2f2f2;font-size:16px;padding:0 2px}.controls{margin-top:40px}.btn.inverse{background-color:#f5f5f5;color:#324456}.btn.inverse:hover{background-color:#efefef;color:#324456}.ace_editor .ace_marker-layer .ace_bracket,.ace_hidden-cursors .ace_cursor{display:none}.controls{margin-bottom:40px;background-color:#fbfbfb;border:1px solid #f5f5f5;padding:20px;border-radius:20px;box-shadow:0 3px 3px 0 rgba(0,0,0,.1)}.hero{font-size:45px;font-weight:700;color:#033c3e;line-height:1.2em;padding:80px 0;-webkit-animation:fadeUp 2s cubic-bezier(0,1,0,1);animation:fadeUp 2s cubic-bezier(0,1,0,1)}@-webkit-keyframes fadeUp{0%{opacity:0;-webkit-transform:translateY(100px);transform:translateY(100px)}to{opacity:1;-webkit-transform:translateY(0);transform:translateY(0)}}@keyframes fadeUp{0%{opacity:0;-webkit-transform:translateY(100px);transform:translateY(100px)}to{opacity:1;-webkit-transform:translateY(0);transform:translateY(0)}}
+/*# sourceMappingURL=main.f511f1d3.chunk.css.map */
\ No newline at end of file
diff --git a/mephisto/client/full/webapp/build/static/css/main.f511f1d3.chunk.css.map b/mephisto/client/full/webapp/build/static/css/main.f511f1d3.chunk.css.map
new file mode 100644
index 000000000..084c4ea32
--- /dev/null
+++ b/mephisto/client/full/webapp/build/static/css/main.f511f1d3.chunk.css.map
@@ -0,0 +1 @@
+{"version":3,"sources":["index.css","App.css","review.css","taskgallery.css","splash.css"],"names":[],"mappings":"AAAA,KACE,QAAS,CACT,SAAU,CACV,mIAEY,CACZ,kCAAmC,CACnC,iCACF,CAEA,KACE,uEAEF,CCbA,KACE,iBAAkB,CAClB,wBAAyB,CACzB,YAAa,CACb,gBAAiB,CACjB,qBACF,CAEA,EACE,qBACF,CAEA,YACE,qBAAuB,CACvB,YAAa,CAIb,cAAe,CACf,UAAc,CACd,WAAY,CACZ,kBAAmB,CACnB,+BACF,CAEA,0BAVE,kBAAmB,CACnB,kBAAmB,CACnB,sBAeF,CAPA,cACE,oBAAqB,CACrB,aAAc,CACd,mBAIF,CAEA,oBACE,aACF,CAEA,0BACE,iBAAkB,CAClB,0DAAwD,CAAxD,kDAAwD,CACxD,oCAA6B,CAA7B,4BACF,CAOA,MACE,iBACF,CAEA,iCACE,GACE,oCAAgC,CAAhC,4BACF,CACA,GACE,2CAAyC,CAAzC,mCACF,CACF,CAPA,yBACE,GACE,oCAAgC,CAAhC,4BACF,CACA,GACE,2CAAyC,CAAzC,mCACF,CACF,CAEA,EACE,iBAAkB,CAClB,QACF,CAEA,oCAEE,cACF,CAOA,gDACE,kBACF,CAEA,UACE,aAAc,CACd,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,QAAO,CACP,qBAAsB,CACtB,kBAAmB,CACnB,sBAAuB,CACvB,aACF,CAEA,EACE,aACF,CAEA,gBACE,YAAa,CACb,kBAAmB,CACnB,UAAW,CACX,sBACF,CAEA,sBACE,mBACF,CAEA,MACE,qBAAuB,CACvB,8BAAiC,CACjC,YAAa,CACb,kBAAmB,CACnB,cAAe,CACf,eAAgB,CAChB,WAAY,CAEZ,eAAgB,CAChB,eAAgB,CAChB,UACF,CAEA,SACE,eAAiB,CACjB,eAAgB,CAChB,aAAc,CACd,cACF,CAEA,YACE,iBAAkB,CAClB,UACF,CAEA,WACE,kBAAmB,CACnB,qBAAyB,CACzB,UAAW,CACX,UAAY,CACZ,mBAAoB,CACpB,iBAAkB,CAClB,cAAe,CACf,WAAY,CACZ,kBAAmB,CACnB,sBAAuB,CACvB,0BAA2B,CAC3B,gBACF,CAEA,GACE,iBACF,CACA,MACE,iBACF,CAEA,WAKE,iBAA4B,CAC5B,mBAAoB,CACpB,4BAA6B,CAC7B,4BAA+C,CAC/C,oGAIC,CAJD,oEAKF,CAEA,uBACE,cACF,CAEA,6BACE,wBAAyB,CACzB,+BAAgC,CAChC,qBAAsB,CACtB,4BAA6B,CAC7B,kDAA8C,CAA9C,0CACF,CAEA,oCACE,aAAc,CACd,4BAAsB,CAAtB,oBACF,CAEA,kBACE,eAAiB,CACjB,aAGF,CAEA,iBACE,UAAW,CACX,cAAe,CACf,qBAAsB,CACtB,iBAAkB,CAClB,iBACF,CACA,gBACE,gBAAiB,CACjB,kBAAmB,CACnB,oBACF,CACA,uBACE,eAAiB,CACjB,UACF,CACA,4CAEE,kBACF,CAEA,SACE,cAAe,CACf,eACF,CAEA,KACE,WAAY,CACZ,cAAe,CACf,cAAe,CACf,wBAAyB,CACzB,wBAAyB,CACzB,UAAY,CACZ,eAAiB,CAEjB,cAAe,CAEf,UAAW,CACX,oBACF,CAEA,KACE,wBAAyB,CACzB,WAAY,CACZ,iBACF,CAEA,SACE,YAAa,CACb,kBAAmB,CACnB,sBACF,CAEA,QACE,QAAmB,CACnB,iBAAkB,CAGlB,YAAa,CACb,cAAe,CACf,UAAW,CACX,QACF,CAEA,6CACE,eAC
F,CAEA,cACE,aAAc,CACd,cACF,CAEA,iCACE,eACF,CAMA,cAHE,eASF,CANA,UACE,wBAAyB,CACzB,WAAY,CACZ,wBAAyB,CACzB,YAEF,CAEA,KACE,kBACF,CAEA,MACE,mDAAiD,CAAjD,2CAAiD,CACjD,oCAA6B,CAA7B,4BAA6B,CAC7B,SACF,CAEA,kBACE,0BAAmB,CAAnB,kBACF,CACA,mBACE,2BAAqB,CAArB,mBACF,CACA,mBACE,2BAAqB,CAArB,mBACF,CACA,mBACE,2BAAqB,CAArB,mBACF,CAEA,0BACE,GACE,SAAU,CACV,4CAA2C,CAA3C,oCACF,CACA,IACE,kCAA2B,CAA3B,0BACF,CACA,GACE,SAAU,CACV,wCAAsC,CAAtC,gCACF,CACF,CAZA,kBACE,GACE,SAAU,CACV,4CAA2C,CAA3C,oCACF,CACA,IACE,kCAA2B,CAA3B,0BACF,CACA,GACE,SAAU,CACV,wCAAsC,CAAtC,gCACF,CACF,CAEA,SACE,cAAe,CACf,wBAAyB,CACzB,wBAAyB,CACzB,aAAsB,CACtB,WAAY,CACZ,eAAgB,CAChB,sBAAwB,CACxB,WACF,CAEA,WACE,uBAAwB,CACxB,aACF,CAEA,aACE,UAAW,CACX,kBAAmB,CACnB,WAAY,CACZ,iBACF,CC3VA,WACE,iBAA4B,CAC5B,mBACF,CAEA,kBACE,aAAc,CACd,cAAe,CACf,eACF,CCTA,WACE,YAAa,CACb,qBAAsB,CACtB,8BAA+B,CAC/B,0BAA6B,CAC7B,cAAe,CACf,wBAEF,CAEA,gBACE,oBAAqB,CACrB,iBAAkB,CAClB,eAAgB,CAChB,eAAgB,CAChB,eACF,CAEA,wDAEE,UAAY,CACZ,qBACF,CAEA,sBACE,wBAAyB,CACzB,cACF,CAEA,MACE,eACF,CAEA,WACE,gBAAiB,CACjB,oBAAqB,CACrB,cACF,CAEA,QACE,gBAAiB,CACjB,YAAa,CACb,kBAAmB,CACnB,WAAY,CACZ,wBAAyB,CACzB,iBAAkB,CAClB,eAAiB,CACjB,aACF,CAEA,mCACE,aAAc,CACd,eACF,CAHA,0BACE,aAAc,CACd,eACF,CAHA,+BACE,aAAc,CACd,eACF,CAHA,qBACE,aAAc,CACd,eACF,CAEA,cACE,YAAa,CACb,oCAAyC,CACzC,wBACF,CAEA,yBAEE,QAAW,CACX,YAAa,CACb,kBAAmB,CACnB,WAAY,CACZ,wBAAyB,CACzB,iBAAkB,CAClB,eAAiB,CACjB,aAAc,CACd,qBAAsB,CACtB,UACF,CAEA,eACE,YAAa,CACb,oCACF,CAEA,kBACE,cAAe,CACf,QACF,CAEA,qBACE,iBACF,CAEA,0BACE,aAAc,CACd,cACF,CAEA,qBACE,kBACF,CAEA,0BACE,wBAAyB,CACzB,cAAe,CACf,aACF,CAEA,UACE,eACF,CAEA,aACE,wBAAyB,CACzB,aACF,CAEA,mBACE,wBAAyB,CACzB,aACF,CAMA,2EACE,YACF,CAEA,UACE,kBAAmB,CACnB,wBAAyB,CAEzB,wBAAyB,CAEzB,YAAoB,CACpB,kBAAmB,CACnB,qCAGF,CCzIA,MACE,cAAe,CACf,eAAiB,CACjB,aAAc,CACd,iBAAkB,CAClB,cAAe,CACf,iDAA6C,CAA7C,yCACF,CAaA,0BACE,GACE,SAAU,CACV,mCAA4B,CAA5B,2BACF,CACA,GACE,SAAU,CACV,+BAA0B,CAA1B,uBACF,CACF,CATA,kBACE,GACE,SAAU,CACV,mCAA4B,CAA5B,2BACF,CACA,GACE,SAAU,CACV,+BAA0B,CAA1B,uBACF,CACF","file":"main.
f511f1d3.chunk.css","sourcesContent":["body {\n margin: 0;\n padding: 0;\n font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", \"Roboto\", \"Oxygen\",\n \"Ubuntu\", \"Cantarell\", \"Fira Sans\", \"Droid Sans\", \"Helvetica Neue\",\n sans-serif;\n -webkit-font-smoothing: antialiased;\n -moz-osx-font-smoothing: grayscale;\n}\n\ncode {\n font-family: source-code-pro, Menlo, Monaco, Consolas, \"Courier New\",\n monospace;\n}\n",".App {\n text-align: center;\n background-color: #eaf3fd;\n display: flex;\n min-height: 100vh;\n flex-direction: column;\n}\n\n* {\n box-sizing: border-box;\n}\n\n.App-header {\n background-color: white;\n display: flex;\n flex-direction: row;\n align-items: center;\n justify-content: center;\n font-size: 30px;\n color: #334455;\n height: 80px;\n margin-bottom: 30px;\n border-bottom: 1px solid #c7ddf3;\n}\n\n.App-header a {\n text-decoration: none;\n color: inherit;\n display: inline-flex;\n flex-direction: row;\n align-items: center;\n justify-content: center;\n}\n\n.App-header a:hover {\n color: cadetblue;\n}\n\n.App-header a:hover .logo {\n position: relative;\n animation: emphasizeLogo 0.3s cubic-bezier(0.5, 0, 0, 1);\n animation-fill-mode: forwards;\n}\n\n/* .App-header a:active {\n position: relative;\n top: 1px;\n} */\n\n.logo {\n margin-right: 12px;\n}\n\n@keyframes emphasizeLogo {\n from {\n transform: scale(1, 1) rotate(0);\n }\n to {\n transform: scale(1.2, 1.2) rotate(-15deg);\n }\n}\n\np {\n line-height: 1.4em;\n margin: 0;\n}\n\n.card.create-task,\n.card.export-data {\n font-size: 14px;\n}\n\n.card.create-task p,\n.card.export-data p {\n margin-bottom: 20px;\n}\n\n.card p {\n margin-bottom: 20px;\n}\n\n.App-link {\n display: block;\n margin-top: 5px;\n font-size: 14px;\n font-weight: bold;\n}\n\n.page-body {\n /* display: flex; */\n flex: 1;\n flex-direction: column;\n align-items: center;\n justify-content: center;\n overflow: auto;\n}\n\na {\n color: #1a809c;\n}\n\n.card-container {\n display: flex;\n flex-direction: 
row;\n width: 100%;\n align-items: flex-start;\n}\n\n.card-outer-container {\n display: inline-flex;\n}\n\n.card {\n background-color: white;\n box-shadow: 0px 15px 25px #cee1f5;\n padding: 20px;\n border-radius: 20px;\n font-size: 16px;\n text-align: left;\n margin: 15px;\n /* margin-right: 30px; */\n min-width: 200px;\n max-width: 300px;\n color: #334455;\n}\n\n.card h3 {\n font-weight: bold;\n margin: 0 0 20px;\n color: #8b9db1;\n font-size: 18px;\n}\n\n.card h3 em {\n font-style: normal;\n color: #334455;\n}\n\nspan.badge {\n border-radius: 20px;\n background-color: #334455;\n width: 20px;\n color: white;\n display: inline-flex;\n text-align: center;\n font-size: 10px;\n height: 20px;\n align-items: center;\n justify-content: center;\n vertical-align: text-bottom;\n margin-right: 7px;\n}\n\nul {\n padding-left: 20px;\n}\nul li {\n margin-bottom: 5px;\n}\n\n.live-task {\n /* margin-bottom: 10px;\n border-left: 5px solid #4fa254;\n padding: 0px 5px 1px 10px;\n cursor: pointer; */\n padding: 10px 20px 10px 20px;\n margin: 0 -20px 10px;\n border-top: 1px solid #d6ecd7;\n border-bottom: 1px solid rgba(255, 255, 255, 1);\n background-image: linear-gradient(\n to bottom,\n rgba(240, 249, 247, 1),\n rgba(240, 249, 247, 0)\n );\n}\n\n.live-task.interactive {\n cursor: pointer;\n}\n\n.live-task.interactive:hover {\n background-color: #e0f6ff;\n border-bottom: 1px solid #3ec9ff;\n background-image: none;\n border-top: 1px solid #3ec9ff;\n transition: border-color 0.3s, background 0.3s;\n}\n\n.live-task.interactive:hover .title {\n color: #0dadea;\n transition: color 0.3s;\n}\n\n.live-task .title {\n font-weight: bold;\n color: #4fa254;\n /* background-color: rgb(230, 255, 230); */\n /* padding: 3px 0px; */\n}\n\n.hyperparameters {\n color: #888;\n font-size: 12px;\n word-break: break-word;\n font-style: italic;\n margin-bottom: 4px;\n}\n.hyperparameter {\n margin-right: 5px;\n white-space: nowrap;\n display: inline-block;\n}\n.hyperparameter .value {\n font-weight: 
bold;\n color: #666;\n}\n.hyperparameter .value,\n.hyperparameter .key {\n white-space: nowrap;\n}\n\n.details {\n font-size: 13px;\n margin-top: 10px;\n}\n\n.btn {\n padding: 7px;\n cursor: pointer;\n font-size: 14px;\n background-color: #334355;\n border: 1px solid #080808;\n color: white;\n font-weight: bold;\n\n margin-top: 9px;\n /* display: block; */\n width: 100%;\n text-decoration: none;\n}\n\n.bar {\n background-color: #b8d7ff;\n height: 14px;\n margin-bottom: 2px;\n}\n\n.metrics {\n display: flex;\n flex-direction: row;\n justify-content: center;\n}\n\n.metric {\n margin: 0px 0px 0px;\n text-align: center;\n /* background-color: white; */\n /* border: 1px solid #ddd; */\n padding: 10px;\n font-size: 16px;\n color: #333;\n flex: 1;\n}\n\n.metrics.highlight-first .metric:first-child {\n font-weight: bold;\n}\n\n.metric label {\n display: block;\n font-size: 12px;\n}\n\n.metrics.anticipate-double label {\n margin-top: 10px;\n}\n\np a {\n font-weight: bold;\n}\n\np.warning {\n background-color: #ffffd1;\n padding: 5px;\n border: 1px solid #e2d108;\n margin: 5px 0;\n font-weight: bold;\n}\n\ncode {\n white-space: nowrap;\n}\n\n.card {\n animation: fadeIn 0.3s cubic-bezier(0.5, 0, 0, 1);\n animation-fill-mode: forwards;\n opacity: 0;\n}\n\n.card:nth-child(1) {\n animation-delay: 0s;\n}\n.card:nth-child(2) {\n animation-delay: 0.1s;\n}\n.card:nth-child(3) {\n animation-delay: 0.2s;\n}\n.card:nth-child(4) {\n animation-delay: 0.3s;\n}\n\n@keyframes fadeIn {\n 0% {\n opacity: 0;\n transform: scale(0.9, 0.9) translateY(80px);\n }\n 60% {\n transform: translateY(-2px);\n }\n 100% {\n opacity: 1;\n transform: scale(1, 1) translateY(0px);\n }\n}\n\n.sandbox {\n font-size: 10px;\n text-transform: uppercase;\n background-color: #f3d9b7;\n color: rgb(105, 73, 3);\n padding: 2px;\n margin-left: 3px;\n vertical-align: baseline;\n float: right;\n}\n\na.unstyled {\n text-decoration: inherit;\n color: inherit;\n}\n\n.btn.rounded {\n width: auto;\n border-radius: 20px;\n 
border: none;\n padding: 10px 20px;\n}\n",".user_info {\n padding: 10px 20px 10px 20px;\n margin: 0 -20px 10px;\n}\n\n.user_info .title {\n color: #6e91b3;\n font-size: 16px;\n font-weight: bold;\n}\n",".task-list {\n display: flex;\n flex-direction: column;\n border-right: 1px solid #cee1f5;\n margin: -20px 0px -20px -20px;\n padding: 20px 0;\n align-content: flex-start;\n /* padding: \"20px 30px\"; */\n}\n\n.task-list-item {\n display: inline-block;\n padding: 15px 30px;\n max-width: 400px;\n min-width: 200px;\n text-align: left;\n}\n\n.task-list-item.selected,\n.task-list-item.selected:hover {\n color: white;\n background-color: #334455;\n}\n\n.task-list-item:hover {\n background-color: #ecf0f3;\n cursor: pointer;\n}\n\n.name {\n font-weight: bold;\n}\n\n.tags span {\n margin-right: 7px;\n display: inline-block;\n font-size: 13px;\n}\n\n.search {\n margin: 10px 20px;\n padding: 10px;\n border-radius: 10px;\n border: none;\n background-color: #eff3f7;\n font-size: inherit;\n font-weight: bold;\n color: inherit;\n}\n\n.search::placeholder {\n color: #a4b5c7;\n font-weight: normal;\n}\n\n.search:focus {\n outline: none;\n box-shadow: 0px 0px 2px 1px #c8d8f5 inset;\n background-color: #f7f7f7;\n}\n\ntextarea,\n.text-container {\n margin: 0px;\n padding: 10px;\n border-radius: 10px;\n border: none;\n background-color: #eff3f7;\n font-size: inherit;\n font-weight: bold;\n color: inherit;\n font-family: monospace;\n width: 100%;\n}\n\ntextarea:focus {\n outline: none;\n box-shadow: 0px 0px 2px 1px #c8d8f5 inset;\n}\n\n.task-description {\n padding: 0 30px;\n flex: 1;\n}\n\n.task-description h1 {\n margin: 10px 0 5px 0;\n}\n\n.card .task-description p {\n color: #68879e;\n font-size: 21px;\n}\n\n.task-description h3 {\n margin-bottom: 10px;\n}\n\n.task-description h3 code {\n background-color: #f2f2f2;\n font-size: 16px;\n padding: 0 2px;\n}\n\n.controls {\n margin-top: 40px;\n}\n\n.btn.inverse {\n background-color: #f5f5f5;\n color: #324456;\n}\n\n.btn.inverse:hover 
{\n background-color: #efefef;\n color: #324456;\n}\n\n.ace_editor .ace_marker-layer .ace_bracket {\n display: none;\n}\n\n.ace_hidden-cursors .ace_cursor {\n display: none;\n}\n\n.controls {\n margin-bottom: 40px;\n background-color: #fbfbfb;\n padding: 20px;\n border: 1px solid #f5f5f5;\n padding-top: 20px;\n padding-bottom: 20px;\n border-radius: 20px;\n box-shadow: 0px 3px 3px 0px rgba(0, 0, 0, 0.1);\n\n /* border-bottom: 1px solid #eee; */\n}\n",".hero {\n font-size: 45px;\n font-weight: bold;\n color: #033c3e;\n line-height: 1.2em;\n padding: 80px 0;\n animation: fadeUp 2s cubic-bezier(0, 1, 0, 1);\n}\n/* \n.hero a {\n text-decoration: inherit;\n color: inherit;\n}\n\n.hero a:hover {\n text-decoration: inherit;\n color: inherit;\n opacity: 0.9;\n} */\n\n@keyframes fadeUp {\n from {\n opacity: 0;\n transform: translateY(100px);\n }\n to {\n opacity: 1;\n transform: translateY(0px);\n }\n}\n"]}
\ No newline at end of file
diff --git a/mephisto/webapp/build/static/js/2.34d4026e.chunk.js b/mephisto/client/full/webapp/build/static/js/2.34d4026e.chunk.js
similarity index 100%
rename from mephisto/webapp/build/static/js/2.34d4026e.chunk.js
rename to mephisto/client/full/webapp/build/static/js/2.34d4026e.chunk.js
diff --git a/mephisto/webapp/build/static/js/2.34d4026e.chunk.js.LICENSE b/mephisto/client/full/webapp/build/static/js/2.34d4026e.chunk.js.LICENSE
similarity index 100%
rename from mephisto/webapp/build/static/js/2.34d4026e.chunk.js.LICENSE
rename to mephisto/client/full/webapp/build/static/js/2.34d4026e.chunk.js.LICENSE
diff --git a/mephisto/webapp/build/static/js/2.34d4026e.chunk.js.map b/mephisto/client/full/webapp/build/static/js/2.34d4026e.chunk.js.map
similarity index 100%
rename from mephisto/webapp/build/static/js/2.34d4026e.chunk.js.map
rename to mephisto/client/full/webapp/build/static/js/2.34d4026e.chunk.js.map
diff --git a/mephisto/webapp/build/static/js/main.6e109e52.chunk.js b/mephisto/client/full/webapp/build/static/js/main.6e109e52.chunk.js
similarity index 100%
rename from mephisto/webapp/build/static/js/main.6e109e52.chunk.js
rename to mephisto/client/full/webapp/build/static/js/main.6e109e52.chunk.js
diff --git a/mephisto/webapp/build/static/js/main.6e109e52.chunk.js.map b/mephisto/client/full/webapp/build/static/js/main.6e109e52.chunk.js.map
similarity index 100%
rename from mephisto/webapp/build/static/js/main.6e109e52.chunk.js.map
rename to mephisto/client/full/webapp/build/static/js/main.6e109e52.chunk.js.map
diff --git a/mephisto/client/full/webapp/build/static/js/main.d1c9df08.chunk.js b/mephisto/client/full/webapp/build/static/js/main.d1c9df08.chunk.js
new file mode 100644
index 000000000..15b26f022
--- /dev/null
+++ b/mephisto/client/full/webapp/build/static/js/main.d1c9df08.chunk.js
@@ -0,0 +1,2 @@
+(this.webpackJsonpturk=this.webpackJsonpturk||[]).push([[0],{27:function(e,a,t){e.exports=t.p+"static/media/sample.44730b5b.png"},32:function(e,a,t){e.exports=t(54)},37:function(e,a,t){},38:function(e,a,t){},43:function(e,a,t){},44:function(e,a,t){},53:function(e,a,t){},54:function(e,a,t){"use strict";t.r(a);var n=t(0),l=t.n(n),r=t(21),c=t.n(r),s=(t(37),t(22)),i=t(23),m=t(30),o=t(24),d=t(31),u=(t(38),t(3)),p=t(8),E=t(5),v=function(){return l.a.createElement("div",{className:"page-body"},l.a.createElement("div",{className:"card-outer-container"},l.a.createElement("div",{className:"card-container"},l.a.createElement("div",{className:"card create-task"},l.a.createElement("h3",null,l.a.createElement("span",{className:"badge"},"1"),l.a.createElement("em",null,"Create")," a task"),l.a.createElement("div",null,l.a.createElement("p",null,"You will need an MTurk Requester Account and an AWS account (separate accounts).",l.a.createElement("a",{className:"App-link",href:"https://parl.ai/docs/tutorial_mturk.html#running-a-task",target:"_blank",rel:"noopener noreferrer"},"Setting up AWS \xbb")),l.a.createElement("p",null,"We have a growing collection of tasks already available. You can re-use one of them, clone & modify one of them to create a similar yet different task, or create a completely new custom task altogether. 
Custom tasks can range from simple static pages, to multi-agent dialogue, to dynamic & interactive React applications!",l.a.createElement(u.b,{className:"App-link",to:"/task-gallery"},"View gallery of existing tasks \xbb"),l.a.createElement("a",{className:"App-link",href:"https://parl.ai/docs/tutorial_task.html#creating-a-new-task-the-more-complete-way",target:"_blank",rel:"noopener noreferrer"},"Create a custom task \xbb")))),l.a.createElement("div",{className:"card"},l.a.createElement("h3",null,l.a.createElement("span",{className:"badge"},"2"),l.a.createElement("em",null,"Launch")," tasks"),l.a.createElement("p",null,"You currently have ",l.a.createElement("strong",null,"3 tasks")," running live."),[1,2,3].map((function(e){return l.a.createElement("div",{className:"live-task interactive"},l.a.createElement("div",{className:"title"},e>1?"semantic annotation":"LIGHT pilot (v2)",1===e&&l.a.createElement("span",{className:"sandbox"},"Sandbox")),l.a.createElement("div",{className:"hyperparameters"},[null].map((function(){return Object.entries({dataset:"twitter",model:"v3.123",enableSafety:!0,saveDataFile:"localDB"}).map((function(e){var a=Object(E.a)(e,2),t=a[0],n=a[1];return l.a.createElement("span",{className:"hyperparameter"},l.a.createElement("span",{className:"key"},t),"=",l.a.createElement("span",{className:"value"},n.toString()))}))}))[0]),l.a.createElement("div",{className:"details"},"Started 8 hours ago. 25 completed HITs. 
3 disconnects."))})),l.a.createElement("button",{className:"btn"},"Launch a new task")),l.a.createElement("div",{className:"card"},l.a.createElement("h3",null,l.a.createElement("span",{className:"badge"},"3"),l.a.createElement("em",null,"Review")," data"),l.a.createElement("p",null,"You have"," ",l.a.createElement(u.b,{to:"/review",alt:"link"},"300 HITs")," ","left to review."),l.a.createElement(u.b,{className:"unstyled",to:"/review"},l.a.createElement("div",{className:"live-task interactive"},l.a.createElement("div",{className:"title"},"LIGHT dialogue"),l.a.createElement("div",{className:"hyperparameters"},[null].map((function(){return Object.entries({dataset:"twitter",model:"v3.123",enableSafety:!0,saveDataFile:"localDB"}).map((function(e){var a=Object(E.a)(e,2),t=a[0],n=a[1];return l.a.createElement("span",{className:"hyperparameter"},l.a.createElement("span",{className:"key"},t),"=",l.a.createElement("span",{className:"value"},n.toString()))}))}))[0]),l.a.createElement("div",{className:"details"},l.a.createElement("div",{className:"metrics highlight-first"},l.a.createElement("div",{className:"metric"},"70",l.a.createElement("label",null,"Unreviewed")),l.a.createElement("div",{className:"metric"},"300",l.a.createElement("label",null,"Approved")),l.a.createElement("div",{className:"metric"},"23",l.a.createElement("label",null,"Rejected"))),l.a.createElement("p",{className:"warning"},"Warning: 10 HITs are nearing their 2 week deadline and risk being auto-approved.")))),l.a.createElement("div",{className:"live-task interactive"},l.a.createElement("div",{className:"title"},"LIGHT dialogue (pilot v1)"),l.a.createElement("div",{className:"hyperparameters"},[null].map((function(){return Object.entries({dataset:"twitter",model:"v3.123",enableSafety:!0,saveDataFile:"localDB"}).map((function(e){var a=Object(E.a)(e,2),t=a[0],n=a[1];return 
l.a.createElement("span",{className:"hyperparameter"},l.a.createElement("span",{className:"key"},t),"=",l.a.createElement("span",{className:"value"},n.toString()))}))}))[0]),l.a.createElement("div",{className:"details"},l.a.createElement("div",{className:"metrics highlight-first"},l.a.createElement("div",{className:"metric"},"230",l.a.createElement("label",null,"Unreviewed")),l.a.createElement("div",{className:"metric"},"300",l.a.createElement("label",null,"Approved")),l.a.createElement("div",{className:"metric"},"23",l.a.createElement("label",null,"Rejected"))))),l.a.createElement("div",{style:{marginTop:20}},l.a.createElement("a",{className:"App-link",href:"#",target:"_blank",rel:"noopener noreferrer"},"View all completed tasks \xbb"))),l.a.createElement("div",{className:"card export-data"},l.a.createElement("h3",null,l.a.createElement("span",{className:"badge"},"4"),l.a.createElement("em",null,"Export")," data"),l.a.createElement("p",null,"Output data for tasks is stored in:"," ",l.a.createElement("code",null,"/data//")),l.a.createElement("p",null,"Best practices:",l.a.createElement("ul",null,l.a.createElement("li",null,"Use a Jupyter notebook to post-process your data to encode reproducibility steps.")))))))},h=(t(43),t(27)),g=t.n(h),f=function(){return l.a.createElement("div",{style:{display:"flex",flexDirection:"row",maxWidth:1440,width:"100%",margin:"0 auto",alignItems:"flex-start"}},l.a.createElement("div",{className:"card"},l.a.createElement("h3",null,l.a.createElement("span",{className:"badge"},"3"),l.a.createElement("em",null,"Review")," data"),l.a.createElement("p",null,"You have"," ",l.a.createElement(u.b,{to:"/review",alt:"link"},"300 total HITs")," ","left to review across 5 tasks."),l.a.createElement("div",{style:{background:"#ee1054",margin:"0px -20px",padding:"5px 20px",color:"white",fontWeight:"bold"}},"Currently Reviewing Task:"),l.a.createElement("div",{className:"live-task"},l.a.createElement("div",{className:"title"},"LIGHT 
dialogue"),l.a.createElement("div",{className:"hyperparameters"},[null].map((function(){return Object.entries({dataset:"twitter",model:"v3.123",enableSafety:!0,saveDataFile:"localDB"}).map((function(e){var a=Object(E.a)(e,2),t=a[0],n=a[1];return l.a.createElement("span",{className:"hyperparameter"},l.a.createElement("span",{className:"key"},t),"=",l.a.createElement("span",{className:"value"},n.toString()))}))}))[0]),l.a.createElement("div",{className:"details"},l.a.createElement("div",{className:"metrics highlight-first"},l.a.createElement("div",{className:"metric"},"70",l.a.createElement("label",null,"Unreviewed")),l.a.createElement("div",{className:"metric"},"300",l.a.createElement("label",null,"Approved")),l.a.createElement("div",{className:"metric"},"23",l.a.createElement("label",null,"Rejected"))))),l.a.createElement("div",{style:{background:"#ee1054",margin:"0px -20px",padding:"5px 20px",color:"white",fontWeight:"bold"}},"Currently Reviewing User:"),l.a.createElement("div",{className:"user_info"},l.a.createElement("div",{className:"title"},"User #182"),l.a.createElement("div",{className:"hyperparameters"},[null].map((function(){return Object.entries({"All-Time Approved":"96%","All-Time Evaluated":"83",timezone:"EST",browser:"Chrome"}).map((function(e){var a=Object(E.a)(e,2),t=a[0],n=a[1];return l.a.createElement("span",{className:"hyperparameter"},l.a.createElement("span",{className:"key"},t),"=",l.a.createElement("span",{className:"value"},n.toString()))}))}))[0]),l.a.createElement("div",{className:"details"},l.a.createElement("div",{className:"metrics highlight-first"},l.a.createElement("div",{className:"metric"},"3/3",l.a.createElement("label",null,"Golden tasks")),l.a.createElement("div",{className:"metric"},"31",l.a.createElement("label",null,"Submitted")),l.a.createElement("div",{className:"metric"},"0",l.a.createElement("label",null,"Disconnects"))),l.a.createElement("div",{className:"metrics highlight-first 
anticipate-double",style:{marginTop:15}},l.a.createElement("div",{className:"metric"},"\u2014 / 0"," ",l.a.createElement("label",null,"Current Task: %\xa0Approved\xa0/\xa0#\xa0Evaluated"))))),l.a.createElement("div",{style:{backgroundColor:"#ffffcf",padding:"5px 10px 10px",border:"1px solid #f5f5e0"}},l.a.createElement("button",{className:"btn",style:{background:"green"}},"Approve"),l.a.createElement("button",{className:"btn"},"Approve all by this user"),l.a.createElement("button",{className:"btn",style:{background:"crimson"}},"Reject"))),l.a.createElement("div",{className:"card",style:{flex:"1",backgroundColor:"white",width:"100%",maxWidth:"none",margin:"15px 20px 0 10px",minHeight:300}},l.a.createElement("div",{className:"review-control-panel"},"\xa0"),l.a.createElement("div",{className:"task-frame",style:{backgroundColor:"white",color:"black"}},l.a.createElement("img",{src:g.a,width:"100%"}))))},b=(t(44),t(28)),N=t.n(b),y=(t(12),t(29)),k=t.n(y),w=(t(51),[{name:"QA Data Collection",desc:"Collect questions and answers from Turkers, given a random Wikipedia paragraph from SQuAD",tags:["qa","wikipedia","SQuAD"]},{name:"Model Evaluator",desc:"Ask Turkers to evaluate the information retrieval baseline model on the Reddit movie dialog dataset",tags:["reddit","evaluate"]},{name:"Multi-Agent Dialog",desc:"Round-robin chat between a local human agent and two Turkers",tags:["round-robin","dialog"]},{name:"Deal or No Deal",desc:"Negotiation chat between two agents over how to fairly divide a fixed set of items when each agent values the items differently",tags:["negotiation"]},{name:"Qualification Flow Example",desc:"Filter out workers from working on more instances of your task if they fail to complete a test instance properly",tags:["example"]},{name:"Semantic Alignment",desc:"Allow Turkers to pick matching words between a pair of sentences based on some criteria",tags:["sentences","matching"],initialData:'{\n "text1": "Brad and Angelina tied the knot on Friday",\n 
"text2": "Angelina got married to Brad on Friday"\n}'}]),x=function(){var e=l.a.useState(null),a=Object(E.a)(e,2),t=a[0],n=a[1],r=l.a.useState(""),c=Object(E.a)(r,2),s=c[0],i=c[1];return l.a.createElement("div",{style:{display:"flex",flexDirection:"row",maxWidth:1440,width:"100%",margin:"0 auto",alignItems:"stretch",padding:"0 20px",boxSizing:"border-box"}},l.a.createElement("div",{className:"card",style:{maxWidth:"none",width:"100%",minHeight:"300px",display:"flex"}},l.a.createElement("div",{className:"task-list"},l.a.createElement("input",{className:"search",placeholder:"search...",value:s,onChange:function(e){return i(e.target.value)}}),w.map((function(e,a){return(""===s||e.name.match(new RegExp(s,"i"))||e.tags.join(" ").match(new RegExp(s,"i")))&&l.a.createElement("div",{className:N()("task-list-item",{selected:a===t}),onClick:function(){return n(a)}},l.a.createElement("div",{className:"name"},e.name),l.a.createElement("div",{className:"tags"},e.tags.map((function(e){return l.a.createElement("span",null,"#",e)}))))})))," ",null!==t&&l.a.createElement("div",{className:"task-description",key:t},l.a.createElement("h1",null,w[t].name),l.a.createElement("p",null,w[t].desc),l.a.createElement("div",{className:"controls"},l.a.createElement("h3",null,"Pass ",l.a.createElement("code",null,"task_data"),":"),l.a.createElement(S,{id:t,initial:w[t].initialData||""}),l.a.createElement("button",{className:"btn inverse rounded"},"Submit")),l.a.createElement("div",{style:{display:"flex",justifyContent:"space-between",marginBottom:10}},l.a.createElement("h3",null,"Live Example:"),l.a.createElement("div",null,"Viewing As:"," ",l.a.createElement("select",{style:{font:"inherit"}},l.a.createElement("option",null,"Worker"),l.a.createElement("option",null,"Teacher"),l.a.createElement("option",null,"Reviewer")))),l.a.createElement("iframe",{src:"https://codesandbox.io/embed/x3oy3myvyp?fontsize=14&hidenavigation=1&view=preview",title:"Paired Phrases Picker 
v0.1",style:{width:"100%",height:"500px",border:0,borderRadius:"4px",overflow:"hidden"},sandbox:"allow-modals allow-forms allow-popups allow-scripts allow-same-origin"}))))};function S(e){var a=e.initial,t=e.id,n=l.a.useState("test"),r=Object(E.a)(n,2);r[0],r[1];return l.a.createElement("div",{className:"text-container"},l.a.createElement(k.a,{highlightActiveLine:!1,value:a,style:{backgroundColor:"transparent",padding:"10px",width:"100%",fontSize:14,boxSizing:"border-box"},mode:"javascript",placeholder:"Type JSON payload here...",tabSize:2,maxLines:1/0,theme:"tuesday",height:"100px",showGutter:!1,showPrintMargin:!1,name:t,editorProps:{$blockScrolling:1/0}}))}t(53);function j(){return(j=Object.assign||function(e){for(var a=1;a=0||(l[t]=e[t]);return l}(e,a);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(l[t]=e[t])}return l}var A=l.a.createElement("title",null,"M"),T=l.a.createElement("desc",null,"Created with Sketch."),R=l.a.createElement("g",{id:"Page-1",stroke:"none",strokeWidth:1,fill:"none",fillRule:"evenodd",fontFamily:"Phosphate-Inline, Phosphate",fontSize:48,fontWeight:"normal",letterSpacing:-.8135598},l.a.createElement("g",{id:"Landing-Page",transform:"translate(-73.000000, -49.000000)",fill:"#2768FF"},l.a.createElement("text",{id:"M"},l.a.createElement("tspan",{x:72.4507799,y:82},"M")))),D=function(e){var a=e.svgRef,t=e.title,n=O(e,["svgRef","title"]);return l.a.createElement("svg",j({className:"logo",width:"35px",height:"33px",viewBox:"0 0 35 33",ref:a},n),void 0===t?A:t?l.a.createElement("title",null,t):null,T,R)},C=l.a.forwardRef((function(e,a){return l.a.createElement(D,j({svgRef:a},e))})),W=(t.p,function(e){function a(){return Object(s.a)(this,a),Object(m.a)(this,Object(o.a)(a).apply(this,arguments))}return Object(d.a)(a,e),Object(i.a)(a,[{key:"render",value:function(){return 
l.a.createElement("div",{className:"App"},l.a.createElement(u.a,null,l.a.createElement("header",{className:"App-header"},l.a.createElement(p.c,null,l.a.createElement(p.a,{exact:!0,path:"/",render:function(){return l.a.createElement(u.b,{to:"/dashboard"},l.a.createElement(C,null),"mephisto")}}),l.a.createElement(p.a,{render:function(){return l.a.createElement(u.b,{to:"/dashboard"},l.a.createElement(C,null),"mephisto")}}))),l.a.createElement(p.a,{exact:!0,path:"/",render:function(){return l.a.createElement(v,null)}}),l.a.createElement(p.a,{exact:!0,path:"/dashboard",render:function(){return l.a.createElement(v,null)}}),l.a.createElement(p.a,{exact:!0,path:"/task-gallery",render:function(){return l.a.createElement(x,null)}}),l.a.createElement(p.a,{exact:!0,path:"/review",render:function(){return l.a.createElement(f,null)}})))}}]),a}(n.Component));c.a.render(l.a.createElement(W,null),document.getElementById("root"))}},[[32,1,2]]]);
+//# sourceMappingURL=main.d1c9df08.chunk.js.map
\ No newline at end of file
diff --git a/mephisto/client/full/webapp/build/static/js/main.d1c9df08.chunk.js.map b/mephisto/client/full/webapp/build/static/js/main.d1c9df08.chunk.js.map
new file mode 100644
index 000000000..284e6f63e
--- /dev/null
+++ b/mephisto/client/full/webapp/build/static/js/main.d1c9df08.chunk.js.map
@@ -0,0 +1 @@
+{"version":3,"sources":["sample.png","Dashboard.js","Review.js","TaskGallery.js","M.svg","App.js","index.js"],"names":["module","exports","className","href","target","rel","to","map","i","Object","entries","dataset","model","enableSafety","saveDataFile","key","value","toString","alt","style","marginTop","display","flexDirection","maxWidth","width","margin","alignItems","background","padding","color","fontWeight","timezone","browser","backgroundColor","border","flex","minHeight","src","samplePic","data_model","name","desc","tags","initialData","React","useState","selectedIndex","setSelectedIndex","searchTerm","setSearchTerm","boxSizing","placeholder","onChange","e","task","idx","match","RegExp","join","cx","selected","onClick","tag","JSONEditor","id","initial","justifyContent","marginBottom","font","title","height","borderRadius","overflow","sandbox","highlightActiveLine","fontSize","mode","tabSize","maxLines","Infinity","theme","showGutter","showPrintMargin","editorProps","$blockScrolling","_extends","assign","arguments","length","source","prototype","hasOwnProperty","call","apply","this","_objectWithoutProperties","excluded","sourceKeys","keys","indexOf","_objectWithoutPropertiesLoose","getOwnPropertySymbols","sourceSymbolKeys","propertyIsEnumerable","createElement","stroke","strokeWidth","fill","fillRule","fontFamily","letterSpacing","transform","x","y","_ref","svgRef","props","viewBox","ref","undefined","ForwardRef","forwardRef","App","exact","path","render","Dashboard","TaskGallery","Review","Component","ReactDOM","document","getElementById"],"mappings":"gFAAAA,EAAOC,QAAU,IAA0B,oC,sSCG5B,oBACb,yBAAKC,UAAU,aACb,yBAAKA,UAAU,wBACb,yBAAKA,UAAU,kBACb,yBAAKA,UAAU,oBACb,4BACE,0BAAMA,UAAU,SAAhB,KACA,sCAFF,WAOA,6BACE,8GAGE,uBACEA,UAAU,WACVC,KAAK,0DACLC,OAAO,SACPC,IAAI,uBAJN,wBASF,+VAOE,kBAAC,IAAD,CAAMH,UAAU,WAAWI,GAAG,iBAA9B,uCAGA,uBACEJ,UAAU,WACVC,KAAK,oFACLC,OAAO,SACPC,IAAI,uBAJN,gCAWN,yBAAKH,UAAU,QACb,4BACE,0BAAMA,UAAU,SAAhB,KACA,sCAFF,UAIA,iDACqB,2CADrB,kBAGC,CAAC,E
AAG,EAAG,GAAGK,KAAI,SAAAC,GAAC,OACd,yBAAKN,UAAU,yBACb,yBAAKA,UAAU,SACZM,EAAI,EAAI,sBAAwB,mBAC1B,IAANA,GAAW,0BAAMN,UAAU,WAAhB,YAEd,yBAAKA,UAAU,mBAEX,CAAC,MAAMK,KAAI,WACT,OAAOE,OAAOC,QAAQ,CACpBC,QAAS,UACTC,MAAO,SACPC,cAAc,EACdC,aAAc,YACbP,KAAI,mCAAEQ,EAAF,KAAOC,EAAP,YACL,0BAAMd,UAAU,kBACd,0BAAMA,UAAU,OAAOa,GADzB,IAEE,0BAAMb,UAAU,SAASc,EAAMC,mBAGlC,IAGP,yBAAKf,UAAU,WAAf,8DAKJ,4BAAQA,UAAU,OAAlB,sBAEF,yBAAKA,UAAU,QACb,4BACE,0BAAMA,UAAU,SAAhB,KACA,sCAFF,SAQA,sCACW,IACT,kBAAC,IAAD,CAAMI,GAAG,UAAUY,IAAI,QAAvB,YAEQ,IAJV,mBAcA,kBAAC,IAAD,CAAMhB,UAAU,WAAWI,GAAG,WAC5B,yBAAKJ,UAAU,yBACb,yBAAKA,UAAU,SAAf,kBACA,yBAAKA,UAAU,mBAEX,CAAC,MAAMK,KAAI,WACT,OAAOE,OAAOC,QAAQ,CACpBC,QAAS,UACTC,MAAO,SACPC,cAAc,EACdC,aAAc,YACbP,KAAI,mCAAEQ,EAAF,KAAOC,EAAP,YACL,0BAAMd,UAAU,kBACd,0BAAMA,UAAU,OAAOa,GADzB,IAEE,0BAAMb,UAAU,SAASc,EAAMC,mBAGlC,IAGP,yBAAKf,UAAU,WACb,yBAAKA,UAAU,2BACb,yBAAKA,UAAU,UAAf,KACI,8CAEJ,yBAAKA,UAAU,UAAf,MACK,4CAEL,yBAAKA,UAAU,UAAf,KACI,6CAIN,uBAAGA,UAAU,WAAb,uFAON,yBAAKA,UAAU,yBACb,yBAAKA,UAAU,SAAf,6BACA,yBAAKA,UAAU,mBAEX,CAAC,MAAMK,KAAI,WACT,OAAOE,OAAOC,QAAQ,CACpBC,QAAS,UACTC,MAAO,SACPC,cAAc,EACdC,aAAc,YACbP,KAAI,mCAAEQ,EAAF,KAAOC,EAAP,YACL,0BAAMd,UAAU,kBACd,0BAAMA,UAAU,OAAOa,GADzB,IAEE,0BAAMb,UAAU,SAASc,EAAMC,mBAGlC,IAGP,yBAAKf,UAAU,WACb,yBAAKA,UAAU,2BACb,yBAAKA,UAAU,UAAf,MACK,8CAEL,yBAAKA,UAAU,UAAf,MACK,4CAEL,yBAAKA,UAAU,UAAf,KACI,+CAKV,yBAAKiB,MAAO,CAAEC,UAAW,KACvB,uBACElB,UAAU,WACVC,KAAK,IACLC,OAAO,SACPC,IAAI,uBAJN,mCAgBJ,yBAAKH,UAAU,oBACb,4BACE,0BAAMA,UAAU,SAAhB,KACA,sCAFF,SAIA,iEACsC,IACpC,6DAEF,6CAEE,4BACE,wH,yBC/MC,oBACb,yBACEiB,MAAO,CACLE,QAAS,OACTC,cAAe,MACfC,SAAU,KACVC,MAAO,OACPC,OAAQ,SACRC,WAAY,eAGd,yBAAKxB,UAAU,QACb,4BACE,0BAAMA,UAAU,SAAhB,KACA,sCAFF,SAIA,sCACW,IACT,kBAAC,IAAD,CAAMI,GAAG,UAAUY,IAAI,QAAvB,kBAEQ,IAJV,kCAOA,yBACEC,MAAO,CACLQ,WAAY,UACZF,OAAQ,YACRG,QAAS,WACTC,MAAO,QACPC,WAAY,SANhB,6BAWA,yBAAK5B,UAAU,aACb,yBAAKA,UAAU,SAAf,kBACA,yBAAKA,UAAU,mBAEX,CAAC,MAAMK,KAAI,WACT,OAAOE,OAAOC,QAAQ,CACpBC,QAAS,UACTC,MAAO,SACPC,cAAc,EACdC,aAAc,YACbP,KAAI,mCAAEQ,EAAF,K
AAOC,EAAP,YACL,0BAAMd,UAAU,kBACd,0BAAMA,UAAU,OAAOa,GADzB,IAEE,0BAAMb,UAAU,SAASc,EAAMC,mBAGlC,IAGP,yBAAKf,UAAU,WACb,yBAAKA,UAAU,2BACb,yBAAKA,UAAU,UAAf,KACI,8CAEJ,yBAAKA,UAAU,UAAf,MACK,4CAEL,yBAAKA,UAAU,UAAf,KACI,+CAUV,yBACEiB,MAAO,CACLQ,WAAY,UACZF,OAAQ,YACRG,QAAS,WACTC,MAAO,QACPC,WAAY,SANhB,6BAYA,yBAAK5B,UAAU,aACb,yBAAKA,UAAU,SAAf,aACA,yBAAKA,UAAU,mBAEX,CAAC,MAAMK,KAAI,WACT,OAAOE,OAAOC,QAAQ,CACpB,oBAAqB,MACrB,qBAAsB,KACtBqB,SAAU,MACVC,QAAS,WACRzB,KAAI,mCAAEQ,EAAF,KAAOC,EAAP,YACL,0BAAMd,UAAU,kBACd,0BAAMA,UAAU,OAAOa,GADzB,IAEE,0BAAMb,UAAU,SAASc,EAAMC,mBAGlC,IAGP,yBAAKf,UAAU,WACb,yBAAKA,UAAU,2BACb,yBAAKA,UAAU,UAAf,MACK,gDAEL,yBAAKA,UAAU,UAAf,KACI,6CAEJ,yBAAKA,UAAU,UAAf,IACG,gDAGL,yBACEA,UAAU,4CACViB,MAAO,CAAEC,UAAW,KAEpB,yBAAKlB,UAAU,UAAf,aACc,IACZ,yFAaR,yBACEiB,MAAO,CACLc,gBAAiB,UACjBL,QAAS,gBACTM,OAAQ,sBAGV,4BAAQhC,UAAU,MAAMiB,MAAO,CAAEQ,WAAY,UAA7C,WAGA,4BAAQzB,UAAU,OAAlB,4BACA,4BAAQA,UAAU,MAAMiB,MAAO,CAAEQ,WAAY,YAA7C,YAKJ,yBACEzB,UAAU,OACViB,MAAO,CACLgB,KAAM,IACNF,gBAAiB,QACjBT,MAAO,OACPD,SAAU,OACVE,OAAQ,mBACRW,UAAW,MAGb,yBAAKlC,UAAU,wBAAf,QACA,yBACEA,UAAU,aACViB,MAAO,CACLc,gBAAiB,QACjBJ,MAAO,UAIT,yBAAKQ,IAAKC,IAAWd,MAAM,a,kDCvK7Be,G,MAAa,CACjB,CACEC,KAAM,qBACNC,KACE,4FACFC,KAAM,CAAC,KAAM,YAAa,UAE5B,CACEF,KAAM,kBACNC,KACE,sGACFC,KAAM,CAAC,SAAU,aAEnB,CACEF,KAAM,qBACNC,KAAM,+DACNC,KAAM,CAAC,cAAe,WAExB,CACEF,KAAM,kBACNC,KACE,kIACFC,KAAM,CAAC,gBAET,CACEF,KAAM,6BACNC,KACE,mHACFC,KAAM,CAAC,YAET,CACEF,KAAM,qBACNC,KACE,0FACFC,KAAM,CAAC,YAAa,YACpBC,YAAY,wHAOD,aAAO,IAAD,EACuBC,IAAMC,SAAS,MADtC,mBACZC,EADY,KACGC,EADH,OAEiBH,IAAMC,SAAS,IAFhC,mBAEZG,EAFY,KAEAC,EAFA,KAInB,OACE,yBACE9B,MAAO,CACLE,QAAS,OACTC,cAAe,MACfC,SAAU,KACVC,MAAO,OACPC,OAAQ,SACRC,WAAY,UACZE,QAAS,SACTsB,UAAW,eAGb,yBACEhD,UAAU,OACViB,MAAO,CACLI,SAAU,OACVC,MAAO,OACPY,UAAW,QACXf,QAAS,SAGX,yBAAKnB,UAAU,aACb,2BACEA,UAAU,SACViD,YAAY,YACZnC,MAAOgC,EACPI,SAAU,SAAAC,GAAC,OAAIJ,EAAcI,EAAEjD,OAAOY,UAEvCuB,EAAWhC,KACV,SAAC+C,EAAMC,GAAP,OACkB,KAAfP,GACCM,EAAKd,KAAKgB,MAAM,IAAIC,OAAOT,EAAY,OACvCM,EAAKZ,KAAKgB,KAAK,KAAKF,MAAM,
IAAIC,OAAOT,EAAY,QACjD,yBACE9C,UAAWyD,IAAG,iBAAkB,CAC9BC,SAAUL,IAAQT,IAEpBe,QAAS,kBAAMd,EAAiBQ,KAEhC,yBAAKrD,UAAU,QAAQoD,EAAKd,MAC5B,yBAAKtC,UAAU,QACZoD,EAAKZ,KAAKnC,KAAI,SAAAuD,GAAG,OAChB,kCAAQA,YAMf,IACY,OAAlBhB,GACC,yBAAK5C,UAAU,mBAAmBa,IAAK+B,GACrC,4BAAKP,EAAWO,GAAeN,MAC/B,2BAAID,EAAWO,GAAeL,MAC9B,yBAAKvC,UAAU,YACb,oCACO,2CADP,KAIA,kBAAC6D,EAAD,CACEC,GAAIlB,EACJmB,QAAS1B,EAAWO,GAAeH,aAAe,KAEpD,4BAAQzC,UAAU,uBAAlB,WAEF,yBACEiB,MAAO,CACLE,QAAS,OACT6C,eAAgB,gBAChBC,aAAc,KAGhB,6CACA,2CACc,IACZ,4BAAQhD,MAAO,CAAEiD,KAAM,YACrB,0CACA,2CACA,+CAIN,4BACE/B,IAAI,oFACJgC,MAAM,6BACNlD,MAAO,CACLK,MAAO,OACP8C,OAAQ,QACRpC,OAAQ,EACRqC,aAAc,MACdC,SAAU,UAEZC,QAAQ,8EAStB,SAASV,EAAT,GAAsC,IAAhBE,EAAe,EAAfA,QAASD,EAAM,EAANA,GAAM,EACCpB,IAAMC,SAAS,QADhB,6BAGnC,OACE,yBAAK3C,UAAU,kBACb,kBAAC,IAAD,CACEwE,qBAAqB,EACrB1D,MAAOiD,EACP9C,MAAO,CACLc,gBAAiB,cACjBL,QAAS,OACTJ,MAAO,OACPmD,SAAU,GACVzB,UAAW,cAEb0B,KAAK,aACLzB,YAAa,4BACb0B,QAAS,EACTC,SAAUC,IACVC,MAAM,UACNV,OAAO,QACPW,YAAY,EACZC,iBAAiB,EACjB1C,KAAMwB,EACNmB,YAAa,CACXC,gBAAiBL,Q,MCpL3B,SAASM,IAA2Q,OAA9PA,EAAW5E,OAAO6E,QAAU,SAAUlF,GAAU,IAAK,IAAII,EAAI,EAAGA,EAAI+E,UAAUC,OAAQhF,IAAK,CAAE,IAAIiF,EAASF,UAAU/E,GAAI,IAAK,IAAIO,KAAO0E,EAAchF,OAAOiF,UAAUC,eAAeC,KAAKH,EAAQ1E,KAAQX,EAAOW,GAAO0E,EAAO1E,IAAY,OAAOX,IAA2ByF,MAAMC,KAAMP,WAEhT,SAASQ,EAAyBN,EAAQO,GAAY,GAAc,MAAVP,EAAgB,MAAO,GAAI,IAAkE1E,EAAKP,EAAnEJ,EAEzF,SAAuCqF,EAAQO,GAAY,GAAc,MAAVP,EAAgB,MAAO,GAAI,IAA2D1E,EAAKP,EAA5DJ,EAAS,GAAQ6F,EAAaxF,OAAOyF,KAAKT,GAAqB,IAAKjF,EAAI,EAAGA,EAAIyF,EAAWT,OAAQhF,IAAOO,EAAMkF,EAAWzF,GAAQwF,EAASG,QAAQpF,IAAQ,IAAaX,EAAOW,GAAO0E,EAAO1E,IAAQ,OAAOX,EAFxMgG,CAA8BX,EAAQO,GAAuB,GAAIvF,OAAO4F,sBAAuB,CAAE,IAAIC,EAAmB7F,OAAO4F,sBAAsBZ,GAAS,IAAKjF,EAAI,EAAGA,EAAI8F,EAAiBd,OAAQhF,IAAOO,EAAMuF,EAAiB9F,GAAQwF,EAASG,QAAQpF,IAAQ,GAAkBN,OAAOiF,UAAUa,qBAAqBX,KAAKH,EAAQ1E,KAAgBX,EAAOW,GAAO0E,EAAO1E,IAAU,OAAOX,EAMne,IAAI,EAEJ,IAAMoG,cAAc,QAAS,KAAM,KAE/B,EAEJ,IAAMA,cAAc,OAAQ,KAAM,wBAE9B,EAEJ,IAAMA,cAAc,IAAK,CACvBxC,GAAI,SACJyC,OAAQ,OACRC,YAAa,EACbC,KAAM,OACNC,SAAU,UACVC,WAAY,
8BACZlC,SAAU,GACV7C,WAAY,SACZgF,eAAgB,UACf,IAAMN,cAAc,IAAK,CAC1BxC,GAAI,eACJ+C,UAAW,oCACXJ,KAAM,WACL,IAAMH,cAAc,OAAQ,CAC7BxC,GAAI,KACH,IAAMwC,cAAc,QAAS,CAC9BQ,EAAG,WACHC,EAAG,IACF,QAEC,EAAO,SAAcC,GACvB,IAAIC,EAASD,EAAKC,OACd9C,EAAQ6C,EAAK7C,MACb+C,EAAQrB,EAAyBmB,EAAM,CAAC,SAAU,UAEtD,OAAO,IAAMV,cAAc,MAAOnB,EAAS,CACzCnF,UAAW,OACXsB,MAAO,OACP8C,OAAQ,OACR+C,QAAS,YACTC,IAAKH,GACJC,QAAkBG,IAAVlD,EAAsB,EAAQA,EAAQ,IAAMmC,cAAc,QAAS,KAAMnC,GAAS,KAAM,EAAO,IAGxGmD,EAAa,IAAMC,YAAW,SAAUL,EAAOE,GACjD,OAAO,IAAMd,cAAc,EAAMnB,EAAS,CACxC8B,OAAQG,GACPF,OCOUM,GDLA,I,iLC/CX,OACE,yBAAKxH,UAAU,OACb,kBAAC,IAAD,KACE,4BAAQA,UAAU,cAChB,kBAAC,IAAD,KACE,kBAAC,IAAD,CACEyH,OAAK,EACLC,KAAK,IACLC,OAAQ,kBACN,kBAAC,IAAD,CAAMvH,GAAG,cASP,kBAAC,EAAD,MATF,eAcJ,kBAAC,IAAD,CACEuH,OAAQ,kBACN,kBAAC,IAAD,CAAMvH,GAAG,cASP,kBAAC,EAAD,MATF,iBAiBR,kBAAC,IAAD,CAAOqH,OAAK,EAACC,KAAK,IAAIC,OAAQ,kBAAM,kBAACC,EAAD,SACpC,kBAAC,IAAD,CAAOH,OAAK,EAACC,KAAK,aAAaC,OAAQ,kBAAM,kBAACC,EAAD,SAC7C,kBAAC,IAAD,CAAOH,OAAK,EAACC,KAAK,gBAAgBC,OAAQ,kBAAM,kBAACE,EAAD,SAChD,kBAAC,IAAD,CAAOJ,OAAK,EAACC,KAAK,UAAUC,OAAQ,kBAAM,kBAACG,EAAD,e,GA/ClCC,cCJlBC,IAASL,OAAO,kBAAC,EAAD,MAASM,SAASC,eAAe,W","file":"static/js/main.d1c9df08.chunk.js","sourcesContent":["module.exports = __webpack_public_path__ + \"static/media/sample.44730b5b.png\";","import React from \"react\";\nimport { Link } from \"react-router-dom\";\n\nexport default () => (\n
\n
\n
\n
\n
\n 1\n Create a task\n
\n {/*
\nEdit src/App.js and save to reload.\n
*/}\n
\n
\n You will need an MTurk Requester Account and an AWS account\n (separate accounts).\n \n Setting up AWS »\n \n
\n
\n We have a growing collection of tasks already available. You can\n re-use one of them, clone & modify one of them to create a\n similar yet different task, or create a completely new custom task\n altogether. Custom tasks can range from simple static pages, to\n multi-agent dialogue, to dynamic & interactive React\n applications!\n \n View gallery of existing tasks »\n \n \n Create a custom task »\n \n
\n Output data for tasks is stored in:{\" \"}\n /data/<task-name>/<run-id>\n
\n
\n Best practices:\n
\n
\n Use a Jupyter notebook to post-process your data to encode\n reproducibility steps.\n
\n
\n \n
\n
\n
\n
\n);\n","import React from \"react\";\nimport { Link } from \"react-router-dom\";\nimport \"./review.css\";\nimport samplePic from \"./sample.png\"\n\nexport default () => (\n
\n
\n
\n 3\n Review data\n
\n
\n You have{\" \"}\n \n 300 total HITs\n {\" \"}\n left to review across 5 tasks.\n
\n);\n","import React from \"react\";\nimport \"./taskgallery.css\";\nimport cx from \"classnames\";\n\nimport brace from \"brace\";\nimport AceEditor from \"react-ace\";\nimport \"brace/mode/javascript\";\n\nconst data_model = [\n {\n name: \"QA Data Collection\",\n desc:\n \"Collect questions and answers from Turkers, given a random Wikipedia paragraph from SQuAD\",\n tags: [\"qa\", \"wikipedia\", \"SQuAD\"]\n },\n {\n name: \"Model Evaluator\",\n desc:\n \"Ask Turkers to evaluate the information retrieval baseline model on the Reddit movie dialog dataset\",\n tags: [\"reddit\", \"evaluate\"]\n },\n {\n name: \"Multi-Agent Dialog\",\n desc: \"Round-robin chat between a local human agent and two Turkers\",\n tags: [\"round-robin\", \"dialog\"]\n },\n {\n name: \"Deal or No Deal\",\n desc:\n \"Negotiation chat between two agents over how to fairly divide a fixed set of items when each agent values the items differently\",\n tags: [\"negotiation\"]\n },\n {\n name: \"Qualification Flow Example\",\n desc:\n \"Filter out workers from working on more instances of your task if they fail to complete a test instance properly\",\n tags: [\"example\"]\n },\n {\n name: \"Semantic Alignment\",\n desc:\n \"Allow Turkers to pick matching words between a pair of sentences based on some criteria\",\n tags: [\"sentences\", \"matching\"],\n initialData: `{\n \"text1\": \"Brad and Angelina tied the knot on Friday\",\n \"text2\": \"Angelina got married to Brad on Friday\"\n}`\n }\n];\n\nexport default () => {\n const [selectedIndex, setSelectedIndex] = React.useState(null);\n const [searchTerm, setSearchTerm] = React.useState(\"\");\n\n return (\n
\n );\n }\n}\n\nexport default App;\n","import React from \"react\";\nimport ReactDOM from \"react-dom\";\nimport \"./index.css\";\nimport App from \"./App\";\n\nReactDOM.render(, document.getElementById(\"root\"));\n"],"sourceRoot":""}
\ No newline at end of file
diff --git a/mephisto/webapp/build/static/js/runtime-main.97447897.js b/mephisto/client/full/webapp/build/static/js/runtime-main.97447897.js
similarity index 100%
rename from mephisto/webapp/build/static/js/runtime-main.97447897.js
rename to mephisto/client/full/webapp/build/static/js/runtime-main.97447897.js
diff --git a/mephisto/webapp/build/static/js/runtime-main.97447897.js.map b/mephisto/client/full/webapp/build/static/js/runtime-main.97447897.js.map
similarity index 100%
rename from mephisto/webapp/build/static/js/runtime-main.97447897.js.map
rename to mephisto/client/full/webapp/build/static/js/runtime-main.97447897.js.map
diff --git a/mephisto/webapp/build/static/media/Oval.456d8835.svg b/mephisto/client/full/webapp/build/static/media/Oval.456d8835.svg
similarity index 100%
rename from mephisto/webapp/build/static/media/Oval.456d8835.svg
rename to mephisto/client/full/webapp/build/static/media/Oval.456d8835.svg
diff --git a/mephisto/webapp/build/static/media/icons-16.782154b8.ttf b/mephisto/client/full/webapp/build/static/media/icons-16.782154b8.ttf
similarity index 100%
rename from mephisto/webapp/build/static/media/icons-16.782154b8.ttf
rename to mephisto/client/full/webapp/build/static/media/icons-16.782154b8.ttf
diff --git a/mephisto/webapp/build/static/media/icons-16.7fb844a7.woff b/mephisto/client/full/webapp/build/static/media/icons-16.7fb844a7.woff
similarity index 100%
rename from mephisto/webapp/build/static/media/icons-16.7fb844a7.woff
rename to mephisto/client/full/webapp/build/static/media/icons-16.7fb844a7.woff
diff --git a/mephisto/webapp/build/static/media/icons-16.d4f6722b.eot b/mephisto/client/full/webapp/build/static/media/icons-16.d4f6722b.eot
similarity index 100%
rename from mephisto/webapp/build/static/media/icons-16.d4f6722b.eot
rename to mephisto/client/full/webapp/build/static/media/icons-16.d4f6722b.eot
diff --git a/mephisto/webapp/build/static/media/icons-20.90afdd23.eot b/mephisto/client/full/webapp/build/static/media/icons-20.90afdd23.eot
similarity index 100%
rename from mephisto/webapp/build/static/media/icons-20.90afdd23.eot
rename to mephisto/client/full/webapp/build/static/media/icons-20.90afdd23.eot
diff --git a/mephisto/webapp/build/static/media/icons-20.c0e4874e.woff b/mephisto/client/full/webapp/build/static/media/icons-20.c0e4874e.woff
similarity index 100%
rename from mephisto/webapp/build/static/media/icons-20.c0e4874e.woff
rename to mephisto/client/full/webapp/build/static/media/icons-20.c0e4874e.woff
diff --git a/mephisto/webapp/build/static/media/icons-20.d1f0f6c1.ttf b/mephisto/client/full/webapp/build/static/media/icons-20.d1f0f6c1.ttf
similarity index 100%
rename from mephisto/webapp/build/static/media/icons-20.d1f0f6c1.ttf
rename to mephisto/client/full/webapp/build/static/media/icons-20.d1f0f6c1.ttf
diff --git a/mephisto/webapp/package.json b/mephisto/client/full/webapp/package.json
similarity index 100%
rename from mephisto/webapp/package.json
rename to mephisto/client/full/webapp/package.json
diff --git a/mephisto/webapp/public/index.html b/mephisto/client/full/webapp/public/index.html
similarity index 100%
rename from mephisto/webapp/public/index.html
rename to mephisto/client/full/webapp/public/index.html
diff --git a/mephisto/webapp/public/robots.txt b/mephisto/client/full/webapp/public/robots.txt
similarity index 100%
rename from mephisto/webapp/public/robots.txt
rename to mephisto/client/full/webapp/public/robots.txt
diff --git a/mephisto/webapp/src/App.css b/mephisto/client/full/webapp/src/App.css
similarity index 100%
rename from mephisto/webapp/src/App.css
rename to mephisto/client/full/webapp/src/App.css
diff --git a/mephisto/webapp/src/App.tsx b/mephisto/client/full/webapp/src/App.tsx
similarity index 100%
rename from mephisto/webapp/src/App.tsx
rename to mephisto/client/full/webapp/src/App.tsx
diff --git a/mephisto/webapp/src/Oval.svg b/mephisto/client/full/webapp/src/Oval.svg
similarity index 100%
rename from mephisto/webapp/src/Oval.svg
rename to mephisto/client/full/webapp/src/Oval.svg
diff --git a/mephisto/webapp/src/axios.ts b/mephisto/client/full/webapp/src/axios.ts
similarity index 100%
rename from mephisto/webapp/src/axios.ts
rename to mephisto/client/full/webapp/src/axios.ts
diff --git a/mephisto/webapp/src/index.css b/mephisto/client/full/webapp/src/index.css
similarity index 100%
rename from mephisto/webapp/src/index.css
rename to mephisto/client/full/webapp/src/index.css
diff --git a/mephisto/webapp/src/index.tsx b/mephisto/client/full/webapp/src/index.tsx
similarity index 100%
rename from mephisto/webapp/src/index.tsx
rename to mephisto/client/full/webapp/src/index.tsx
diff --git a/mephisto/webapp/src/lib/Async.ts b/mephisto/client/full/webapp/src/lib/Async.ts
similarity index 100%
rename from mephisto/webapp/src/lib/Async.ts
rename to mephisto/client/full/webapp/src/lib/Async.ts
diff --git a/mephisto/webapp/src/lib/toaster.ts b/mephisto/client/full/webapp/src/lib/toaster.ts
similarity index 100%
rename from mephisto/webapp/src/lib/toaster.ts
rename to mephisto/client/full/webapp/src/lib/toaster.ts
diff --git a/mephisto/webapp/src/logo.svg b/mephisto/client/full/webapp/src/logo.svg
similarity index 100%
rename from mephisto/webapp/src/logo.svg
rename to mephisto/client/full/webapp/src/logo.svg
diff --git a/mephisto/webapp/src/mocks.ts b/mephisto/client/full/webapp/src/mocks.ts
similarity index 100%
rename from mephisto/webapp/src/mocks.ts
rename to mephisto/client/full/webapp/src/mocks.ts
diff --git a/mephisto/webapp/src/models.ts b/mephisto/client/full/webapp/src/models.ts
similarity index 100%
rename from mephisto/webapp/src/models.ts
rename to mephisto/client/full/webapp/src/models.ts
diff --git a/mephisto/webapp/src/react-app-env.d.ts b/mephisto/client/full/webapp/src/react-app-env.d.ts
similarity index 100%
rename from mephisto/webapp/src/react-app-env.d.ts
rename to mephisto/client/full/webapp/src/react-app-env.d.ts
diff --git a/mephisto/webapp/src/service.ts b/mephisto/client/full/webapp/src/service.ts
similarity index 100%
rename from mephisto/webapp/src/service.ts
rename to mephisto/client/full/webapp/src/service.ts
diff --git a/mephisto/webapp/src/setupTests.ts b/mephisto/client/full/webapp/src/setupTests.ts
similarity index 100%
rename from mephisto/webapp/src/setupTests.ts
rename to mephisto/client/full/webapp/src/setupTests.ts
diff --git a/mephisto/webapp/src/utils.ts b/mephisto/client/full/webapp/src/utils.ts
similarity index 100%
rename from mephisto/webapp/src/utils.ts
rename to mephisto/client/full/webapp/src/utils.ts
diff --git a/mephisto/webapp/src/widgets/Base.tsx b/mephisto/client/full/webapp/src/widgets/Base.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/Base.tsx
rename to mephisto/client/full/webapp/src/widgets/Base.tsx
diff --git a/mephisto/webapp/src/widgets/GridReview.js b/mephisto/client/full/webapp/src/widgets/GridReview.js
similarity index 100%
rename from mephisto/webapp/src/widgets/GridReview.js
rename to mephisto/client/full/webapp/src/widgets/GridReview.js
diff --git a/mephisto/webapp/src/widgets/Launch.tsx b/mephisto/client/full/webapp/src/widgets/Launch.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/Launch.tsx
rename to mephisto/client/full/webapp/src/widgets/Launch.tsx
diff --git a/mephisto/webapp/src/widgets/Prepare.tsx b/mephisto/client/full/webapp/src/widgets/Prepare.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/Prepare.tsx
rename to mephisto/client/full/webapp/src/widgets/Prepare.tsx
diff --git a/mephisto/webapp/src/widgets/Review.tsx b/mephisto/client/full/webapp/src/widgets/Review.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/Review.tsx
rename to mephisto/client/full/webapp/src/widgets/Review.tsx
diff --git a/mephisto/webapp/src/widgets/TaskRunSummary.tsx b/mephisto/client/full/webapp/src/widgets/TaskRunSummary.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/TaskRunSummary.tsx
rename to mephisto/client/full/webapp/src/widgets/TaskRunSummary.tsx
diff --git a/mephisto/webapp/src/widgets/components/ArchitectSelect.tsx b/mephisto/client/full/webapp/src/widgets/components/ArchitectSelect.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/ArchitectSelect.tsx
rename to mephisto/client/full/webapp/src/widgets/components/ArchitectSelect.tsx
diff --git a/mephisto/webapp/src/widgets/components/BlueprintSelect.tsx b/mephisto/client/full/webapp/src/widgets/components/BlueprintSelect.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/BlueprintSelect.tsx
rename to mephisto/client/full/webapp/src/widgets/components/BlueprintSelect.tsx
diff --git a/mephisto/webapp/src/widgets/components/FormField.tsx b/mephisto/client/full/webapp/src/widgets/components/FormField.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/FormField.tsx
rename to mephisto/client/full/webapp/src/widgets/components/FormField.tsx
diff --git a/mephisto/webapp/src/widgets/components/OptionsForm.tsx b/mephisto/client/full/webapp/src/widgets/components/OptionsForm.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/OptionsForm.tsx
rename to mephisto/client/full/webapp/src/widgets/components/OptionsForm.tsx
diff --git a/mephisto/webapp/src/widgets/components/ProviderSelect.tsx b/mephisto/client/full/webapp/src/widgets/components/ProviderSelect.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/ProviderSelect.tsx
rename to mephisto/client/full/webapp/src/widgets/components/ProviderSelect.tsx
diff --git a/mephisto/webapp/src/widgets/components/RequesterForm.tsx b/mephisto/client/full/webapp/src/widgets/components/RequesterForm.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/RequesterForm.tsx
rename to mephisto/client/full/webapp/src/widgets/components/RequesterForm.tsx
diff --git a/mephisto/webapp/src/widgets/components/RequesterSelect.tsx b/mephisto/client/full/webapp/src/widgets/components/RequesterSelect.tsx
similarity index 100%
rename from mephisto/webapp/src/widgets/components/RequesterSelect.tsx
rename to mephisto/client/full/webapp/src/widgets/components/RequesterSelect.tsx
diff --git a/mephisto/webapp/tsconfig.json b/mephisto/client/full/webapp/tsconfig.json
similarity index 100%
rename from mephisto/webapp/tsconfig.json
rename to mephisto/client/full/webapp/tsconfig.json
diff --git a/mephisto/webapp/yarn.lock b/mephisto/client/full/webapp/yarn.lock
similarity index 100%
rename from mephisto/webapp/yarn.lock
rename to mephisto/client/full/webapp/yarn.lock
diff --git a/mephisto/client/review_server.py b/mephisto/client/review/review_server.py
similarity index 100%
rename from mephisto/client/review_server.py
rename to mephisto/client/review/review_server.py
diff --git a/mephisto/core/argparse_parser.py b/mephisto/core/argparse_parser.py
deleted file mode 100644
index a293e6952..000000000
--- a/mephisto/core/argparse_parser.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (c) Facebook, Inc. and its affiliates.
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""
-The following is a series of functions built to work with argparse
-version 1.1. They exist to be able to extract arguments out from
-an argparser for usage in other places. This allows Mephisto
-to be able to request the correct arguments from the frontend
-and construct valid argument strings from user input there.
-
-It relies on underlying implementation details of argparse (ick)
-and as such is only guaranteed stable for argparse 1.1
-"""
-
-import argparse
-from omegaconf import OmegaConf, MISSING, DictConfig
-from typing import Optional, Dict, Any, List, Tuple
-from dataclasses import fields, Field
-
-
-def get_dict_from_field(in_field: Field) -> Dict[str, Any]:
- """
- Extract all of the arguments from an argument group
- and return a dict mapping from argument dest to argument dict
- """
- found_type = "str"
- try:
- found_type = in_field.type.__name__
- except AttributeError:
- found_type = "unknown"
- return {
- "dest": in_field.name,
- "type": found_type,
- "default": in_field.default,
- "help": in_field.metadata.get("help"),
- "choices": in_field.metadata.get("choices"),
- "required": in_field.metadata.get("required", False),
- }
-
-
-def get_extra_argument_dicts(customizable_class: Any) -> List[Dict[str, Any]]:
- """
- Produce the argument dicts for the given customizable class
- (Blueprint, Architect, etc)
- """
- dict_fields = fields(customizable_class.ArgsClass)
- usable_fields = []
- group_field = None
- for f in dict_fields:
- if not f.name.startswith("_"):
- usable_fields.append(f)
- elif f.name == "_group":
- group_field = f
- parsed_fields = [get_dict_from_field(f) for f in usable_fields]
- help_text = ""
- if group_field is not None:
- help_text = group_field.metadata.get("help", "")
- return [{"desc": help_text, "args": {f["dest"]: f for f in parsed_fields}}]
-
-
-def parse_arg_dict(customizable_class: Any, args: Dict[str, Any]) -> DictConfig:
- """
- Get the ArgsClass for a class, then parse the given args using
- it. Return the DictConfig of the finalized namespace.
- """
- return OmegaConf.structured(customizable_class.ArgsClass(**args))
diff --git a/mephisto/core/config_handler.py b/mephisto/core/config_handler.py
index 0dc0b0ec2..f2d68bb0f 100644
--- a/mephisto/core/config_handler.py
+++ b/mephisto/core/config_handler.py
@@ -4,63 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import os
-import yaml
-from typing import Dict, Any
-
-CORE_SECTION = "core"
-DATA_STORAGE_KEY = "main_data_directory"
-
-DEFAULT_CONFIG_FOLDER = os.path.expanduser("~/.mephisto/")
-DEFAULT_CONFIG_FILE = os.path.join(DEFAULT_CONFIG_FOLDER, "config.yml")
-OLD_DATA_CONFIG_LOC = os.path.join(
- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "DATA_LOC"
+from mephisto.operations.config_handler import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.config_handler "
+ "to mephisto.operations.config_handler ",
+ PendingDeprecationWarning,
)
-
-
-def get_config() -> Dict[str, Any]:
- """Get the data out of the YAML config file"""
- with open(DEFAULT_CONFIG_FILE, "r") as config_file:
- return yaml.safe_load(config_file.read().strip())
-
-
-def write_config(config_data: Dict[str, Any]):
- """Write the given dictionary to the config yaml"""
- with open(DEFAULT_CONFIG_FILE, "w") as config_file:
- config_file.write(yaml.dump(config_data))
-
-
-def init_config() -> None:
- if not os.path.exists(DEFAULT_CONFIG_FOLDER):
- os.mkdir(DEFAULT_CONFIG_FOLDER)
-
- if os.path.exists(OLD_DATA_CONFIG_LOC):
- print(
- f"We are migrating Mephisto's configuration to a YAML file stored at {DEFAULT_CONFIG_FILE}"
- )
- with open(OLD_DATA_CONFIG_LOC, "r") as data_dir_file:
- loaded_data_dir = data_dir_file.read().strip()
- with open(DEFAULT_CONFIG_FILE, "w") as config_file:
- config_file.write(
- yaml.dump({CORE_SECTION: {DATA_STORAGE_KEY: loaded_data_dir}})
- )
- print(f"Removing DATA_LOC configuration file from {OLD_DATA_CONFIG_LOC}")
- os.unlink(OLD_DATA_CONFIG_LOC)
- elif not os.path.exists(DEFAULT_CONFIG_FILE):
- with open(DEFAULT_CONFIG_FILE, "w") as config_fp:
- config_fp.write(yaml.dump({CORE_SECTION: {}}))
-
-
-def add_config_arg(section: str, key: str, value: Any) -> None:
- """Add an argument to the YAML config, overwriting existing"""
- config = get_config()
- if section not in config:
- config[section] = {}
- config[section][key] = value
- write_config(config)
-
-
-def get_config_arg(section: str, key: str) -> Any:
- """Get an argument from the YAML config. Return None if it doesn't exist"""
- config = get_config()
- return config.get(section, {}).get(key, None)
diff --git a/mephisto/core/data_browser.py b/mephisto/core/data_browser.py
index 808f0aabf..10e0928fa 100644
--- a/mephisto/core/data_browser.py
+++ b/mephisto/core/data_browser.py
@@ -4,66 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.database import MephistoDB
-from mephisto.data_model.assignment import Unit
-from mephisto.data_model.task import TaskRun
-from mephisto.data_model.blueprint import AgentState
-from mephisto.data_model.agent import Agent
-
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.data_model.assignment_state import AssignmentState
-
-from typing import List, Optional, Any, Dict
-
-
-class DataBrowser:
- """
- Class with convenience methods for getting completed data
- back from runs to parse and manage with other scripts
- """
-
- def __init__(self, db=None):
- if db is None:
- db = LocalMephistoDB()
- self.db = db
-
- def _get_units_for_task_runs(self, task_runs: List[TaskRun]) -> List[Unit]:
- units = []
- for task_run in task_runs:
- assignments = task_run.get_assignments()
- for assignment in assignments:
- found_units = assignment.get_units()
- for unit in found_units:
- if unit.get_status() in [
- AssignmentState.COMPLETED,
- AssignmentState.ACCEPTED,
- AssignmentState.REJECTED,
- AssignmentState.SOFT_REJECTED,
- ]:
- units.append(unit)
- return units
-
- def get_units_for_task_name(self, task_name: str) -> List[Unit]:
- tasks = self.db.find_tasks(task_name=task_name)
- assert len(tasks) >= 1, f"No task found under name {task_name}"
- task_runs = self.db.find_task_runs(task_id=tasks[0].db_id)
- return self._get_units_for_task_runs(task_runs)
-
- def get_units_for_run_id(self, run_id: str) -> List[Unit]:
- task_run = TaskRun(self.db, run_id)
- return self._get_units_for_task_runs([task_run])
-
- def get_data_from_unit(self, unit: Unit) -> Dict[str, Any]:
- agent = unit.get_assigned_agent()
- assert (
- agent is not None
- ), f"Trying to get completed data from unassigned unit {unit}"
- return {
- "worker_id": agent.worker_id,
- "unit_id": unit.db_id,
- "assignment_id": unit.assignment_id,
- "status": agent.db_status,
- "data": agent.state.get_parsed_data(),
- "task_start": agent.state.get_task_start(),
- "task_end": agent.state.get_task_end(),
- }
+from mephisto.tools.data_browser import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.data_browser "
+ "to mephisto.tools.data_browser ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/core/hydra_config.py b/mephisto/core/hydra_config.py
index 7648756ba..eb9196bfd 100644
--- a/mephisto/core/hydra_config.py
+++ b/mephisto/core/hydra_config.py
@@ -4,48 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from hydra.core.config_store import ConfigStoreWithProvider
-from mephisto.data_model.blueprint import BlueprintArgs
-from mephisto.data_model.architect import ArchitectArgs
-from mephisto.data_model.crowd_provider import ProviderArgs
-from mephisto.data_model.task_config import TaskConfigArgs
-from dataclasses import dataclass, field
-from omegaconf import MISSING
-from typing import List, Any
-
-config = ConfigStoreWithProvider("mephisto")
-
-
-@dataclass
-class MephistoConfig:
- blueprint: BlueprintArgs = BlueprintArgs()
- provider: ProviderArgs = ProviderArgs()
- architect: ArchitectArgs = ArchitectArgs()
- task: TaskConfigArgs = TaskConfigArgs()
-
-
-@dataclass
-class RunScriptConfig:
- mephisto: MephistoConfig = MephistoConfig()
-
-
-def register_abstraction_config(name: str, node: Any, abstraction_type: str):
- config.store(
- name=name, node=node, group=f"mephisto/{abstraction_type}", package="_group_"
- )
-
-
-def initialize_named_configs():
- """
- Functionality to register the core mephisto configuration structure. Must be done in __init__
- """
- config.store(
- name="base_mephisto_config",
- node=MephistoConfig,
- group="mephisto",
- package="_group_",
- )
-
-
-def register_script_config(name: str, module: Any):
- config.store(name=name, node=module)
+from mephisto.operations.hydra_config import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.hydra_config "
+ "to mephisto.operations.hydra_config ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/core/local_database.py b/mephisto/core/local_database.py
index a7986c73a..249bd8bb6 100644
--- a/mephisto/core/local_database.py
+++ b/mephisto/core/local_database.py
@@ -4,1365 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.database import (
- MephistoDB,
- MephistoDBException,
- EntryAlreadyExistsException,
- EntryDoesNotExistException,
+from mephisto.abstractions.databases.local_database import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.local_database "
+ "to mephisto.abstractions.databases.local_database ",
+ PendingDeprecationWarning,
)
-from typing import Mapping, Optional, Any, List, Dict
-from mephisto.core.utils import get_data_dir
-from mephisto.core.registry import get_valid_provider_types
-from mephisto.data_model.agent import Agent, AgentState, OnboardingAgent
-from mephisto.data_model.assignment import Assignment, Unit, AssignmentState
-from mephisto.data_model.constants import NO_PROJECT_NAME
-from mephisto.data_model.project import Project
-from mephisto.data_model.requester import Requester
-from mephisto.data_model.task import Task, TaskRun
-from mephisto.data_model.worker import Worker
-from mephisto.data_model.qualification import Qualification, GrantedQualification
-
-import sqlite3
-from sqlite3 import Connection, Cursor
-import threading
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-
-def nonesafe_int(in_string: Optional[str]) -> Optional[int]:
- """Cast input to an int or None"""
- if in_string is None:
- return None
- return int(in_string)
-
-
-def assert_valid_provider(provider_type: str) -> None:
- """Throw an assertion error if the given provider type is not valid"""
- valid_types = get_valid_provider_types()
- if provider_type not in valid_types:
- raise MephistoDBException(
- f"Supplied provider {provider_type} is not in supported list of providers {valid_types}."
- )
-
-
-def is_key_failure(e: sqlite3.IntegrityError) -> bool:
- """
- Return if the given error is representing a foreign key
- failure, where an insertion was expecting something to
- exist already in the DB but it didn't.
- """
- return str(e) == "FOREIGN KEY constraint failed"
-
-
-def is_unique_failure(e: sqlite3.IntegrityError) -> bool:
- """
- Return if the given error is representing a foreign key
- failure, where an insertion was expecting something to
- exist already in the DB but it didn't.
- """
- return str(e).startswith("UNIQUE constraint")
-
-
-CREATE_PROJECTS_TABLE = """CREATE TABLE IF NOT EXISTS projects (
- project_id INTEGER PRIMARY KEY AUTOINCREMENT,
- project_name TEXT NOT NULL UNIQUE,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-CREATE_TASKS_TABLE = """CREATE TABLE IF NOT EXISTS tasks (
- task_id INTEGER PRIMARY KEY AUTOINCREMENT,
- task_name TEXT NOT NULL UNIQUE,
- task_type TEXT NOT NULL,
- project_id INTEGER,
- parent_task_id INTEGER,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (parent_task_id) REFERENCES tasks (task_id),
- FOREIGN KEY (project_id) REFERENCES projects (project_id)
-);
-"""
-
-CREATE_REQUESTERS_TABLE = """CREATE TABLE IF NOT EXISTS requesters (
- requester_id INTEGER PRIMARY KEY AUTOINCREMENT,
- requester_name TEXT NOT NULL UNIQUE,
- provider_type TEXT NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-CREATE_TASK_RUNS_TABLE = """
- CREATE TABLE IF NOT EXISTS task_runs (
- task_run_id INTEGER PRIMARY KEY AUTOINCREMENT,
- task_id INTEGER NOT NULL,
- requester_id INTEGER NOT NULL,
- init_params TEXT NOT NULL,
- is_completed BOOLEAN NOT NULL,
- provider_type TEXT NOT NULL,
- task_type TEXT NOT NULL,
- sandbox BOOLEAN NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (task_id) REFERENCES tasks (task_id),
- FOREIGN KEY (requester_id) REFERENCES requesters (requester_id)
-);
-"""
-
-CREATE_ASSIGNMENTS_TABLE = """CREATE TABLE IF NOT EXISTS assignments (
- assignment_id INTEGER PRIMARY KEY AUTOINCREMENT,
- task_id INTEGER NOT NULL,
- task_run_id INTEGER NOT NULL,
- requester_id INTEGER NOT NULL,
- task_type TEXT NOT NULL,
- provider_type TEXT NOT NULL,
- sandbox BOOLEAN NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (task_id) REFERENCES tasks (task_id),
- FOREIGN KEY (task_run_id) REFERENCES task_runs (task_run_id),
- FOREIGN KEY (requester_id) REFERENCES requesters (requester_id)
-);
-"""
-
-CREATE_UNITS_TABLE = """CREATE TABLE IF NOT EXISTS units (
- unit_id INTEGER PRIMARY KEY AUTOINCREMENT,
- assignment_id INTEGER NOT NULL,
- unit_index INTEGER NOT NULL,
- pay_amount FLOAT NOT NULL,
- provider_type TEXT NOT NULL,
- status TEXT NOT NULL,
- agent_id INTEGER,
- worker_id INTEGER,
- task_type TEXT NOT NULL,
- task_id INTEGER NOT NULL,
- task_run_id INTEGER NOT NULL,
- sandbox BOOLEAN NOT NULL,
- requester_id INTEGER NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (assignment_id) REFERENCES assignments (assignment_id),
- FOREIGN KEY (agent_id) REFERENCES agents (agent_id),
- FOREIGN KEY (task_run_id) REFERENCES task_runs (task_run_id),
- FOREIGN KEY (task_id) REFERENCES tasks (task_id),
- FOREIGN KEY (requester_id) REFERENCES requesters (requester_id),
- FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
- UNIQUE (assignment_id, unit_index)
-);
-"""
-
-CREATE_WORKERS_TABLE = """CREATE TABLE IF NOT EXISTS workers (
- worker_id INTEGER PRIMARY KEY AUTOINCREMENT,
- worker_name TEXT NOT NULL UNIQUE,
- provider_type TEXT NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-CREATE_AGENTS_TABLE = """CREATE TABLE IF NOT EXISTS agents (
- agent_id INTEGER PRIMARY KEY AUTOINCREMENT,
- worker_id INTEGER NOT NULL,
- unit_id INTEGER NOT NULL,
- task_id INTEGER NOT NULL,
- task_run_id INTEGER NOT NULL,
- assignment_id INTEGER NOT NULL,
- task_type TEXT NOT NULL,
- provider_type TEXT NOT NULL,
- status TEXT NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
- FOREIGN KEY (unit_id) REFERENCES units (unit_id)
-);
-"""
-
-CREATE_ONBOARDING_AGENTS_TABLE = """CREATE TABLE IF NOT EXISTS onboarding_agents (
- onboarding_agent_id INTEGER PRIMARY KEY AUTOINCREMENT,
- worker_id INTEGER NOT NULL,
- task_id INTEGER NOT NULL,
- task_run_id INTEGER NOT NULL,
- task_type TEXT NOT NULL,
- status TEXT NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
- FOREIGN KEY (task_run_id) REFERENCES task_runs (task_run_id)
-);
-"""
-
-CREATE_QUALIFICATIONS_TABLE = """CREATE TABLE IF NOT EXISTS qualifications (
- qualification_id INTEGER PRIMARY KEY AUTOINCREMENT,
- qualification_name TEXT NOT NULL UNIQUE,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-CREATE_GRANTED_QUALIFICATIONS_TABLE = """
-CREATE TABLE IF NOT EXISTS granted_qualifications (
- granted_qualification_id INTEGER PRIMARY KEY AUTOINCREMENT,
- worker_id INTEGER NOT NULL,
- qualification_id INTEGER NOT NULL,
- value INTEGER NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (worker_id) REFERENCES workers (worker_id),
- FOREIGN KEY (qualification_id) REFERENCES qualifications (qualification_id),
- UNIQUE (worker_id, qualification_id)
-);
-"""
-
-
-class StringIDRow(sqlite3.Row):
- def __getitem__(self, key: str) -> Any:
- val = super().__getitem__(key)
- if key.endswith("_id") and val is not None:
- return str(val)
- else:
- return val
-
-
-# TODO(101) find_x queries are pretty slow right now, as we query the same table once to get
-# all of the rows, but only select the ids, then we later construct them individually,
-# making a second set of requests.
-# It would be better to expose an init param for DB Objects that takes in the full row
-# and inits with that if provided, and queries the database if not.
-class LocalMephistoDB(MephistoDB):
- """
- Local database for core Mephisto data storage, the LocalMephistoDatabase handles
- grounding all of the python interactions with the Mephisto architecture to
- local files and a database.
- """
-
- def __init__(self, database_path=None):
- logger.debug(f"database path: {database_path}")
- self.conn: Dict[int, Connection] = {}
- self.table_access_condition = threading.Condition()
- super().__init__(database_path)
-
- def _get_connection(self) -> Connection:
- """Returns a singular database connection to be shared amongst all
- calls for a given thread.
- """
- # TODO(101) is there a problem with having just one db connection?
- # Will this cause bugs with failed commits?
- curr_thread = threading.get_ident()
- if curr_thread not in self.conn or self.conn[curr_thread] is None:
- try:
- conn = sqlite3.connect(self.db_path)
- conn.row_factory = StringIDRow
- self.conn[curr_thread] = conn
- except sqlite3.Error as e:
- raise MephistoDBException(e)
- return self.conn[curr_thread]
-
- def shutdown(self) -> None:
- """Close all open connections"""
- with self.table_access_condition:
- curr_thread = threading.get_ident()
- self.conn[curr_thread].close()
- del self.conn[curr_thread]
-
- def init_tables(self) -> None:
- """
- Run all the table creation SQL queries to ensure the expected tables exist
- """
- # TODO(#93) maybe raise flag when the schema of existing tables isn't what we expect
- # it to be?
- # "How to know that schema changes?"
- # logger.warning("some message")
- with self.table_access_condition:
- conn = self._get_connection()
- conn.execute("PRAGMA foreign_keys = 1")
- with conn:
- c = conn.cursor()
- c.execute(CREATE_PROJECTS_TABLE)
- c.execute(CREATE_TASKS_TABLE)
- c.execute(CREATE_REQUESTERS_TABLE)
- c.execute(CREATE_TASK_RUNS_TABLE)
- c.execute(CREATE_ASSIGNMENTS_TABLE)
- c.execute(CREATE_UNITS_TABLE)
- c.execute(CREATE_WORKERS_TABLE)
- c.execute(CREATE_AGENTS_TABLE)
- c.execute(CREATE_QUALIFICATIONS_TABLE)
- c.execute(CREATE_GRANTED_QUALIFICATIONS_TABLE)
- c.execute(CREATE_ONBOARDING_AGENTS_TABLE)
-
- def __get_one_by_id(
- self, table_name: str, id_name: str, db_id: str
- ) -> Mapping[str, Any]:
- """
- Try to request the row for the given table and entry,
- raise EntryDoesNotExistException if it isn't present
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- f"""
- SELECT * FROM {table_name}
- WHERE ({id_name} = ?)
- """,
- (int(db_id),),
- )
- results = c.fetchall()
- if len(results) != 1:
- raise EntryDoesNotExistException(
- f"Table {table_name} has no {id_name} {db_id}"
- )
- return results[0]
-
- def new_project(self, project_name: str) -> str:
- """
- Create a new project with the given project name. Raise EntryAlreadyExistsException if a project
- with this name has already been created.
- """
- if project_name in [NO_PROJECT_NAME, ""]:
- raise MephistoDBException(f'Invalid project name "{project_name}')
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- "INSERT INTO projects(project_name) VALUES (?);", (project_name,)
- )
- project_id = str(c.lastrowid)
- return project_id
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException()
- elif is_unique_failure(e):
- raise EntryAlreadyExistsException(
- f"Project {project_name} already exists"
- )
- raise MephistoDBException(e)
-
- def get_project(self, project_id: str) -> Mapping[str, Any]:
- """
- Return project's fields by the given project_id, raise EntryDoesNotExistException
- if no id exists in projects
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("projects", "project_id", project_id)
-
- def find_projects(self, project_name: Optional[str] = None) -> List[Project]:
- """
- Try to find any project that matches the above. When called with no arguments,
- return all projects.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from projects
- WHERE (?1 IS NULL OR project_name = ?1)
- """,
- (project_name,),
- )
- rows = c.fetchall()
- return [Project(self, str(r["project_id"]), row=r) for r in rows]
-
- def new_task(
- self,
- task_name: str,
- task_type: str,
- project_id: Optional[str] = None,
- parent_task_id: Optional[str] = None,
- ) -> str:
- """
- Create a new task with the given task name. Raise EntryAlreadyExistsException if a task
- with this name has already been created.
- """
- if task_name in [""]:
- raise MephistoDBException(f'Invalid task name "{task_name}')
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """INSERT INTO tasks(
- task_name,
- task_type,
- project_id,
- parent_task_id
- ) VALUES (?, ?, ?, ?);""",
- (
- task_name,
- task_type,
- nonesafe_int(project_id),
- nonesafe_int(parent_task_id),
- ),
- )
- task_id = str(c.lastrowid)
- return task_id
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- elif is_unique_failure(e):
- raise EntryAlreadyExistsException(e)
- raise MephistoDBException(e)
-
- def get_task(self, task_id: str) -> Mapping[str, Any]:
- """
- Return task's fields by task_id, raise EntryDoesNotExistException if no id exists
- in tasks
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("tasks", "task_id", task_id)
-
- def find_tasks(
- self,
- task_name: Optional[str] = None,
- project_id: Optional[str] = None,
- parent_task_id: Optional[str] = None,
- ) -> List[Task]:
- """
- Try to find any task that matches the above. When called with no arguments,
- return all tasks.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from tasks
- WHERE (?1 IS NULL OR task_name = ?1)
- AND (?2 IS NULL OR project_id = ?2)
- AND (?3 IS NULL OR parent_task_id = ?3)
- """,
- (task_name, nonesafe_int(project_id), nonesafe_int(parent_task_id)),
- )
- rows = c.fetchall()
- return [Task(self, str(r["task_id"]), row=r) for r in rows]
-
- def update_task(
- self,
- task_id: str,
- task_name: Optional[str] = None,
- project_id: Optional[str] = None,
- ) -> None:
- """
- Update the given task with the given parameters if possible, raise appropriate exception otherwise.
-
- Tasks can only be updated if no runs exist for this task yet, otherwise there's too much state
- and we shouldn't make changes.
- """
- if len(self.find_task_runs(task_id=task_id)) != 0:
- raise MephistoDBException(
- "Cannot edit a task that has already been run, for risk of data corruption."
- )
- if task_name in [""]:
- raise MephistoDBException(f'Invalid task name "{task_name}')
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- if task_name is not None:
- c.execute(
- """
- UPDATE tasks
- SET task_name = ?
- WHERE task_id = ?;
- """,
- (task_name, int(task_id)),
- )
- if project_id is not None:
- c.execute(
- """
- UPDATE tasks
- SET project_id = ?
- WHERE task_id = ?;
- """,
- (int(project_id), int(task_id)),
- )
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- elif is_unique_failure(e):
- raise EntryAlreadyExistsException(
- f"Task name {task_name} is already in use"
- )
- raise MephistoDBException(e)
-
- def new_task_run(
- self,
- task_id: str,
- requester_id: str,
- init_params: str,
- provider_type: str,
- task_type: str,
- sandbox: bool = True,
- ) -> str:
- """Create a new task_run for the given task."""
- with self.table_access_condition, self._get_connection() as conn:
- # Ensure given ids are valid
- c = conn.cursor()
- try:
- c.execute(
- """
- INSERT INTO task_runs(
- task_id,
- requester_id,
- init_params,
- is_completed,
- provider_type,
- task_type,
- sandbox
- )
- VALUES (?, ?, ?, ?, ?, ?, ?);""",
- (
- int(task_id),
- int(requester_id),
- init_params,
- False,
- provider_type,
- task_type,
- sandbox,
- ),
- )
- task_run_id = str(c.lastrowid)
- return task_run_id
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- raise MephistoDBException(e)
-
- def get_task_run(self, task_run_id: str) -> Mapping[str, Any]:
- """
- Return the given task_run's fields by task_run_id, raise EntryDoesNotExistException if no id exists
- in task_runs.
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("task_runs", "task_run_id", task_run_id)
-
- def find_task_runs(
- self,
- task_id: Optional[str] = None,
- requester_id: Optional[str] = None,
- is_completed: Optional[bool] = None,
- ) -> List[TaskRun]:
- """
- Try to find any task_run that matches the above. When called with no arguments,
- return all task_runs.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from task_runs
- WHERE (?1 IS NULL OR task_id = ?1)
- AND (?2 IS NULL OR requester_id = ?2)
- AND (?3 IS NULL OR is_completed = ?3)
- """,
- (nonesafe_int(task_id), nonesafe_int(requester_id), is_completed),
- )
- rows = c.fetchall()
- return [TaskRun(self, str(r["task_run_id"]), row=r) for r in rows]
-
- def update_task_run(self, task_run_id: str, is_completed: bool):
- """
- Update a task run. At the moment, can only update completion status
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """
- UPDATE task_runs
- SET is_completed = ?
- WHERE task_run_id = ?;
- """,
- (is_completed, int(task_run_id)),
- )
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- raise MephistoDBException(e)
-
- def new_assignment(
- self,
- task_id: str,
- task_run_id: str,
- requester_id: str,
- task_type: str,
- provider_type: str,
- sandbox: bool = True,
- ) -> str:
- """Create a new assignment for the given task"""
- # Ensure task run exists
- self.get_task_run(task_run_id)
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """
- INSERT INTO assignments(
- task_id,
- task_run_id,
- requester_id,
- task_type,
- provider_type,
- sandbox
- ) VALUES (?, ?, ?, ?, ?, ?);""",
- (
- int(task_id),
- int(task_run_id),
- int(requester_id),
- task_type,
- provider_type,
- sandbox,
- ),
- )
- assignment_id = str(c.lastrowid)
- return assignment_id
-
- def get_assignment(self, assignment_id: str) -> Mapping[str, Any]:
- """
- Return assignment's fields by assignment_id, raise EntryDoesNotExistException
- if no id exists in tasks
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("assignments", "assignment_id", assignment_id)
-
- def find_assignments(
- self,
- task_run_id: Optional[str] = None,
- task_id: Optional[str] = None,
- requester_id: Optional[str] = None,
- task_type: Optional[str] = None,
- provider_type: Optional[str] = None,
- sandbox: Optional[bool] = None,
- ) -> List[Assignment]:
- """
- Try to find any task that matches the above. When called with no arguments,
- return all tasks.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from assignments
- WHERE (?1 IS NULL OR task_run_id = ?1)
- AND (?2 IS NULL OR task_id = ?2)
- AND (?3 IS NULL OR requester_id = ?3)
- AND (?4 IS NULL OR task_type = ?4)
- AND (?5 IS NULL OR provider_type = ?5)
- AND (?6 IS NULL OR sandbox = ?6)
- """,
- (
- nonesafe_int(task_run_id),
- nonesafe_int(task_id),
- nonesafe_int(requester_id),
- task_type,
- provider_type,
- sandbox,
- ),
- )
- rows = c.fetchall()
- return [Assignment(self, str(r["assignment_id"]), row=r) for r in rows]
-
- def new_unit(
- self,
- task_id: str,
- task_run_id: str,
- requester_id: str,
- assignment_id: str,
- unit_index: int,
- pay_amount: float,
- provider_type: str,
- task_type: str,
- sandbox: bool = True,
- ) -> str:
- """
- Create a new unit with the given index. Raises EntryAlreadyExistsException
- if there is already a unit for the given assignment with the given index.
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """INSERT INTO units(
- task_id,
- task_run_id,
- requester_id,
- assignment_id,
- unit_index,
- pay_amount,
- provider_type,
- task_type,
- sandbox,
- status
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);""",
- (
- int(task_id),
- int(task_run_id),
- int(requester_id),
- int(assignment_id),
- unit_index,
- pay_amount,
- provider_type,
- task_type,
- sandbox,
- AssignmentState.CREATED,
- ),
- )
- unit_id = str(c.lastrowid)
- return unit_id
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- elif is_unique_failure(e):
- raise EntryAlreadyExistsException(e)
- raise MephistoDBException(e)
-
- def get_unit(self, unit_id: str) -> Mapping[str, Any]:
- """
- Return unit's fields by unit_id, raise EntryDoesNotExistException
- if no id exists in units
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("units", "unit_id", unit_id)
-
- def find_units(
- self,
- task_id: Optional[str] = None,
- task_run_id: Optional[str] = None,
- requester_id: Optional[str] = None,
- assignment_id: Optional[str] = None,
- unit_index: Optional[int] = None,
- provider_type: Optional[str] = None,
- task_type: Optional[str] = None,
- agent_id: Optional[str] = None,
- worker_id: Optional[str] = None,
- sandbox: Optional[bool] = None,
- status: Optional[str] = None,
- ) -> List[Unit]:
- """
- Try to find any unit that matches the above. When called with no arguments,
- return all units.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from units
- WHERE (?1 IS NULL OR task_id = ?1)
- AND (?2 IS NULL OR task_run_id = ?2)
- AND (?3 IS NULL OR requester_id = ?3)
- AND (?4 IS NULL OR assignment_id = ?4)
- AND (?5 IS NULL OR unit_index = ?5)
- AND (?6 IS NULL OR provider_type = ?6)
- AND (?7 IS NULL OR task_type = ?7)
- AND (?8 IS NULL OR agent_id = ?8)
- AND (?9 IS NULL OR worker_id = ?9)
- AND (?10 IS NULL OR sandbox = ?10)
- AND (?11 IS NULL OR status = ?11)
- """,
- (
- nonesafe_int(task_id),
- nonesafe_int(task_run_id),
- nonesafe_int(requester_id),
- nonesafe_int(assignment_id),
- unit_index,
- provider_type,
- task_type,
- nonesafe_int(agent_id),
- nonesafe_int(worker_id),
- sandbox,
- status,
- ),
- )
- rows = c.fetchall()
- return [Unit(self, str(r["unit_id"]), row=r) for r in rows]
-
- def clear_unit_agent_assignment(self, unit_id: str) -> None:
- """
- Update the given unit by removing the agent that is assigned to it, thus updating
- the status to assignable.
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """
- UPDATE units
- SET agent_id = ?, worker_id = ?, status = ?
- WHERE unit_id = ?;
- """,
- (None, None, AssignmentState.LAUNCHED, int(unit_id)),
- )
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(
- f"Given unit_id {unit_id} not found in the database"
- )
- raise MephistoDBException(e)
-
- def update_unit(
- self, unit_id: str, agent_id: Optional[str] = None, status: Optional[str] = None
- ) -> None:
- """
- Update the given task with the given parameters if possible, raise appropriate exception otherwise.
- """
- if status not in AssignmentState.valid_unit():
- raise MephistoDBException(f"Invalid status {status} for a unit")
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- if agent_id is not None:
- c.execute(
- """
- UPDATE units
- SET agent_id = ?
- WHERE unit_id = ?;
- """,
- (int(agent_id), int(unit_id)),
- )
- if status is not None:
- c.execute(
- """
- UPDATE units
- SET status = ?
- WHERE unit_id = ?;
- """,
- (status, int(unit_id)),
- )
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(
- f"Given unit_id {unit_id} not found in the database"
- )
- raise MephistoDBException(e)
-
- def new_requester(self, requester_name: str, provider_type: str) -> str:
- """
- Create a new requester with the given name and provider type.
- Raises EntryAlreadyExistsException
- if there is already a requester with this name
- """
- if requester_name == "":
- raise MephistoDBException("Empty string is not a valid requester name")
- assert_valid_provider(provider_type)
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- "INSERT INTO requesters(requester_name, provider_type) VALUES (?, ?);",
- (requester_name, provider_type),
- )
- requester_id = str(c.lastrowid)
- return requester_id
- except sqlite3.IntegrityError as e:
- if is_unique_failure(e):
- raise EntryAlreadyExistsException()
- raise MephistoDBException(e)
-
- def get_requester(self, requester_id: str) -> Mapping[str, Any]:
- """
- Return requester's fields by requester_id, raise EntryDoesNotExistException
- if no id exists in requesters
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("requesters", "requester_id", requester_id)
-
- def find_requesters(
- self, requester_name: Optional[str] = None, provider_type: Optional[str] = None
- ) -> List[Requester]:
- """
- Try to find any requester that matches the above. When called with no arguments,
- return all requesters.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from requesters
- WHERE (?1 IS NULL OR requester_name = ?1)
- AND (?2 IS NULL OR provider_type = ?2)
- """,
- (requester_name, provider_type),
- )
- rows = c.fetchall()
- return [Requester(self, str(r["requester_id"]), row=r) for r in rows]
-
- def new_worker(self, worker_name: str, provider_type: str) -> str:
- """
- Create a new worker with the given name and provider type.
- Raises EntryAlreadyExistsException
- if there is already a worker with this name
-
- worker_name should be the unique identifier by which the crowd provider
- is using to keep track of this worker
- """
- if worker_name == "":
- raise MephistoDBException("Empty string is not a valid requester name")
- assert_valid_provider(provider_type)
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- "INSERT INTO workers(worker_name, provider_type) VALUES (?, ?);",
- (worker_name, provider_type),
- )
- worker_id = str(c.lastrowid)
- return worker_id
- except sqlite3.IntegrityError as e:
- if is_unique_failure(e):
- raise EntryAlreadyExistsException()
- raise MephistoDBException(e)
-
- def get_worker(self, worker_id: str) -> Mapping[str, Any]:
- """
- Return worker's fields by worker_id, raise EntryDoesNotExistException
- if no id exists in workers
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("workers", "worker_id", worker_id)
-
- def find_workers(
- self, worker_name: Optional[str] = None, provider_type: Optional[str] = None
- ) -> List[Worker]:
- """
- Try to find any worker that matches the above. When called with no arguments,
- return all workers.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from workers
- WHERE (?1 IS NULL OR worker_name = ?1)
- AND (?2 IS NULL OR provider_type = ?2)
- """,
- (worker_name, provider_type),
- )
- rows = c.fetchall()
- return [Worker(self, str(r["worker_id"]), row=r) for r in rows]
-
- def new_agent(
- self,
- worker_id: str,
- unit_id: str,
- task_id: str,
- task_run_id: str,
- assignment_id: str,
- task_type: str,
- provider_type: str,
- ) -> str:
- """
- Create a new agent with the given name and provider type.
- Raises EntryAlreadyExistsException
- if there is already a agent with this name
- """
- assert_valid_provider(provider_type)
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """INSERT INTO agents(
- worker_id,
- unit_id,
- task_id,
- task_run_id,
- assignment_id,
- task_type,
- provider_type,
- status
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?);""",
- (
- int(worker_id),
- int(unit_id),
- int(task_id),
- int(task_run_id),
- int(assignment_id),
- task_type,
- provider_type,
- AgentState.STATUS_NONE,
- ),
- )
- agent_id = str(c.lastrowid)
- c.execute(
- """
- UPDATE units
- SET status = ?, agent_id = ?, worker_id = ?
- WHERE unit_id = ?;
- """,
- (
- AssignmentState.ASSIGNED,
- int(agent_id),
- int(worker_id),
- int(unit_id),
- ),
- )
- return agent_id
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- raise MephistoDBException(e)
-
- def get_agent(self, agent_id: str) -> Mapping[str, Any]:
- """
- Return agent's fields by agent_id, raise EntryDoesNotExistException
- if no id exists in agents
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id("agents", "agent_id", agent_id)
-
- def update_agent(self, agent_id: str, status: Optional[str] = None) -> None:
- """
- Update the given task with the given parameters if possible, raise appropriate exception otherwise.
- """
- if status not in AgentState.valid():
- raise MephistoDBException(f"Invalid status {status} for an agent")
-
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """
- UPDATE agents
- SET status = ?
- WHERE agent_id = ?;
- """,
- (status, int(agent_id)),
- )
-
- def find_agents(
- self,
- status: Optional[str] = None,
- unit_id: Optional[str] = None,
- worker_id: Optional[str] = None,
- task_id: Optional[str] = None,
- task_run_id: Optional[str] = None,
- assignment_id: Optional[str] = None,
- task_type: Optional[str] = None,
- provider_type: Optional[str] = None,
- ) -> List[Agent]:
- """
- Try to find any agent that matches the above. When called with no arguments,
- return all agents.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from agents
- WHERE (?1 IS NULL OR status = ?1)
- AND (?2 IS NULL OR unit_id = ?2)
- AND (?3 IS NULL OR worker_id = ?3)
- AND (?4 IS NULL OR task_id = ?4)
- AND (?5 IS NULL OR task_run_id = ?5)
- AND (?6 IS NULL OR assignment_id = ?6)
- AND (?7 IS NULL OR task_type = ?7)
- AND (?8 IS NULL OR provider_type = ?8)
- """,
- (
- status,
- nonesafe_int(unit_id),
- nonesafe_int(worker_id),
- nonesafe_int(task_id),
- nonesafe_int(task_run_id),
- nonesafe_int(assignment_id),
- task_type,
- provider_type,
- ),
- )
- rows = c.fetchall()
- return [Agent(self, str(r["agent_id"]), row=r) for r in rows]
-
- def make_qualification(self, qualification_name: str) -> str:
- """
- Make a new qualification, throws an error if a qualification by the given name
- already exists. Return the id for the qualification.
- """
- if qualification_name == "":
- raise MephistoDBException("Empty string is not a valid qualification name")
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- "INSERT INTO qualifications(qualification_name) VALUES (?);",
- (qualification_name,),
- )
- qualification_id = str(c.lastrowid)
- return qualification_id
- except sqlite3.IntegrityError as e:
- if is_unique_failure(e):
- raise EntryAlreadyExistsException()
- raise MephistoDBException(e)
-
- def find_qualifications(
- self, qualification_name: Optional[str] = None
- ) -> List[Qualification]:
- """
- Find a qualification. If no name is supplied, returns all qualifications.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from qualifications
- WHERE (?1 IS NULL OR qualification_name = ?1)
- """,
- (qualification_name,),
- )
- rows = c.fetchall()
- return [
- Qualification(self, str(r["qualification_id"]), row=r) for r in rows
- ]
-
- def get_qualification(self, qualification_id: str) -> Mapping[str, Any]:
- """
- Return qualification's fields by qualification_id, raise
- EntryDoesNotExistException if no id exists in qualifications
-
- See Qualification for the expected fields for the returned mapping
- """
- return self.__get_one_by_id(
- "qualifications", "qualification_id", qualification_id
- )
-
- def _delete_qualification(self, qualification_name: str) -> None:
- """
- Remove this qualification from all workers that have it, then delete the qualification
- """
- qualifications = self.find_qualifications(qualification_name=qualification_name)
- if len(qualifications) == 0:
- raise EntryDoesNotExistException(
- f"No qualification found by name {qualification_name}"
- )
- qualification = qualifications[0]
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- "DELETE FROM granted_qualifications WHERE qualification_id = ?1;",
- (int(qualification.db_id),),
- )
- c.execute(
- "DELETE FROM qualifications WHERE qualification_name = ?1;",
- (qualification_name,),
- )
-
- def grant_qualification(
- self, qualification_id: str, worker_id: str, value: int = 1
- ) -> None:
- """
- Grant a worker the given qualification. Update the qualification value if it
- already exists
- """
- # Note that better syntax exists for python 3.8+, as described in PR #223
- try:
- # Update existing entry
- qual_row = self.get_granted_qualification(qualification_id, worker_id)
- with self.table_access_condition, self._get_connection() as conn:
- if value != qual_row["value"]:
- c = conn.cursor()
- c.execute(
- """
- UPDATE granted_qualifications
- SET value = ?
- WHERE (qualification_id = ?)
- AND (worker_id = ?);
- """,
- (value, int(qualification_id), int(worker_id)),
- )
- conn.commit()
- return None
- except EntryDoesNotExistException:
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """
- INSERT INTO granted_qualifications(
- qualification_id,
- worker_id,
- value
- ) VALUES (?, ?, ?);
- """,
- (int(qualification_id), int(worker_id), value),
- )
- qualification_id = str(c.lastrowid)
- conn.commit()
- return None
- except sqlite3.IntegrityError as e:
- if is_unique_failure(e):
- raise EntryAlreadyExistsException()
- raise MephistoDBException(e)
-
- def check_granted_qualifications(
- self,
- qualification_id: Optional[str] = None,
- worker_id: Optional[str] = None,
- value: Optional[int] = None,
- ) -> List[GrantedQualification]:
- """
- Find granted qualifications that match the given specifications
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from granted_qualifications
- WHERE (?1 IS NULL OR qualification_id = ?1)
- AND (?2 IS NULL OR worker_id = ?2)
- AND (?3 IS NULL OR value = ?3)
- """,
- (qualification_id, worker_id, value),
- )
- rows = c.fetchall()
- return [
- GrantedQualification(
- self, str(r["qualification_id"]), str(r["worker_id"])
- )
- for r in rows
- ]
-
- # TODO(101) these should not be optional
- def get_granted_qualification(
- self, qualification_id: Optional[str] = None, worker_id: Optional[str] = None
- ) -> Mapping[str, Any]:
- """
- Return the granted qualification in the database between the given
- worker and qualification id
-
- See GrantedQualification for the expected fields for the returned mapping
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- f"""
- SELECT * FROM granted_qualifications
- WHERE (qualification_id = ?1)
- AND (worker_id = ?2);
- """,
- (nonesafe_int(qualification_id), nonesafe_int(worker_id)),
- )
- results = c.fetchall()
- if len(results) != 1:
- raise EntryDoesNotExistException(
- f"No such granted qualification {qualification_id}, {worker_id}"
- )
- return results[0]
-
- def revoke_qualification(self, qualification_id: str, worker_id: str) -> None:
- """
- Remove the given qualification from the given worker
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """DELETE FROM granted_qualifications
- WHERE (qualification_id = ?1)
- AND (worker_id = ?2);
- """,
- (int(qualification_id), int(worker_id)),
- )
-
- def new_onboarding_agent(
- self, worker_id: str, task_id: str, task_run_id: str, task_type: str
- ) -> str:
- """
- Create a new agent for the given worker id to assign to the given unit
- Raises EntryAlreadyExistsException
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- try:
- c.execute(
- """INSERT INTO onboarding_agents(
- worker_id,
- task_id,
- task_run_id,
- task_type,
- status
- ) VALUES (?, ?, ?, ?, ?);""",
- (
- int(worker_id),
- int(task_id),
- int(task_run_id),
- task_type,
- AgentState.STATUS_NONE,
- ),
- )
- return str(c.lastrowid)
- except sqlite3.IntegrityError as e:
- if is_key_failure(e):
- raise EntryDoesNotExistException(e)
- raise MephistoDBException(e)
-
- def get_onboarding_agent(self, onboarding_agent_id: str) -> Mapping[str, Any]:
- """
- Return onboarding agent's fields by onboarding_agent_id, raise
- EntryDoesNotExistException if no id exists in onboarding_agents
-
- Returns a SQLite Row object with the expected fields
- """
- return self.__get_one_by_id(
- "onboarding_agents", "onboarding_agent_id", onboarding_agent_id
- )
-
- def update_onboarding_agent(
- self, onboarding_agent_id: str, status: Optional[str] = None
- ) -> None:
- """
- Update the given onboarding agent with the given parameters if possible,
- raise appropriate exception otherwise.
- """
- if status not in AgentState.valid():
- raise MephistoDBException(f"Invalid status {status} for an agent")
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- if status is not None:
- c.execute(
- """
- UPDATE onboarding_agents
- SET status = ?
- WHERE onboarding_agent_id = ?;
- """,
- (status, int(onboarding_agent_id)),
- )
-
- def find_onboarding_agents(
- self,
- status: Optional[str] = None,
- worker_id: Optional[str] = None,
- task_id: Optional[str] = None,
- task_run_id: Optional[str] = None,
- task_type: Optional[str] = None,
- ) -> List[OnboardingAgent]:
- """
- Try to find any onboarding agent that matches the above. When called with no arguments,
- return all onboarding agents.
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from onboarding_agents
- WHERE (?1 IS NULL OR status = ?1)
- AND (?2 IS NULL OR worker_id = ?2)
- AND (?3 IS NULL OR task_id = ?3)
- AND (?4 IS NULL OR task_run_id = ?4)
- AND (?5 IS NULL OR task_type = ?5)
- """,
- (
- status,
- nonesafe_int(worker_id),
- nonesafe_int(task_id),
- nonesafe_int(task_run_id),
- task_type,
- ),
- )
- rows = c.fetchall()
- return [
- OnboardingAgent(self, str(r["onboarding_agent_id"]), row=r)
- for r in rows
- ]
diff --git a/mephisto/core/logger_core.py b/mephisto/core/logger_core.py
index 85a1844e1..f82ca7c13 100644
--- a/mephisto/core/logger_core.py
+++ b/mephisto/core/logger_core.py
@@ -4,52 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import logging
-
-loggers = {}
-
-
-def get_logger(
- name: str, verbose: bool = True, log_file: str = None, level: str = "info"
-) -> logging.Logger:
- """
- Gets the logger corresponds to each module
- Parameters:
- name (string): the module name (__name__).
- verbose (bool): INFO level activated if True.
- log_file (string): path for saving logs locally.
- level (string): logging level. Values options: [info, debug, warning, error, critical].
-
- Returns:
- logger (logging.Logger): the corresponding logger to the given module name.
- """
-
- global loggers
- if loggers.get(name):
- return loggers.get(name)
- else:
- logger = logging.getLogger(name)
-
- level_dict = {
- "info": logging.INFO,
- "debug": logging.DEBUG,
- "warning": logging.WARNING,
- "error": logging.ERROR,
- "critical": logging.CRITICAL,
- }
-
- logger.setLevel(logging.INFO if verbose else logging.DEBUG)
- logger.setLevel(level_dict[level.lower()])
- if log_file is None:
- handler = logging.StreamHandler()
- else:
- handler = logging.RotatingFileHandler(log_file)
- formatter = logging.Formatter(
- "[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)5s - %(message)s",
- "%m-%d %H:%M:%S",
- )
-
- handler.setFormatter(formatter)
- logger.addHandler(handler)
- loggers[name] = logger
- return logger
+from mephisto.operations.logger_core import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.logger_core "
+ "to mephisto.operations.logger_core ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/core/operator.py b/mephisto/core/operator.py
index 76f461a14..f19a8e0ff 100644
--- a/mephisto/core/operator.py
+++ b/mephisto/core/operator.py
@@ -4,415 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-import unittest
-import shutil
-import json
-import os
-import tempfile
-import time
-import threading
-import shlex
-import traceback
-
-from argparse import ArgumentParser
-
-from mephisto.core.supervisor import Supervisor, Job
-
-from typing import Dict, Optional, List, Any, Tuple, NamedTuple, Type, TYPE_CHECKING
-from mephisto.data_model.task_config import TaskConfig
-from mephisto.data_model.task import TaskRun
-from mephisto.data_model.requester import Requester
-from mephisto.data_model.blueprint import OnboardingRequired, SharedTaskState
-from mephisto.data_model.database import MephistoDB, EntryDoesNotExistException
-from mephisto.data_model.qualification import make_qualification_dict, QUAL_NOT_EXIST
-from mephisto.core.task_launcher import TaskLauncher
-from mephisto.core.registry import (
- get_blueprint_from_type,
- get_crowd_provider_from_type,
- get_architect_from_type,
+from mephisto.operations.operator import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.operator "
+ "to mephisto.operations.operator ",
+ PendingDeprecationWarning,
)
-from mephisto.core.utils import get_mock_requester
-
-from mephisto.core.logger_core import get_logger
-from omegaconf import DictConfig, OmegaConf
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-if TYPE_CHECKING:
- from mephisto.data_model.agent import Agent
- from mephisto.data_model.blueprint import Blueprint, TaskRunner
- from mephisto.data_model.crowd_provider import CrowdProvider
- from mephisto.data_model.architect import Architect
- from argparse import Namespace
-
-
-class TrackedRun(NamedTuple):
- task_run: TaskRun
- architect: "Architect"
- task_runner: "TaskRunner"
- task_launcher: TaskLauncher
- job: Job
-
-
-class Operator:
- """
- Acting as the controller behind the curtain, the Operator class
- is responsible for managing the knobs, switches, and dials
- of the rest of the Mephisto architecture.
-
- Most convenience scripts for using Mephisto will use an Operator
- to get the job done, though this class itself is also a
- good model to use to understand how the underlying
- architecture works in order to build custom jobs or workflows.
- """
-
- def __init__(self, db: "MephistoDB"):
- self.db = db
- self.supervisor = Supervisor(db)
- self._task_runs_tracked: Dict[str, TrackedRun] = {}
- self.is_shutdown = False
- self._run_tracker_thread = threading.Thread(
- target=self._track_and_kill_runs, name="Operator-tracking-thread"
- )
- self._run_tracker_thread.start()
-
- @staticmethod
- def _get_baseline_argparser() -> ArgumentParser:
- """Return a parser for the baseline requirements to launch a job"""
- parser = ArgumentParser()
- parser.add_argument(
- "--blueprint-type",
- dest="blueprint_type",
- help="Name of the blueprint to launch",
- required=True,
- )
- parser.add_argument(
- "--architect-type",
- dest="architect_type",
- help="Name of the architect to launch with",
- required=True,
- )
- parser.add_argument(
- "--requester-name",
- dest="requester_name",
- help="Identifier for the requester to launch as",
- required=True,
- )
- return parser
-
- def get_running_task_runs(self):
- """Return the currently running task runs and their handlers"""
- return self._task_runs_tracked.copy()
-
- def parse_and_launch_run(
- self,
- arg_list: Optional[List[str]] = None,
- extra_args: Optional[Dict[str, Any]] = None,
- ) -> Optional[str]:
- """
- Wrapper around parse and launch run that prints errors on failure, rather
- than throwing. Generally for use in scripts.
- """
- raise Exception(
- "Operator.parse_and_launch_run has been deprecated in favor "
- "of using Hydra for argument configuration. See the docs at "
- "https://github.com/facebookresearch/Mephisto/blob/master/docs/hydra_migration.md "
- "in order to upgrade."
- )
-
- def validate_and_run_config_or_die(
- self, run_config: DictConfig, shared_state: Optional[SharedTaskState] = None
- ) -> str:
- """
- Parse the given arguments and launch a job.
- """
- if shared_state is None:
- shared_state = SharedTaskState()
-
- # First try to find the requester:
- requester_name = run_config.provider.requester_name
- requesters = self.db.find_requesters(requester_name=requester_name)
- if len(requesters) == 0:
- if run_config.provider.requester_name == "MOCK_REQUESTER":
- requesters = [get_mock_requester(self.db)]
- else:
- raise EntryDoesNotExistException(
- f"No requester found with name {requester_name}"
- )
- requester = requesters[0]
- requester_id = requester.db_id
- provider_type = requester.provider_type
- assert provider_type == run_config.provider._provider_type, (
- f"Found requester for name {requester_name} is not "
- f"of the specified type {run_config.provider._provider_type}, "
- f"but is instead {provider_type}."
- )
-
- # Next get the abstraction classes, and run validation
- # before anything is actually created in the database
- blueprint_type = run_config.blueprint._blueprint_type
- architect_type = run_config.architect._architect_type
- BlueprintClass = get_blueprint_from_type(blueprint_type)
- ArchitectClass = get_architect_from_type(architect_type)
- CrowdProviderClass = get_crowd_provider_from_type(provider_type)
-
- BlueprintClass.assert_task_args(run_config, shared_state)
- ArchitectClass.assert_task_args(run_config, shared_state)
- CrowdProviderClass.assert_task_args(run_config, shared_state)
-
- # Find an existing task or create a new one
- task_name = run_config.task.get("task_name", None)
- if task_name is None:
- task_name = blueprint_type
- logger.warning(
- f"Task is using the default blueprint name {task_name} as a name, "
- "as no task_name is provided"
- )
- tasks = self.db.find_tasks(task_name=task_name)
- task_id = None
- if len(tasks) == 0:
- task_id = self.db.new_task(task_name, blueprint_type)
- else:
- task_id = tasks[0].db_id
-
- logger.info(f"Creating a task run under task name: {task_name}")
-
- # Create a new task run
- new_run_id = self.db.new_task_run(
- task_id,
- requester_id,
- json.dumps(OmegaConf.to_container(run_config, resolve=True)),
- provider_type,
- blueprint_type,
- requester.is_sandbox(),
- )
- task_run = TaskRun(self.db, new_run_id)
-
- try:
- # Register the blueprint with args to the task run,
- # ensure cached
- blueprint = BlueprintClass(task_run, run_config, shared_state)
- task_run.get_blueprint(args=run_config, shared_state=shared_state)
-
- # If anything fails after here, we have to cleanup the architect
- build_dir = os.path.join(task_run.get_run_dir(), "build")
- os.makedirs(build_dir, exist_ok=True)
- architect = ArchitectClass(
- self.db, run_config, shared_state, task_run, build_dir
- )
-
- # Setup and deploy the server
- built_dir = architect.prepare()
- task_url = architect.deploy()
-
- # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
- # happen after everything has already been reviewed, this way it's possible to
- # retrieve the exact build directory to review a task for real
- architect.cleanup()
-
- # Create the backend runner
- task_runner = BlueprintClass.TaskRunnerClass(
- task_run, run_config, shared_state
- )
-
- # Small hack for auto appending block qualification
- existing_qualifications = shared_state.qualifications
- if run_config.blueprint.get("block_qualification", None) is not None:
- existing_qualifications.append(
- make_qualification_dict(
- run_config.blueprint.block_qualification, QUAL_NOT_EXIST, None
- )
- )
- if run_config.blueprint.get("onboarding_qualification", None) is not None:
- existing_qualifications.append(
- make_qualification_dict(
- OnboardingRequired.get_failed_qual(
- run_config.blueprint.onboarding_qualification
- ),
- QUAL_NOT_EXIST,
- None,
- )
- )
- shared_state.qualifications = existing_qualifications
-
- # Register the task with the provider
- provider = CrowdProviderClass(self.db)
- provider.setup_resources_for_task_run(
- task_run, run_config, shared_state, task_url
- )
-
- initialization_data_array = blueprint.get_initialization_data()
-
- # Link the job together
- job = self.supervisor.register_job(
- architect, task_runner, provider, existing_qualifications
- )
- if self.supervisor.sending_thread is None:
- self.supervisor.launch_sending_thread()
- except (KeyboardInterrupt, Exception) as e:
- logger.error(
- "Encountered error while launching run, shutting down", exc_info=True
- )
- try:
- architect.shutdown()
- except (KeyboardInterrupt, Exception) as architect_exception:
- logger.exception(
- f"Could not shut down architect: {architect_exception}",
- exc_info=True,
- )
- raise e
-
- launcher = TaskLauncher(self.db, task_run, initialization_data_array)
- launcher.create_assignments()
- launcher.launch_units(task_url)
-
- self._task_runs_tracked[task_run.db_id] = TrackedRun(
- task_run=task_run,
- task_launcher=launcher,
- task_runner=task_runner,
- architect=architect,
- job=job,
- )
- task_run.update_completion_progress(status=False)
-
- return task_run.db_id
-
- def _track_and_kill_runs(self):
- """
- Background thread that shuts down servers when a task
- is fully done.
- """
- while not self.is_shutdown:
- runs_to_check = list(self._task_runs_tracked.values())
- for tracked_run in runs_to_check:
- task_run = tracked_run.task_run
- task_run.update_completion_progress(
- task_launcher=tracked_run.task_launcher
- )
- if not task_run.get_is_completed():
- continue
- else:
- self.supervisor.shutdown_job(tracked_run.job)
- tracked_run.architect.shutdown()
- tracked_run.task_launcher.shutdown()
- del self._task_runs_tracked[task_run.db_id]
- time.sleep(2)
-
- def shutdown(self, skip_input=True):
- logger.info("operator shutting down")
- self.is_shutdown = True
- for tracked_run in self._task_runs_tracked.values():
- logger.info("expiring units")
- tracked_run.task_launcher.shutdown()
- tracked_run.task_launcher.expire_units()
- try:
- remaining_runs = self._task_runs_tracked.values()
- while len(remaining_runs) > 0:
- next_runs = []
- for tracked_run in remaining_runs:
- if tracked_run.task_run.get_is_completed():
- tracked_run.architect.shutdown()
- else:
- next_runs.append(tracked_run)
- if len(next_runs) > 0:
- logger.info(
- f"Waiting on {len(remaining_runs)} task runs, Ctrl-C ONCE to FORCE QUIT"
- )
- time.sleep(30)
- remaining_runs = next_runs
- except Exception as e:
- logger.exception(
- f"Encountered problem during shutting down {e}", exc_info=True
- )
- import traceback
-
- traceback.print_exc()
- except (KeyboardInterrupt, SystemExit) as e:
- logger.info(
- "Skipping waiting for outstanding task completions, shutting down servers now!"
- )
- for tracked_run in remaining_runs:
- tracked_run.architect.shutdown()
- finally:
- self.supervisor.shutdown()
- self._run_tracker_thread.join()
-
- def validate_and_run_config(
- self, run_config: DictConfig, shared_state: Optional[SharedTaskState] = None
- ) -> Optional[str]:
- """
- Wrapper around validate_and_run_config_or_die that prints errors on
- failure, rather than throwing. Generally for use in scripts.
- """
- try:
- return self.validate_and_run_config_or_die(
- run_config=run_config, shared_state=shared_state
- )
- except (KeyboardInterrupt, Exception) as e:
- logger.error("Ran into error while launching run: ", exc_info=True)
- return None
-
- def parse_and_launch_run_wrapper(
- self,
- arg_list: Optional[List[str]] = None,
- extra_args: Optional[Dict[str, Any]] = None,
- ) -> Optional[str]:
- """
- Wrapper around parse and launch run that prints errors on failure, rather
- than throwing. Generally for use in scripts.
- """
- raise Exception(
- "Operator.parse_and_launch_run_wrapper has been deprecated in favor "
- "of using Hydra for argument configuration. See the docs at "
- "https://github.com/facebookresearch/Mephisto/blob/master/docs/hydra_migration.md "
- "in order to upgrade."
- )
-
- def print_run_details(self):
- """Print details about running tasks"""
- # TODO(#93) parse these tasks and get the full details
- for task in self.get_running_task_runs():
- logger.info(f"Operator running task ID = {task}")
-
- def wait_for_runs_then_shutdown(
- self, skip_input=False, log_rate: Optional[int] = None
- ) -> None:
- """
- Wait for task_runs to complete, and then shutdown.
-
- Set log_rate to get print statements of currently running tasks
- at the specified interval
- """
- try:
- try:
- last_log = 0.0
- while len(self.get_running_task_runs()) > 0:
- if log_rate is not None:
- if time.time() - last_log > log_rate:
- last_log = time.time()
- self.print_run_details()
- time.sleep(10)
-
- except Exception as e:
- if skip_input:
- raise e
-
- traceback.print_exc()
- should_quit = input(
- "The above exception happened while running a task, do "
- "you want to shut down? (y)/n: "
- )
- if should_quit not in ["n", "N", "no", "No"]:
- raise e
-
- except Exception as e:
- import traceback
-
- traceback.print_exc()
- except (KeyboardInterrupt, SystemExit) as e:
- logger.exception(
- "Cleaning up after keyboard interrupt, please wait!", exc_info=True
- )
- finally:
- self.shutdown()
diff --git a/mephisto/core/registry.py b/mephisto/core/registry.py
index d5e605916..a12d20ccd 100644
--- a/mephisto/core/registry.py
+++ b/mephisto/core/registry.py
@@ -4,164 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from typing import Union, Type, Dict, Any, List, TYPE_CHECKING
-from mephisto.core.utils import get_root_dir, get_provider_dir
-from mephisto.core.hydra_config import register_abstraction_config
-import importlib
-import os
-
-if TYPE_CHECKING:
- from mephisto.data_model.blueprint import Blueprint
- from mephisto.data_model.crowd_provider import CrowdProvider
- from mephisto.data_model.architect import Architect
-
-
-BLUEPRINTS: Dict[str, Type["Blueprint"]] = {}
-ARCHITECTS: Dict[str, Type["Architect"]] = {}
-PROVIDERS: Dict[str, Type["CrowdProvider"]] = {}
-
-
-def register_mephisto_abstraction():
- """
- Decorator method for classes that extend a mephisto abstraction, used
- to pull Mephisto abstractions out of anywhere that defines them.
- """
-
- def register_cls(
- base_class: Union[Type["Blueprint"], Type["Architect"], Type["CrowdProvider"]]
- ):
- from mephisto.data_model.blueprint import Blueprint
- from mephisto.data_model.crowd_provider import CrowdProvider
- from mephisto.data_model.architect import Architect
-
- if issubclass(base_class, Blueprint):
- name = base_class.BLUEPRINT_TYPE
- BLUEPRINTS[name] = base_class
- type_key = "blueprint"
- elif issubclass(base_class, Architect):
- name = base_class.ARCHITECT_TYPE
- ARCHITECTS[name] = base_class
- type_key = "architect"
- elif issubclass(base_class, CrowdProvider):
- name = base_class.PROVIDER_TYPE
- PROVIDERS[name] = base_class
- type_key = "provider"
- else:
- raise AssertionError(
- f"Provided class {base_class} not a child of one of the mephisto "
- "abstractions, expected one of Blueprint, Architect, or CrowdProvider."
- )
- register_abstraction_config(
- name=name, node=base_class.ArgsClass, abstraction_type=type_key
- )
- return base_class
-
- return register_cls
-
-
-def uses_mephisto(module: Any):
- """
- Register a module as having defined classes for special Mephisto abstractions.
- Should be put in the __init__.py of the base module.
- """
- # TODO register the module and file path to the local mephisto registry file
- pass
-
-
-def fill_registries():
- """
- Ensure that all of the required modules are picked up by the mephisto server
- """
- # TODO pick up on local file changes such that Mephisto won't need to be
- # restarted to add new abstractions
-
- # TODO pass through all of the use_mephisto modules in the local registry file
- # to ensure that all of the modules are added
-
- # TODO(WISH) these can be made recursive finds to pass through subfolders
- # Import Mephisto CrowdProviders
- provider_root = get_provider_dir()
- for dir_name in os.listdir(provider_root):
- provider_dir = os.path.join(provider_root, dir_name)
- if not os.path.isdir(provider_dir):
- continue
- for filename in os.listdir(provider_dir):
- if filename.endswith("provider.py"):
- provider_name = filename[: filename.find(".py")]
- importlib.import_module(
- f"mephisto.providers.{dir_name}.{provider_name}"
- )
-
- # Import Mephisto Architects
- architect_root = os.path.join(get_root_dir(), "mephisto", "server", "architects")
- for filename in os.listdir(architect_root):
- if filename.endswith("architect.py"):
- architect_name = filename[: filename.find(".py")]
- importlib.import_module(f"mephisto.server.architects.{architect_name}")
-
- # Import Mephisto Blueprints
- blueprint_root = os.path.join(get_root_dir(), "mephisto", "server", "blueprints")
- for dir_name in os.listdir(blueprint_root):
- blueprint_dir = os.path.join(blueprint_root, dir_name)
- if not os.path.isdir(blueprint_dir):
- continue
- for filename in os.listdir(blueprint_dir):
- if filename.endswith("blueprint.py"):
- blueprint_name = filename[: filename.find(".py")]
- importlib.import_module(
- f"mephisto.server.blueprints.{dir_name}.{blueprint_name}"
- )
-
-
-def get_crowd_provider_from_type(provider_type: str) -> Type["CrowdProvider"]:
- """Return the crowd provider class for the given string"""
- if provider_type in PROVIDERS:
- return PROVIDERS[provider_type]
- else:
- raise NotImplementedError(
- f"Missing provider type {provider_type}, is it registered?"
- )
-
-
-def get_blueprint_from_type(task_type: str) -> Type["Blueprint"]:
- """Return the blueprint class for the given string"""
- if task_type in BLUEPRINTS:
- return BLUEPRINTS[task_type]
- else:
- raise NotImplementedError(
- f"Missing blueprint type {task_type}, is it registered?"
- )
-
-
-def get_architect_from_type(architect_type: str) -> Type["Architect"]:
- """Return the architect class for the given string"""
- if architect_type in ARCHITECTS:
- return ARCHITECTS[architect_type]
- else:
- raise NotImplementedError(
- f"Missing architect type {architect_type}, is it registered?"
- )
-
-
-def get_valid_provider_types() -> List[str]:
- """
- Return the valid provider types that are currently supported by
- the mephisto framework
- """
- return list(PROVIDERS.keys())
-
-
-def get_valid_blueprint_types() -> List[str]:
- """
- Return the valid provider types that are currently supported by
- the mephisto framework
- """
- return list(BLUEPRINTS.keys())
-
-
-def get_valid_architect_types() -> List[str]:
- """
- Return the valid provider types that are currently supported by
- the mephisto framework
- """
- return list(ARCHITECTS.keys())
+from mephisto.operations.registry import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.registry "
+ "to mephisto.operations.registry ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/core/supervisor.py b/mephisto/core/supervisor.py
index 0e05efd17..1048e258b 100644
--- a/mephisto/core/supervisor.py
+++ b/mephisto/core/supervisor.py
@@ -4,852 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-import threading
-from queue import PriorityQueue, Empty
-import time
-from mephisto.data_model.packet import (
- Packet,
- PACKET_TYPE_ALIVE,
- PACKET_TYPE_AGENT_ACTION,
- PACKET_TYPE_NEW_AGENT,
- PACKET_TYPE_NEW_WORKER,
- PACKET_TYPE_REQUEST_AGENT_STATUS,
- PACKET_TYPE_RETURN_AGENT_STATUS,
- PACKET_TYPE_INIT_DATA,
- PACKET_TYPE_GET_INIT_DATA,
- PACKET_TYPE_PROVIDER_DETAILS,
- PACKET_TYPE_SUBMIT_ONBOARDING,
- PACKET_TYPE_REQUEST_ACTION,
- PACKET_TYPE_UPDATE_AGENT_STATUS,
+from mephisto.operations.supervisor import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.supervisor "
+ "to mephisto.operations.supervisor ",
+ PendingDeprecationWarning,
)
-from mephisto.data_model.worker import Worker
-from mephisto.data_model.qualification import worker_is_qualified
-from mephisto.data_model.agent import Agent, OnboardingAgent
-from mephisto.data_model.blueprint import OnboardingRequired, AgentState
-from mephisto.core.registry import get_crowd_provider_from_type
-from mephisto.server.channels.channel import Channel, STATUS_CHECK_TIME
-
-from dataclasses import dataclass
-
-from typing import Dict, Set, Optional, List, Any, Union, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.assignment import Assignment, Unit
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import TaskRunner
- from mephisto.data_model.crowd_provider import CrowdProvider
- from mephisto.data_model.architect import Architect
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-# This class manages communications between the server
-# and workers, ensures that their status is properly tracked,
-# and also provides some helping utility functions for
-# groups of workers or worker/agent compatibility.
-
-# Mostly, the supervisor oversees the communications
-# between jobs and workers over the channels
-
-STATUS_TO_TEXT_MAP = {
- AgentState.STATUS_EXPIRED: "This task is no longer available to be completed. "
- "Please return it and try a different task",
- AgentState.STATUS_TIMEOUT: "You took to long to respond to this task, and have timed out. "
- "The task is no longer available, please return it.",
- AgentState.STATUS_DISCONNECT: "You have disconnected from our server during the duration of the task. "
- "If you have done substantial work, please reach out to see if we can recover it. ",
- AgentState.STATUS_PARTNER_DISCONNECT: "One of your partners has disconnected while working on this task. We won't penalize "
- "you for them leaving, so please submit this task as is.",
-}
-
-SYSTEM_CHANNEL_ID = "mephisto" # TODO pull from somewhere
-START_DEATH_TIME = 10
-
-# State storage
-@dataclass
-class Job:
- architect: "Architect"
- task_runner: "TaskRunner"
- provider: "CrowdProvider"
- qualifications: List[Dict[str, Any]]
- registered_channel_ids: List[str]
-
-
-@dataclass
-class ChannelInfo:
- channel_id: str
- job: "Job"
- channel: Channel
-
-
-@dataclass
-class AgentInfo:
- agent: Union["Agent", "OnboardingAgent"]
- used_channel_id: str
- assignment_thread: Optional[threading.Thread] = None
-
-
-class Supervisor:
- def __init__(self, db: "MephistoDB"):
- self.db = db
- # Tracked state
- self.agents: Dict[str, AgentInfo] = {}
- self.agents_by_registration_id: Dict[str, AgentInfo] = {}
- self.channels: Dict[str, ChannelInfo] = {}
- # Map from onboarding id to agent request packet
- self.onboarding_packets: Dict[str, Packet] = {}
-
- # Agent status handling
- self.last_status_check = time.time()
-
- # Message handling
- self.message_queue: List[Packet] = []
- self.sending_thread: Optional[threading.Thread] = None
-
- def _on_channel_open(self, channel_id: str):
- """Handler for what to do when a socket opens, we send an alive"""
- channel_info = self.channels[channel_id]
- self._send_alive(channel_info)
-
- def _on_catastrophic_disconnect(self, channel_id):
- # TODO(#102) Catastrophic disconnect needs to trigger cleanup
- logger.error(f"Channel {channel_id} called on_catastrophic_disconnect")
-
- def _on_channel_message(self, channel_id: str, packet: Packet):
- """Incoming message handler defers to the internal handler"""
- try:
- channel_info = self.channels[channel_id]
- self._on_message(packet, channel_info)
- except Exception as e:
- # TODO(#93) better error handling about failed messages
- logger.exception(
- f"Channel {channel_id} encountered error on packet {packet}",
- exc_info=True,
- )
- raise
-
- def register_job(
- self,
- architect: "Architect",
- task_runner: "TaskRunner",
- provider: "CrowdProvider",
- qualifications: Optional[List[Dict[str, Any]]] = None,
- ):
- if qualifications is None:
- qualifications = []
- task_run = task_runner.task_run
- channels = architect.get_channels(
- self._on_channel_open,
- self._on_catastrophic_disconnect,
- self._on_channel_message,
- )
- job = Job(
- architect=architect,
- task_runner=task_runner,
- provider=provider,
- qualifications=qualifications,
- registered_channel_ids=[],
- )
- for channel in channels:
- channel_id = self.register_channel(channel, job)
- job.registered_channel_ids.append(channel_id)
- return job
-
- def register_channel(self, channel: Channel, job: "Job") -> str:
- """Register the channel to the specific job"""
- channel_id = channel.channel_id
-
- channel_info = ChannelInfo(channel_id=channel_id, channel=channel, job=job)
- self.channels[channel_id] = channel_info
-
- channel.open()
- self._send_alive(channel_info)
- start_time = time.time()
- while not channel.is_alive():
- if time.time() - start_time > START_DEATH_TIME:
- # TODO(OWN) Ask channel why it might have failed to connect?
- self.channels[channel_id].channel.close()
- raise ConnectionRefusedError( # noqa F821 we only support py3
- "Was not able to establish a connection with the server, "
- "please try to run again. If that fails,"
- "please ensure that your local device has the correct SSL "
- "certs installed."
- )
- try:
- self._send_alive(channel_info)
- except Exception:
- pass
- time.sleep(0.3)
- return channel_id
-
- def close_channel(self, channel_id: str):
- """Close the given channel by id"""
- self.channels[channel_id].channel.close()
- del self.channels[channel_id]
-
- def shutdown_job(self, job: Job):
- """Close any channels related to a job"""
- job_channels = job.registered_channel_ids
- for channel_id in job_channels:
- self.close_channel(channel_id)
-
- def shutdown(self):
- """Close all of the channels, join threads"""
- channels_to_close = list(self.channels.keys())
- for channel_id in channels_to_close:
- self.close_channel(channel_id)
- if self.sending_thread is not None:
- self.sending_thread.join()
-
- def _send_alive(self, channel_info: ChannelInfo) -> bool:
- logger.info("Sending alive")
- return channel_info.channel.send(
- Packet(
- packet_type=PACKET_TYPE_ALIVE,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- )
- )
-
- def _on_act(self, packet: Packet, channel_info: ChannelInfo):
- """Handle an action as sent from an agent"""
- agent = self.agents[packet.sender_id].agent
-
- # If the packet is_submit, and has files, we need to
- # process downloading those files first
- if packet.data.get("MEPHISTO_is_submit") is True:
- data_files = packet.data.get("files")
- if data_files is not None:
- save_dir = agent.get_data_dir()
- architect = channel_info.job.architect
- for f_obj in data_files:
- architect.download_file(f_obj["filename"], save_dir)
-
- # TODO(OWN) Packets stored as info from workers can also be
- # saved somewhere locally just in case the world dies, and
- # then cleaned up once the world completes successfully
- agent.pending_actions.append(packet)
- agent.has_action.set()
-
- def _on_submit_onboarding(self, packet: Packet, channel_info: ChannelInfo):
- """Handle the submission of onboarding data"""
- onboarding_id = packet.sender_id
- if onboarding_id not in self.agents:
- logger.warning(
- f"Onboarding agent {onboarding_id} already submitted or disconnected, "
- f"but is calling _on_submit_onboarding again"
- )
- return
- agent_info = self.agents[onboarding_id]
- agent = agent_info.agent
- # Update the request id for the original packet (which has the required
- # registration data) to be the new submission packet (so that we answer
- # back properly under the new request)
- self.onboarding_packets[onboarding_id].data["request_id"] = packet.data[
- "request_id"
- ]
- del packet.data["request_id"]
- assert isinstance(
- agent, OnboardingAgent
- ), "Only onboarding agents should submit onboarding"
- agent.pending_actions.append(packet)
- agent.has_action.set()
- self._register_agent_from_onboarding(agent_info)
- logger.info(f"Onboarding agent {onboarding_id} registered out from onboarding")
- del self.agents[onboarding_id]
- del self.onboarding_packets[onboarding_id]
-
- def _register_worker(self, packet: Packet, channel_info: ChannelInfo):
- """Process a worker registration packet to register a worker"""
- crowd_data = packet.data["provider_data"]
- crowd_provider = channel_info.job.provider
- worker_name = crowd_data["worker_name"]
- workers = self.db.find_workers(worker_name=worker_name)
- if len(workers) == 0:
- # TODO(WISH) get rid of sandbox designation
- workers = self.db.find_workers(worker_name=worker_name + "_sandbox")
- if len(workers) == 0:
- worker = crowd_provider.WorkerClass.new_from_provider_data(
- self.db, crowd_data
- )
- else:
- worker = workers[0]
-
- if not worker_is_qualified(worker, channel_info.job.qualifications):
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={"request_id": packet.data["request_id"], "worker_id": None},
- )
- )
- else:
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={
- "request_id": packet.data["request_id"],
- "worker_id": worker.db_id,
- },
- )
- )
-
- def _launch_and_run_onboarding(
- self, agent_info: "AgentInfo", task_runner: "TaskRunner"
- ):
- """Launch a thread to supervise the completion of onboarding for a task"""
- tracked_agent = agent_info.agent
- assert isinstance(tracked_agent, OnboardingAgent), (
- "Can launch onboarding for OnboardingAgents, not Agents"
- f", got {tracked_agent}"
- )
- try:
- task_runner.launch_onboarding(tracked_agent)
- except Exception as e:
- import traceback
-
- traceback.print_exc()
- task_runner.cleanup_onboarding(tracked_agent)
- finally:
- if tracked_agent.get_status() not in [
- AgentState.STATUS_WAITING,
- AgentState.STATUS_APPROVED,
- AgentState.STATUS_REJECTED,
- ]:
- onboarding_id = tracked_agent.get_agent_id()
- logger.info(
- f"Onboarding agent {onboarding_id} disconnected or errored, "
- f"final status {tracked_agent.get_status()}."
- )
- del self.agents[onboarding_id]
- del self.onboarding_packets[onboarding_id]
-
- def _launch_and_run_assignment(
- self,
- assignment: "Assignment",
- agent_infos: List["AgentInfo"],
- task_runner: "TaskRunner",
- ):
- """Launch a thread to supervise the completion of an assignment"""
- try:
- tracked_agents: List["Agent"] = []
- for a in agent_infos:
- assert isinstance(
- a.agent, Agent
- ), f"Can launch assignments for Agents, not OnboardingAgents, got {a.agent}"
- tracked_agents.append(a.agent)
- task_runner.launch_assignment(assignment, tracked_agents)
- for agent_info in agent_infos:
- self._mark_agent_done(agent_info)
- # Wait for agents to be complete
- for agent_info in agent_infos:
- agent = agent_info.agent
- if agent.get_status() not in AgentState.complete():
- if not agent.did_submit.is_set():
- # Wait for a submit to occur
- # TODO(#94) make submit timeout configurable
- agent.has_action.wait(timeout=300)
- agent.act()
- agent.mark_done()
- except Exception as e:
- logger.exception(f"Cleaning up assignment: {e}", exc_info=True)
- task_runner.cleanup_assignment(assignment)
- finally:
- task_run = task_runner.task_run
- for unit in assignment.get_units():
- task_run.clear_reservation(unit)
-
- def _launch_and_run_unit(
- self, unit: "Unit", agent_info: "AgentInfo", task_runner: "TaskRunner"
- ):
- """Launch a thread to supervise the completion of an assignment"""
- try:
- agent = agent_info.agent
- assert isinstance(
- agent, Agent
- ), f"Can launch units for Agents, not OnboardingAgents, got {agent}"
- task_runner.launch_unit(unit, agent)
- if agent.get_status() not in AgentState.complete():
- self._mark_agent_done(agent_info)
- if not agent.did_submit.is_set():
- # Wait for a submit to occur
- # TODO(#94) make submit timeout configurable
- agent.has_action.wait(timeout=300)
- agent.act()
- agent.mark_done()
- except Exception as e:
- logger.exception(f"Cleaning up unit: {e}", exc_info=True)
- task_runner.cleanup_unit(unit)
- finally:
- task_runner.task_run.clear_reservation(unit)
-
- def _assign_unit_to_agent(
- self, packet: Packet, channel_info: ChannelInfo, units: List["Unit"]
- ):
- """Handle creating an agent for the specific worker to register an agent"""
-
- crowd_data = packet.data["provider_data"]
- task_run = channel_info.job.task_runner.task_run
- crowd_provider = channel_info.job.provider
- worker_id = crowd_data["worker_id"]
- worker = Worker(self.db, worker_id)
-
- logger.debug(
- f"Worker {worker_id} is being assigned one of " f"{len(units)} units."
- )
-
- reserved_unit = None
- while len(units) > 0 and reserved_unit is None:
- unit = units.pop(0)
- reserved_unit = task_run.reserve_unit(unit)
- if reserved_unit is None:
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={"request_id": packet.data["request_id"], "agent_id": None},
- )
- )
- else:
- agent = crowd_provider.AgentClass.new_from_provider_data(
- self.db, worker, unit, crowd_data
- )
- logger.debug(f"Created agent {agent}, {agent.db_id}.")
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={
- "request_id": packet.data["request_id"],
- "agent_id": agent.get_agent_id(),
- },
- )
- )
- agent_info = AgentInfo(agent=agent, used_channel_id=channel_info.channel_id)
- self.agents[agent.get_agent_id()] = agent_info
- self.agents_by_registration_id[
- crowd_data["agent_registration_id"]
- ] = agent_info
-
- # Launch individual tasks
- if not channel_info.job.task_runner.is_concurrent:
- unit_thread = threading.Thread(
- target=self._launch_and_run_unit,
- args=(unit, agent_info, channel_info.job.task_runner),
- name=f"Unit-thread-{unit.db_id}",
- )
- agent_info.assignment_thread = unit_thread
- unit_thread.start()
- else:
- # See if the concurrent unit is ready to launch
- assignment = unit.get_assignment()
- agents = assignment.get_agents()
- if None in agents:
- agent.update_status(AgentState.STATUS_WAITING)
- return # need to wait for all agents to be here to launch
-
- # Launch the backend for this assignment
- agent_infos = [self.agents[a.db_id] for a in agents if a is not None]
-
- assign_thread = threading.Thread(
- target=self._launch_and_run_assignment,
- args=(assignment, agent_infos, channel_info.job.task_runner),
- name=f"Assignment-thread-{assignment.db_id}",
- )
-
- for agent_info in agent_infos:
- agent_info.agent.update_status(AgentState.STATUS_IN_TASK)
- agent_info.assignment_thread = assign_thread
-
- assign_thread.start()
-
- def _register_agent_from_onboarding(self, onboarding_agent_info: AgentInfo):
- """
- Convert the onboarding agent to a full agent
- """
- onboarding_agent = onboarding_agent_info.agent
- current_status = onboarding_agent.get_status()
- channel_id = onboarding_agent_info.used_channel_id
- channel_info = self.channels[channel_id]
- task_runner = channel_info.job.task_runner
- task_run = task_runner.task_run
- blueprint = task_run.get_blueprint(args=task_runner.args)
- worker = onboarding_agent.get_worker()
-
- assert (
- isinstance(blueprint, OnboardingRequired) and blueprint.use_onboarding
- ), "Should only be registering from onboarding if onboarding is required and set"
- worker_passed = blueprint.validate_onboarding(worker, onboarding_agent)
- worker.grant_qualification(
- blueprint.onboarding_qualification_name, int(worker_passed), skip_crowd=True
- )
- if not worker_passed:
- worker.grant_qualification(
- blueprint.onboarding_failed_name, int(worker_passed)
- )
- onboarding_agent.update_status(AgentState.STATUS_REJECTED)
- else:
- onboarding_agent.update_status(AgentState.STATUS_APPROVED)
-
- # get the list of tentatively valid units
- units = task_run.get_valid_units_for_worker(worker)
- usable_units = channel_info.job.task_runner.filter_units_for_worker(
- units, worker
- )
-
- if not worker_passed:
- # TODO(WISH) it may be worth investigating launching a dummy task for these
- # instances where a worker has failed onboarding, but the onboarding
- # task still allowed submission of the failed data (no front-end validation)
- # units = [self.dummy_launcher.launch_dummy()]
- # self._assign_unit_to_agent(packet, channel_info, units)
- usable_units = []
-
- packet = self.onboarding_packets[onboarding_agent.get_agent_id()]
- self._try_send_agent_messages(onboarding_agent_info)
- self._send_status_update(onboarding_agent_info)
- self._assign_unit_to_agent(packet, channel_info, usable_units)
-
- def _register_agent(self, packet: Packet, channel_info: ChannelInfo):
- """Process an agent registration packet to register an agent"""
- # First see if this is a reconnection
- crowd_data = packet.data["provider_data"]
- agent_registration_id = crowd_data["agent_registration_id"]
- logger.debug(f"Incoming request to register agent {agent_registration_id}.")
- if agent_registration_id in self.agents_by_registration_id:
- agent = self.agents_by_registration_id[agent_registration_id].agent
- # Update the source channel, in case it has changed
- self.agents[agent.get_agent_id()].used_channel_id = channel_info.channel_id
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={
- "request_id": packet.data["request_id"],
- "agent_id": agent.get_agent_id(),
- },
- )
- )
- logger.debug(
- f"Found existing agent_registration_id {agent_registration_id}, "
- f"reconnecting to agent {agent.get_agent_id()}."
- )
- return
-
- # Process a new agent
- task_runner = channel_info.job.task_runner
- task_run = task_runner.task_run
- worker_id = crowd_data["worker_id"]
- worker = Worker(self.db, worker_id)
-
- # get the list of tentatively valid units
- units = task_run.get_valid_units_for_worker(worker)
- if len(units) == 0:
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={"request_id": packet.data["request_id"], "agent_id": None},
- )
- )
- logger.debug(
- f"Found existing agent_registration_id {agent_registration_id}, "
- f"had no valid units."
- )
- return
-
- # If there's onboarding, see if this worker has already been disqualified
- worker_id = crowd_data["worker_id"]
- worker = Worker(self.db, worker_id)
- blueprint = task_run.get_blueprint(args=task_runner.args)
- if isinstance(blueprint, OnboardingRequired) and blueprint.use_onboarding:
- if worker.is_disqualified(blueprint.onboarding_qualification_name):
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={
- "request_id": packet.data["request_id"],
- "agent_id": None,
- },
- )
- )
- logger.debug(
- f"Worker {worker_id} is already disqualified by onboarding "
- f"qual {blueprint.onboarding_qualification_name}."
- )
- return
- elif not worker.is_qualified(blueprint.onboarding_qualification_name):
- # Send a packet with onboarding information
- onboard_data = blueprint.get_onboarding_data(worker.db_id)
- onboard_agent = OnboardingAgent.new(self.db, worker, task_run)
- onboard_agent.state.set_init_state(onboard_data)
- agent_info = AgentInfo(
- agent=onboard_agent, used_channel_id=channel_info.channel_id
- )
- onboard_id = onboard_agent.get_agent_id()
- # register onboarding agent
- self.agents[onboard_id] = agent_info
- self.onboarding_packets[onboard_id] = packet
- self.message_queue.append(
- Packet(
- packet_type=PACKET_TYPE_PROVIDER_DETAILS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={
- "request_id": packet.data["request_id"],
- "agent_id": onboard_id,
- "onboard_data": onboard_data,
- },
- )
- )
-
- logger.debug(
- f"Worker {worker_id} is starting onboarding thread with "
- f"onboarding agent id {onboard_id}."
- )
-
- # Create an onboarding thread
- onboard_thread = threading.Thread(
- target=self._launch_and_run_onboarding,
- args=(agent_info, channel_info.job.task_runner),
- name=f"Onboard-thread-{onboard_id}",
- )
-
- onboard_agent.update_status(AgentState.STATUS_ONBOARDING)
- agent_info.assignment_thread = onboard_thread
- onboard_thread.start()
- return
-
- # Not onboarding, so just register directly
- self._assign_unit_to_agent(packet, channel_info, units)
-
- def _get_init_data(self, packet, channel_info: ChannelInfo):
- """Get the initialization data for the assigned agent's task"""
- task_runner = channel_info.job.task_runner
- agent_id = packet.data["provider_data"]["agent_id"]
- agent_info = self.agents[agent_id]
- assert isinstance(
- agent_info.agent, Agent
- ), f"Can only get init unit data for Agents, not OnboardingAgents, got {agent_info}"
- unit_data = task_runner.get_init_data_for_agent(agent_info.agent)
-
- agent_data_packet = Packet(
- packet_type=PACKET_TYPE_INIT_DATA,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_info.channel_id,
- data={"request_id": packet.data["request_id"], "init_data": unit_data},
- )
-
- self.message_queue.append(agent_data_packet)
-
- if isinstance(unit_data, dict) and unit_data.get("raw_messages") is not None:
- # TODO bring these into constants somehow
- for message in unit_data["raw_messages"]:
- packet = Packet.from_dict(message)
- packet.receiver_id = agent_id
- agent_info.agent.pending_observations.append(packet)
-
- def _on_message(self, packet: Packet, channel_info: ChannelInfo):
- """Handle incoming messages from the channel"""
- # TODO(#102) this method currently assumes that the packet's sender_id will
- # always be a valid agent in our list of agent_infos. At the moment this
- # is a valid assumption, but will not be on recovery from catastrophic failure.
- if packet.type == PACKET_TYPE_AGENT_ACTION:
- self._on_act(packet, channel_info)
- elif packet.type == PACKET_TYPE_NEW_AGENT:
- self._register_agent(packet, channel_info)
- elif packet.type == PACKET_TYPE_SUBMIT_ONBOARDING:
- self._on_submit_onboarding(packet, channel_info)
- elif packet.type == PACKET_TYPE_NEW_WORKER:
- self._register_worker(packet, channel_info)
- elif packet.type == PACKET_TYPE_GET_INIT_DATA:
- self._get_init_data(packet, channel_info)
- elif packet.type == PACKET_TYPE_RETURN_AGENT_STATUS:
- # Record this status response
- self._handle_updated_agent_status(packet.data)
- else:
- # PACKET_TYPE_REQUEST_AGENT_STATUS, PACKET_TYPE_ALIVE,
- # PACKET_TYPE_INIT_DATA
- raise Exception(f"Unexpected packet type {packet.type}")
-
- # TODO(#103) maybe batching these is better?
- def _try_send_agent_messages(self, agent_info: AgentInfo):
- """Handle sending any possible messages for a specific agent"""
- channel_info = self.channels[agent_info.used_channel_id]
- agent = agent_info.agent
- while len(agent.pending_observations) > 0:
- curr_obs = agent.pending_observations.pop(0)
- did_send = channel_info.channel.send(curr_obs)
- if not did_send:
- logger.error(f"Failed to send packet {curr_obs} to {channel_info}")
- agent.pending_observations.insert(0, curr_obs)
- return # something up with the channel, try later
-
- def _send_message_queue(self) -> None:
- """Send all of the messages in the system queue"""
- while len(self.message_queue) > 0:
- curr_obs = self.message_queue.pop(0)
- channel = self.channels[curr_obs.receiver_id].channel
- did_send = channel.send(curr_obs)
- if not did_send:
- logger.error(
- f"Failed to send packet {curr_obs} to server {curr_obs.receiver_id}"
- )
- self.message_queue.insert(0, curr_obs)
- return # something up with the channel, try later
-
- def _send_status_update(self, agent_info: AgentInfo) -> None:
- """
- Handle telling the frontend agent about a change in their
- active status. (Pushing a change in AgentState)
- """
- send_packet = Packet(
- packet_type=PACKET_TYPE_UPDATE_AGENT_STATUS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=agent_info.agent.get_agent_id(),
- data={
- "status": agent_info.agent.db_status,
- "state": {
- "done_text": STATUS_TO_TEXT_MAP.get(agent_info.agent.db_status),
- "task_done": agent_info.agent.db_status
- == AgentState.STATUS_PARTNER_DISCONNECT,
- },
- },
- )
- channel_info = self.channels[agent_info.used_channel_id]
- channel_info.channel.send(send_packet)
-
- def _mark_agent_done(self, agent_info: AgentInfo) -> None:
- """
- Handle marking an agent as done, and telling the frontend agent
- that they have successfully completed their task.
-
- If the agent is in a final non-successful status, or already
- told of partner disconnect, skip
- """
- if agent_info.agent.db_status in AgentState.complete() + [
- AgentState.STATUS_PARTNER_DISCONNECT
- ]:
- return
- send_packet = Packet(
- packet_type=PACKET_TYPE_UPDATE_AGENT_STATUS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=agent_info.agent.get_agent_id(),
- data={
- "status": "completed",
- "state": {
- "done_text": "You have completed this task. Please submit.",
- "task_done": True,
- },
- },
- )
- channel_info = self.channels[agent_info.used_channel_id]
- channel_info.channel.send(send_packet)
-
- def _handle_updated_agent_status(self, status_map: Dict[str, str]):
- """
- Handle updating the local statuses for agents based on
- the previously reported agent statuses.
-
- Takes as input a mapping from agent_id to server-side status
- """
- for agent_id, status in status_map.items():
- if status not in AgentState.valid():
- logger.warning(f"Invalid status for agent {agent_id}: {status}")
- continue
- if agent_id not in self.agents:
- # no longer tracking agent
- continue
- agent = self.agents[agent_id].agent
- db_status = agent.get_status()
- if agent.has_updated_status.is_set():
- continue # Incoming info may be stale if we have new info to send
- if status == AgentState.STATUS_NONE:
- # Stale or reconnect, send a status update
- self._send_status_update(self.agents[agent_id])
- continue
- if status != db_status:
- if db_status in AgentState.complete():
- logger.info(
- f"Got updated status {status} when already final: {agent.db_status}"
- )
- continue
- elif status == AgentState.STATUS_COMPLETED:
- continue # COMPLETED can only be marked locally
- agent.update_status(status)
- pass
-
- def _request_action(self, agent_info: AgentInfo) -> None:
- """
- Request an act from the agent targetted here. If the
- agent is found by the server, this request will be
- forwarded.
- """
- send_packet = Packet(
- packet_type=PACKET_TYPE_REQUEST_ACTION,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=agent_info.agent.get_agent_id(),
- data={},
- )
- channel_info = self.channels[agent_info.used_channel_id]
- channel_info.channel.send(send_packet)
-
- def _request_status_update(self) -> None:
- """
- Check last round of statuses, then request
- an update from the server on all agent's current status
- """
- if time.time() - self.last_status_check < STATUS_CHECK_TIME:
- return
-
- self.last_status_check = time.time()
-
- for channel_id, channel_info in self.channels.items():
- send_packet = Packet(
- packet_type=PACKET_TYPE_REQUEST_AGENT_STATUS,
- sender_id=SYSTEM_CHANNEL_ID,
- receiver_id=channel_id,
- data={},
- )
- channel_info.channel.send(send_packet)
-
- def _channel_handling_thread(self) -> None:
- """Thread for handling outgoing messages through the channels"""
- while len(self.channels) > 0:
- current_agents = list(self.agents.values())
- for agent_info in current_agents:
- # Send requests for action
- if agent_info.agent.wants_action.is_set():
- self._request_action(agent_info)
- agent_info.agent.wants_action.clear()
- # Pass updated statuses
- if agent_info.agent.has_updated_status.is_set():
- self._send_status_update(agent_info)
- agent_info.agent.has_updated_status.clear()
- # clear the message queue for this agent
- self._try_send_agent_messages(agent_info)
- # Send all messages from the system
- self._send_message_queue()
- self._request_status_update()
- # TODO(#103) is there a way we can trigger this when
- # agents observe instead?
- time.sleep(0.1)
-
- def launch_sending_thread(self) -> None:
- """Launch the sending thread for this supervisor"""
- self.sending_thread = threading.Thread(
- target=self._channel_handling_thread, name=f"channel-sending-thread"
- )
- self.sending_thread.start()
diff --git a/mephisto/core/task_launcher.py b/mephisto/core/task_launcher.py
index 8b8ccba37..35d114d56 100644
--- a/mephisto/core/task_launcher.py
+++ b/mephisto/core/task_launcher.py
@@ -4,213 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-# TODO(#99) do we standardize some kinds of data loader formats? perhaps
-# one that loads from files, and then an arbitrary kind? Simple
-# interface could be like an iterator. This class will launch tasks
-# as if the loader is an iterator.
-
-from mephisto.data_model.assignment import (
- Assignment,
- Unit,
- InitializationData,
- AssignmentState,
+from mephisto.operations.task_launcher import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.task_launcher "
+ "to mephisto.operations.task_launcher ",
+ PendingDeprecationWarning,
)
-
-from typing import Dict, Optional, List, Any, TYPE_CHECKING, Iterator
-from tqdm import tqdm
-import os
-import time
-import enum
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.database import MephistoDB
-
-import threading
-from mephisto.core.logger_core import get_logger
-import types
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-UNIT_GENERATOR_WAIT_SECONDS = 10
-ASSIGNMENT_GENERATOR_WAIT_SECONDS = 0.5
-
-
-class GeneratorType(enum.Enum):
- NONE = 0
- UNIT = 1
- ASSIGNMENT = 2
-
-
-class TaskLauncher:
- """
- This class is responsible for managing the process of registering
- and launching units, including the steps for pre-processing
- data and storing them locally for assignments when appropriate.
- """
-
- def __init__(
- self,
- db: "MephistoDB",
- task_run: "TaskRun",
- assignment_data_iterator: Iterator[InitializationData],
- max_num_concurrent_units: int = 0,
- ):
- """Prepare the task launcher to get it ready to launch the assignments"""
- self.db = db
- self.task_run = task_run
- self.assignment_data_iterable = assignment_data_iterator
- self.assignments: List[Assignment] = []
- self.units: List[Unit] = []
- self.provider_type = task_run.get_provider().PROVIDER_TYPE
- self.max_num_concurrent_units = max_num_concurrent_units
- self.launched_units: Dict[str, Unit] = {}
- self.unlaunched_units: Dict[str, Unit] = {}
- self.keep_launching_units: bool = False
- self.finished_generators: bool = False
- self.assignment_thread_done: bool = True
-
- self.unlaunched_units_access_condition = threading.Condition()
- if isinstance(self.assignment_data_iterable, types.GeneratorType):
- self.generator_type = GeneratorType.ASSIGNMENT
- self.assignment_thread_done = False
- else:
- self.generator_type = GeneratorType.NONE
- run_dir = task_run.get_run_dir()
- os.makedirs(run_dir, exist_ok=True)
-
- logger.debug(f"type of assignment data: {type(self.assignment_data_iterable)}")
- self.units_thread = None
- self.assignments_thread = None
-
- def _create_single_assignment(self, assignment_data) -> None:
- """ Create a single assignment in the database using its read assignment_data """
- task_run = self.task_run
- task_config = task_run.get_task_config()
- assignment_id = self.db.new_assignment(
- task_run.task_id,
- task_run.db_id,
- task_run.requester_id,
- task_run.task_type,
- task_run.provider_type,
- task_run.sandbox,
- )
- assignment = Assignment(self.db, assignment_id)
- assignment.write_assignment_data(assignment_data)
- self.assignments.append(assignment)
- unit_count = len(assignment_data.unit_data)
- for unit_idx in range(unit_count):
- unit_id = self.db.new_unit(
- task_run.task_id,
- task_run.db_id,
- task_run.requester_id,
- assignment_id,
- unit_idx,
- task_config.task_reward,
- task_run.provider_type,
- task_run.task_type,
- task_run.sandbox,
- )
- self.units.append(Unit(self.db, unit_id))
- with self.unlaunched_units_access_condition:
- self.unlaunched_units[unit_id] = Unit(self.db, unit_id)
-
- def _try_generating_assignments(self) -> None:
- """ Try to generate more assignments from the assignments_data_iterator"""
- while not self.finished_generators:
- try:
- data = next(self.assignment_data_iterable)
- self._create_single_assignment(data)
- except StopIteration:
- self.finished_generators = True
- self.assignment_thread_done = True
- time.sleep(ASSIGNMENT_GENERATOR_WAIT_SECONDS)
-
- def create_assignments(self) -> None:
- """ Create an assignment and associated units for the generated assignment data """
- self.keep_launching_units = True
- if self.generator_type == GeneratorType.NONE:
- for data in self.assignment_data_iterable:
- self._create_single_assignment(data)
- else:
- self.assignments_thread = threading.Thread(
- target=self._try_generating_assignments, args=()
- )
- self.assignments_thread.start()
-
- def generate_units(self):
- """units generator which checks that only 'max_num_concurrent_units' running at the same time,
- i.e. in the LAUNCHED or ASSIGNED states"""
- while self.keep_launching_units:
- units_id_to_remove = []
- for db_id, unit in self.launched_units.items():
- status = unit.get_status()
- if (
- status != AssignmentState.LAUNCHED
- and status != AssignmentState.ASSIGNED
- ):
- units_id_to_remove.append(db_id)
- for db_id in units_id_to_remove:
- self.launched_units.pop(db_id)
-
- num_avail_units = self.max_num_concurrent_units - len(self.launched_units)
- num_avail_units = (
- len(self.unlaunched_units)
- if self.max_num_concurrent_units == 0
- else num_avail_units
- )
-
- units_id_to_remove = []
- for i, item in enumerate(self.unlaunched_units.items()):
- db_id, unit = item
- if i < num_avail_units:
- self.launched_units[unit.db_id] = unit
- units_id_to_remove.append(db_id)
- yield unit
- else:
- break
- with self.unlaunched_units_access_condition:
- for db_id in units_id_to_remove:
- self.unlaunched_units.pop(db_id)
-
- time.sleep(UNIT_GENERATOR_WAIT_SECONDS)
- if not self.unlaunched_units:
- break
-
- def _launch_limited_units(self, url: str) -> None:
- """ use units' generator to launch limited number of units according to (max_num_concurrent_units)"""
- while not self.finished_generators:
- for unit in self.generate_units():
- unit.launch(url)
- if self.generator_type == GeneratorType.NONE:
- break
-
- def launch_units(self, url: str) -> None:
- """launch any units registered by this TaskLauncher"""
- self.units_thread = threading.Thread(
- target=self._launch_limited_units, args=(url,)
- )
- self.units_thread.start()
-
- def get_assignments_are_all_created(self) -> bool:
- return self.assignment_thread_done
-
- def expire_units(self) -> None:
- """Clean up all units on this TaskLauncher"""
- self.keep_launching_units = False
- self.finished_generators = True
- for unit in tqdm(self.units):
- try:
- unit.expire()
- except Exception as e:
- logger.exception(
- f"Warning: failed to expire unit {unit.db_id}. Stated error: {e}",
- exc_info=True,
- )
-
- def shutdown(self) -> None:
- """Clean up running threads for generating assignments and units"""
- if self.assignments_thread is not None:
- self.assignments_thread.join()
- self.units_thread.join()
diff --git a/mephisto/core/utils.py b/mephisto/core/utils.py
index 9bf5346ef..af70320ac 100644
--- a/mephisto/core/utils.py
+++ b/mephisto/core/utils.py
@@ -4,192 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import os
-import sys, glob, importlib
-
-import shlex
-from distutils.dir_util import copy_tree
-import functools
-from mephisto.data_model.constants import NO_PROJECT_NAME
-from mephisto.core.config_handler import (
- add_config_arg,
- get_config_arg,
- CORE_SECTION,
- DATA_STORAGE_KEY,
- DEFAULT_CONFIG_FILE,
+from mephisto.operations.utils import *
+import warnings
+
+warnings.warn(
+ "Imports from `mephisto.core` are going away soon. "
+ "Please replace all of your imports from mephisto.core.utils "
+ "to mephisto.operations.utils ",
+ PendingDeprecationWarning,
)
-
-from typing import Optional, Dict, Any, List, Type, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.crowd_provider import CrowdProvider
- from mephisto.data_model.task_runner import TaskRunner
- from mephisto.data_model.architect import Architect
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.requester import Requester
-
-
-loaded_data_dir = None
-
-
-def ensure_user_confirm(display_text, skip_input=False) -> None:
- """
- Helper to provide the flow for having a user confirm a specific occurrence
- before it happens. skip_input will make this method return without
- checking, which is useful for automated scripts
- """
- if skip_input:
- return
- res = input(f'{display_text}\nEnter "n" to exit and anything else to continue:')
- if res == "n":
- raise SystemExit(0)
- return
-
-
-def get_root_dir() -> str:
- """Return the currently configured root mephisto directory"""
- # This file is at ROOT/mephisto/core/utils.py
- return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-
-def get_mock_requester(db) -> "Requester":
- """Get or create a mock requester to use for test tasks"""
- # TODO(#98) Need to split utils into those operating for the data model
- # and those operating on the data model, and those operating beyond
- mock_requesters = db.find_requesters(provider_type="mock")
- if len(mock_requesters) == 0:
- db.new_requester("MOCK_REQUESTER", "mock")
- mock_requesters = db.find_requesters(provider_type="mock")
- return mock_requesters[0]
-
-
-def get_provider_dir() -> str:
- """
- Return the path to the mephisto providers diroctory
- """
- return os.path.join(get_root_dir(), "mephisto/providers")
-
-
-def get_gallery_dir() -> str:
- """
- Return the path to the mephisto task gallery
- """
- return os.path.join(get_root_dir(), "gallery")
-
-
-def get_dir_for_task(task_name: str, not_exists_ok: bool = False) -> Optional[str]:
- """
- Return the directory for the given task, if it exists. Check the user's task
- dir first and then the gallery second.
- """
- dir_path = os.path.join(get_tasks_dir(), task_name)
- if os.path.exists(dir_path) or not_exists_ok:
- return dir_path
- dir_path = os.path.join(get_gallery_dir(), task_name)
- if os.path.exists(dir_path) or not_exists_ok:
- return dir_path
- return None
-
-
-def get_tasks_dir() -> str:
- """
- Return the directory where the mephisto user has configured their personal tasks
- to exist in
- """
- return os.path.join(get_root_dir(), "mephisto/tasks")
-
-
-def get_root_data_dir() -> str:
- """
- Return the directory where the mephisto data is expected to go
- """
- global loaded_data_dir
- if loaded_data_dir is None:
- default_data_dir = os.path.join(get_root_dir(), "data")
- actual_data_dir = get_config_arg(CORE_SECTION, DATA_STORAGE_KEY)
- if actual_data_dir is None:
- data_dir_location = input(
- "Please enter the full path to a location to store Mephisto run data. By default this "
- f"would be at '{default_data_dir}'. This dir should NOT be on a distributed file "
- "store. Press enter to use the default: "
- ).strip()
- if len(data_dir_location) == 0:
- data_dir_location = default_data_dir
- data_dir_location = os.path.expanduser(data_dir_location)
- os.makedirs(data_dir_location, exist_ok=True)
- # Check to see if there is existing data to possibly move to the data dir:
- database_loc = os.path.join(default_data_dir, "database.db")
- if os.path.exists(database_loc) and data_dir_location != default_data_dir:
- should_migrate = (
- input(
- "We have found an existing database in the default data directory, do you want to "
- f"copy any existing data from the default location to {data_dir_location}? (y)es/no: "
- )
- .lower()
- .strip()
- )
- if len(should_migrate) == 0 or should_migrate[0] == "y":
- copy_tree(default_data_dir, data_dir_location)
- print(
- "Mephisto data successfully copied, once you've confirmed the migration worked, "
- "feel free to remove all of the contents in "
- f"{default_data_dir} EXCEPT for `README.md`."
- )
- add_config_arg(CORE_SECTION, DATA_STORAGE_KEY, data_dir_location)
-
- loaded_data_dir = get_config_arg(CORE_SECTION, DATA_STORAGE_KEY)
-
- if not os.path.isdir(loaded_data_dir):
- raise NotADirectoryError(
- f"The provided Mephisto data directory {loaded_data_dir} as set in "
- f"{DEFAULT_CONFIG_FILE} is not a directory! Please locate your Mephisto "
- f"data directory and update {DEFAULT_CONFIG_FILE} to point to it."
- )
-
- return loaded_data_dir
-
-
-def get_data_dir(root_dir: Optional[str] = None) -> str:
- """
- Return the directory where the mephisto data is expected to go
- """
- if root_dir is None:
- return get_root_data_dir()
- return os.path.join(root_dir, "data")
-
-
-def get_mephisto_tmp_dir() -> str:
- """
- Return the directory where the mephisto temporary build files go
- """
- return os.path.join(get_root_dir(), "tmp")
-
-
-def get_dir_for_run(task_run: "TaskRun", project_name: str = NO_PROJECT_NAME) -> str:
- """
- Return the directory where the mephisto run data is expected to go
- """
- run_id = task_run.db_id
- root_dir = task_run.db.db_root
- return os.path.join(get_data_dir(root_dir), "runs", project_name, run_id)
-
-
-def build_arg_list_from_dict(in_dict: Dict[str, Any]) -> List[str]:
- arg_list = []
- for key, val in in_dict.items():
- arg_list.append(f"--{key.replace('_', '-')}")
- arg_list.append(str(val))
- return arg_list
-
-
-def find_or_create_qualification(db, qualification_name) -> None:
- """
- Ensure the given qualification exists in the db,
- creating it if it doesn't already
- """
- from mephisto.data_model.database import EntryAlreadyExistsException
-
- try:
- db.make_qualification(qualification_name)
- except EntryAlreadyExistsException:
- pass # qualification already exists
diff --git a/mephisto/data_model/agent.py b/mephisto/data_model/agent.py
index 854513268..9af5ca774 100644
--- a/mephisto/data_model/agent.py
+++ b/mephisto/data_model/agent.py
@@ -8,7 +8,7 @@
import threading
from abc import ABC, abstractmethod, abstractstaticmethod
-from mephisto.data_model.blueprint import AgentState
+from mephisto.abstractions.blueprint import AgentState
from mephisto.data_model.worker import Worker
from mephisto.data_model.exceptions import (
AgentReturnedError,
@@ -20,9 +20,10 @@
if TYPE_CHECKING:
from mephisto.data_model.assignment import Unit, Assignment
- from mephisto.data_model.database import MephistoDB
+ from mephisto.abstractions.database import MephistoDB
from mephisto.data_model.packet import Packet
- from mephisto.data_model.task import Task, TaskRun
+ from mephisto.data_model.task import Task
+ from mephisto.data_model.task_run import TaskRun
class Agent(ABC):
@@ -77,7 +78,7 @@ def __new__(
as you will instead be returned the correct Agent class according to
the crowdprovider associated with this Agent.
"""
- from mephisto.core.registry import get_crowd_provider_from_type
+ from mephisto.operations.registry import get_crowd_provider_from_type
if cls == Agent:
# We are trying to construct a Agent, find what type to use and
@@ -134,7 +135,7 @@ def get_task_run(self) -> "TaskRun":
elif self._assignment is not None:
self._task_run = self._assignment.get_task_run()
else:
- from mephisto.data_model.task import TaskRun
+ from mephisto.data_model.task_run import TaskRun
self._task_run = TaskRun(self.db, self.task_run_id)
return self._task_run
@@ -393,7 +394,7 @@ def get_worker(self) -> Worker:
def get_task_run(self) -> "TaskRun":
"""Return the TaskRun this agent is working within"""
if self._task_run is None:
- from mephisto.data_model.task import TaskRun
+ from mephisto.data_model.task_run import TaskRun
self._task_run = TaskRun(self.db, self.task_run_id)
return self._task_run
diff --git a/mephisto/data_model/architect.py b/mephisto/data_model/architect.py
index 52de0ec42..241ed592a 100644
--- a/mephisto/data_model/architect.py
+++ b/mephisto/data_model/architect.py
@@ -4,110 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from typing import Dict, List, Any, ClassVar, Type, TYPE_CHECKING, Callable
-
-if TYPE_CHECKING:
- from mephisto.server.channels.channel import Channel
- from mephsito.data_model.packet import Packet
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.blueprint import SharedTaskState
- from argparse import _ArgumentGroup as ArgumentGroup
-
-
-@dataclass
-class ArchitectArgs:
- """Base class for arguments to configure architects"""
-
- _architect_type: str = MISSING
-
-
-class Architect(ABC):
- """
- Provides methods for setting up a server somewhere and deploying tasks
- onto that server.
- """
-
- ArgsClass: ClassVar[Type[ArchitectArgs]] = ArchitectArgs
- ARCHITECT_TYPE: str
-
- def __init__(
- self,
- db: "MephistoDB",
- args: DictConfig,
- shared_state: "SharedTaskState",
- task_run: "TaskRun",
- build_dir_root: str,
- ):
- """
- Initialize this architect with whatever options are provided given
- ArgsClass. Parse whatever additional options may be required
- for the specific task_run.
-
- Also set up any required database/memory into the MephistoDB so that
- this data can be stored long-term.
- """
- raise NotImplementedError()
-
- @classmethod
- def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
- """
- Assert that the provided arguments are valid. Should
- fail if a task launched with these arguments would
- not work.
-
- This should include throwing an exception if the architect
- needs login details or something similar given the
- arguments passed in.
- """
- return
-
- def get_channels(
- self,
- on_channel_open: Callable[[str], None],
- on_catastrophic_disconnect: Callable[[str], None],
- on_message: Callable[[str, "Packet"], None],
- ) -> List["Channel"]:
- """
- Return a list of all relevant channels that the Supervisor will
- need to register to in order to function
- """
- raise NotImplementedError()
-
- def download_file(self, filename: str, save_dir: str) -> None:
- """
- Save the file that is noted as stored on the server to
- the desired save location.
- """
- raise NotImplementedError()
-
- def prepare(self) -> str:
- """
- Produce the server files that will be deployed to the server
- """
- raise NotImplementedError()
-
- def deploy(self) -> str:
- """
- Launch the server, and push the task files to the server. Return
- the server URL
- """
- raise NotImplementedError()
-
- def cleanup(self) -> None:
- """
- Remove any files that were used for the deployment process that
- no longer need to be kept track of now that the task has
- been launched.
- """
- raise NotImplementedError()
-
- def shutdown(self) -> None:
- """
- Shut down the server launched by this Architect, as stored
- in the db.
- """
- raise NotImplementedError()
+from mephisto.abstractions.architect import *
+import warnings
+
+warnings.warn(
+ "Import of architect content from `data_model` is going away soon. "
+ "Please replace all of your imports from mephisto.data_model.architect "
+ "to mephisto.abstractions.architect. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/data_model/assignment.py b/mephisto/data_model/assignment.py
index 4f175e38e..506c546e2 100644
--- a/mephisto/data_model/assignment.py
+++ b/mephisto/data_model/assignment.py
@@ -5,19 +5,19 @@
# LICENSE file in the root directory of this source tree.
-from abc import ABC, abstractmethod
-from mephisto.core.utils import get_dir_for_run
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.data_model.task import TaskRun, Task
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.data_model.task import Task
+from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.agent import Agent
-from mephisto.data_model.blueprint import AgentState
from mephisto.data_model.requester import Requester
-from typing import List, Optional, Tuple, Mapping, Dict, Any, Type, TYPE_CHECKING, IO
+from typing import List, Optional, Mapping, Dict, Any, TYPE_CHECKING, IO
+
+# Temporary until import split is clear
+from mephisto.data_model.unit import *
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
+ from mephisto.abstractions.database import MephistoDB
from mephisto.data_model.worker import Worker
- from mephisto.data_model.crowd_provider import CrowdProvider
import os
import json
@@ -230,299 +230,3 @@ def new(
) as json_file:
json.dump(assignment_data, json_file)
return Assignment(db, db_id)
-
-
-class Unit(ABC):
- """
- This class tracks the status of an individual worker's contribution to a
- higher level assignment. It is the smallest 'unit' of work to complete
- the assignment, and this class is only responsible for checking
- the status of that work itself being done.
-
- It should be extended for usage with a specific crowd provider
- """
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- self.db: "MephistoDB" = db
- if row is None:
- row = db.get_unit(db_id)
- assert row is not None, f"Given db_id {db_id} did not exist in given db"
- self.db_id: str = row["unit_id"]
- self.assignment_id = row["assignment_id"]
- self.unit_index = row["unit_index"]
- self.pay_amount = row["pay_amount"]
- self.agent_id = row["agent_id"]
- self.provider_type = row["provider_type"]
- self.db_status = row["status"]
- self.task_type = row["task_type"]
- self.task_id = row["task_id"]
- self.task_run_id = row["task_run_id"]
- self.sandbox = row["sandbox"]
- self.requester_id = row["requester_id"]
- self.worker_id = row["worker_id"]
-
- # Deferred loading of related entities
- self.__task: Optional["Task"] = None
- self.__task_run: Optional["TaskRun"] = None
- self.__assignment: Optional["Assignment"] = None
- self.__requester: Optional["Requester"] = None
- self.__agent: Optional["Agent"] = None
- self.__worker: Optional["Worker"] = None
-
- def __new__(
- cls, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ) -> "Unit":
- """
- The new method is overridden to be able to automatically generate
- the expected Unit class without needing to specifically find it
- for a given db_id. As such it is impossible to create a Unit
- as you will instead be returned the correct Unit class according to
- the crowdprovider associated with this Unit.
- """
- if cls == Unit:
- # We are trying to construct a Unit, find what type to use and
- # create that instead
- from mephisto.core.registry import get_crowd_provider_from_type
-
- if row is None:
- row = db.get_unit(db_id)
- assert row is not None, f"Given db_id {db_id} did not exist in given db"
- correct_class = get_crowd_provider_from_type(row["provider_type"]).UnitClass
- return super().__new__(correct_class)
- else:
- # We are constructing another instance directly
- return super().__new__(cls)
-
- def get_crowd_provider_class(self) -> Type["CrowdProvider"]:
- """Get the CrowdProvider class that manages this Unit"""
- from mephisto.core.registry import get_crowd_provider_from_type
-
- return get_crowd_provider_from_type(self.provider_type)
-
- def get_assignment_data(self) -> Optional[Dict[str, Any]]:
- """Return the specific assignment data for this assignment"""
- return self.get_assignment().get_assignment_data()
-
- def sync_status(self) -> None:
- """
- Ensure that the queried status from this unit and the db status
- are up to date
- """
- # TODO(102) this will need to be run periodically/on crashes
- # to sync any lost state
- self.set_db_status(self.get_status())
-
- def get_db_status(self) -> str:
- """
- Return the status as currently stored in the database
- """
- if self.db_status in AssignmentState.final_unit():
- return self.db_status
- row = self.db.get_unit(self.db_id)
- assert row is not None, f"Unit {self.db_id} stopped existing in the db..."
- return row["status"]
-
- def set_db_status(self, status: str) -> None:
- """
- Set the status reflected in the database for this Unit
- """
- assert (
- status in AssignmentState.valid_unit()
- ), f"{status} not valid Assignment Status, not in {AssignmentState.valid_unit()}"
- self.db_status = status
- self.db.update_unit(self.db_id, status=status)
-
- def get_assignment(self) -> Assignment:
- """
- Return the assignment that this Unit is part of.
- """
- if self.__assignment is None:
- self.__assignment = Assignment(self.db, self.assignment_id)
- return self.__assignment
-
- def get_task_run(self) -> TaskRun:
- """
- Return the task run that this assignment is part of
- """
- if self.__task_run is None:
- if self.__assignment is not None:
- self.__task_run = self.__assignment.get_task_run()
- else:
- self.__task_run = TaskRun(self.db, self.task_run_id)
- return self.__task_run
-
- def get_task(self) -> Task:
- """
- Return the task that this assignment is part of
- """
- if self.__task is None:
- if self.__assignment is not None:
- self.__task = self.__assignment.get_task()
- elif self.__task_run is not None:
- self.__task = self.__task_run.get_task()
- else:
- self.__task = Task(self.db, self.task_id)
- return self.__task
-
- def get_requester(self) -> "Requester":
- """
- Return the requester who offered this Unit
- """
- if self.__requester is None:
- if self.__assignment is not None:
- self.__requester = self.__assignment.get_requester()
- elif self.__task_run is not None:
- self.__requester = self.__task_run.get_requester()
- else:
- self.__requester = Requester(self.db, self.requester_id)
- return self.__requester
-
- def clear_assigned_agent(self) -> None:
- """Clear the agent that is assigned to this unit"""
- self.db.clear_unit_agent_assignment(self.db_id)
- self.agent_id = None
- self.__agent = None
-
- def get_assigned_agent(self) -> Optional[Agent]:
- """
- Get the agent assigned to this Unit if there is one, else return None
- """
- # In these statuses, we know the agent isn't changing anymore, and thus will
- # not need to be re-queried
- # TODO(#97) add test to ensure this behavior/assumption holds always
- if self.db_status in AssignmentState.final_unit():
- if self.agent_id is None:
- return None
- return Agent(self.db, self.agent_id)
-
- # Query the database to get the most up-to-date assignment, as this can
- # change after instantiation if the Unit status isn't final
- # TODO(#101) this may not be particularly efficient
- row = self.db.get_unit(self.db_id)
- assert row is not None, f"Unit {self.db_id} stopped existing in the db..."
- agent_id = row["agent_id"]
- if agent_id is not None:
- return Agent(self.db, agent_id)
- return None
-
- @staticmethod
- def _register_unit(
- db: "MephistoDB",
- assignment: Assignment,
- index: int,
- pay_amount: float,
- provider_type: str,
- ) -> "Unit":
- """
- Create an entry for this unit in the database
- """
- db_id = db.new_unit(
- assignment.task_id,
- assignment.task_run_id,
- assignment.requester_id,
- assignment.db_id,
- index,
- pay_amount,
- provider_type,
- assignment.task_type,
- )
- return Unit(db, db_id)
-
- def get_pay_amount(self) -> float:
- """
- Return the amount that this Unit is costing against the budget,
- calculating additional fees as relevant
- """
- return self.pay_amount
-
- # Children classes may need to override the following
-
- def get_status(self) -> str:
- """
- Get the status of this unit, as determined by whether there's
- a worker working on it at the moment, and any other possible states. Should
- return one of UNIT_STATUSES
-
- Accurate status is crowd-provider dependent, and thus this method should be
- defined in the child class to ensure that the local record matches
- the ground truth in the provider
- """
- from mephisto.data_model.blueprint import AgentState
-
- db_status = self.db_status
- computed_status = AssignmentState.LAUNCHED
-
- agent = self.get_assigned_agent()
- if agent is None:
- row = self.db.get_unit(self.db_id)
- computed_status = row["status"]
- else:
- agent_status = agent.get_status()
- if agent_status == AgentState.STATUS_NONE:
- computed_status = AssignmentState.LAUNCHED
- elif agent_status in [
- AgentState.STATUS_ACCEPTED,
- AgentState.STATUS_ONBOARDING,
- AgentState.STATUS_PARTNER_DISCONNECT,
- AgentState.STATUS_WAITING,
- AgentState.STATUS_IN_TASK,
- ]:
- computed_status = AssignmentState.ASSIGNED
- elif agent_status in [AgentState.STATUS_COMPLETED]:
- computed_status = AssignmentState.COMPLETED
- elif agent_status in [AgentState.STATUS_SOFT_REJECTED]:
- computed_status = AssignmentState.SOFT_REJECTED
- elif agent_status in [AgentState.STATUS_EXPIRED]:
- computed_status = AssignmentState.EXPIRED
- elif agent_status in [
- AgentState.STATUS_DISCONNECT,
- AgentState.STATUS_RETURNED,
- ]:
- computed_status = AssignmentState.ASSIGNED
- elif agent_status == AgentState.STATUS_APPROVED:
- computed_status = AssignmentState.ACCEPTED
- elif agent_status == AgentState.STATUS_REJECTED:
- computed_status = AssignmentState.REJECTED
-
- if computed_status != db_status:
- self.set_db_status(computed_status)
-
- return computed_status
-
- # Children classes should implement the below methods
-
- def launch(self, task_url: str) -> None:
- """
- Make this Unit available on the crowdsourcing vendor. Depending on
- the task type, this could mean a number of different setup steps.
-
- Some crowd providers require setting up a configuration for the
- very first launch, and this method should call a helper to manage
- that step if necessary.
- """
- raise NotImplementedError()
-
- def expire(self) -> float:
- """
- Expire this unit, removing it from being workable on the vendor.
- Return the maximum time needed to wait before we know it's taken down.
- """
- raise NotImplementedError()
-
- def is_expired(self) -> bool:
- """Determine if this unit is expired as according to the vendor."""
- raise NotImplementedError()
-
- @staticmethod
- def new(
- db: "MephistoDB", assignment: Assignment, index: int, pay_amount: float
- ) -> "Unit":
- """
- Create a Unit for the given assignment
-
- Implementation should return the result of _register_unit when sure the unit
- can be successfully created to have it put into the db.
- """
- raise NotImplementedError()
diff --git a/mephisto/data_model/assignment_state.py b/mephisto/data_model/assignment_state.py
index 975abbecb..04de1596a 100644
--- a/mephisto/data_model/assignment_state.py
+++ b/mephisto/data_model/assignment_state.py
@@ -4,87 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-from typing import List
-
-
-class AssignmentState:
- CREATED = "created"
- LAUNCHED = "launched"
- ASSIGNED = "assigned"
- COMPLETED = "completed"
- ACCEPTED = "accepted"
- MIXED = "mixed"
- REJECTED = "rejected"
- SOFT_REJECTED = "soft_rejected"
- EXPIRED = "expired"
-
- @staticmethod
- def valid() -> List[str]:
- """Return all valid assignment statuses"""
- # TODO(#97) write test to ensure all states are covered here
- return [
- AssignmentState.CREATED,
- AssignmentState.LAUNCHED,
- AssignmentState.ASSIGNED,
- AssignmentState.COMPLETED,
- AssignmentState.ACCEPTED,
- AssignmentState.MIXED,
- AssignmentState.REJECTED,
- AssignmentState.SOFT_REJECTED,
- AssignmentState.EXPIRED,
- ]
-
- @staticmethod
- def incomplete() -> List[str]:
- """Return all statuses that are considered incomplete"""
- return [
- AssignmentState.CREATED,
- AssignmentState.LAUNCHED,
- AssignmentState.ASSIGNED,
- ]
-
- @staticmethod
- def payable() -> List[str]:
- """Return all statuses that should be considered spent budget"""
- return [
- AssignmentState.LAUNCHED,
- AssignmentState.ASSIGNED,
- AssignmentState.COMPLETED,
- AssignmentState.ACCEPTED,
- AssignmentState.SOFT_REJECTED,
- ]
-
- @staticmethod
- def valid_unit() -> List[str]:
- """Return all statuses that are valids for a Unit"""
- return [
- AssignmentState.CREATED,
- AssignmentState.LAUNCHED,
- AssignmentState.ASSIGNED,
- AssignmentState.COMPLETED,
- AssignmentState.ACCEPTED,
- AssignmentState.REJECTED,
- AssignmentState.SOFT_REJECTED,
- AssignmentState.EXPIRED,
- ]
-
- @staticmethod
- def final_unit() -> List[str]:
- """Return all statuses that are terminal for a Unit"""
- return [
- AssignmentState.ACCEPTED,
- AssignmentState.EXPIRED,
- AssignmentState.SOFT_REJECTED,
- ]
-
- @staticmethod
- def final_agent() -> List[str]:
- """Return all statuses that are terminal changes to a Unit's agent"""
- return [
- AssignmentState.COMPLETED,
- AssignmentState.ACCEPTED,
- AssignmentState.REJECTED,
- AssignmentState.SOFT_REJECTED,
- AssignmentState.EXPIRED,
- ]
+from mephisto.data_model.constants.assignment_state import *
+import warnings
+
+warnings.warn(
+ "Import of AssignmentState content from `data_model` is going away soon. "
+ "Please replace all of your imports from mephisto.data_model.assignment_state "
+ "to mephisto.data_model.constants.assignment_state. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/data_model/blueprint.py b/mephisto/data_model/blueprint.py
index c2972263d..16dec02d6 100644
--- a/mephisto/data_model/blueprint.py
+++ b/mephisto/data_model/blueprint.py
@@ -4,621 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from abc import ABC, abstractmethod
-from mephisto.core.utils import find_or_create_qualification
-from typing import (
- ClassVar,
- Optional,
- List,
- Dict,
- Any,
- Type,
- ClassVar,
- Union,
- Iterable,
- AsyncIterator,
- Callable,
- TYPE_CHECKING,
+from mephisto.abstractions.blueprint import *
+import warnings
+
+warnings.warn(
+ "Import of blueprint content from `data_model` is going away soon. "
+ "Please replace all of your imports from mephisto.data_model.blueprint "
+ "to mephisto.abstractions.blueprint. ",
+ PendingDeprecationWarning,
)
-
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-
-from mephisto.data_model.exceptions import (
- AgentReturnedError,
- AgentDisconnectedError,
- AgentTimeoutError,
-)
-from mephisto.data_model.assignment_state import AssignmentState
-
-if TYPE_CHECKING:
- from mephisto.data_model.agent import Agent, OnboardingAgent
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Assignment, InitializationData, Unit
- from mephisto.data_model.packet import Packet
- from mephisto.data_model.worker import Worker
- from argparse import _ArgumentGroup as ArgumentGroup
-
-
-@dataclass
-class BlueprintArgs:
- _blueprint_type: str = MISSING
- onboarding_qualification: str = field(
- default=MISSING,
- metadata={
- "help": (
- "Specify the name of a qualification used to block workers who fail onboarding, "
- "Empty will skip onboarding."
- )
- },
- )
- block_qualification: str = field(
- default=MISSING,
- metadata={
- "help": ("Specify the name of a qualification used to soft block workers.")
- },
- )
-
-
-@dataclass
-class SharedTaskState:
- """
- Base class for specifying additional state that can't just
- be passed as Hydra args, like functions and objects
- """
-
- onboarding_data: Dict[str, Any] = field(default_factory=dict)
- task_config: Dict[str, Any] = field(default_factory=dict)
- validate_onboarding: Callable[[Any], bool] = field(
- default_factory=lambda: (lambda x: True)
- )
- qualifications: List[Any] = field(default_factory=list)
- worker_can_do_unit: Callable[["Worker", "Unit"], bool] = field(
- default_factory=lambda: (lambda worker, unit: True)
- )
-
-
-class TaskBuilder(ABC):
- """
- Class to manage building a task of a specific type in a directory
- that will be used to deploy that task.
- """
-
- def __init__(self, task_run: "TaskRun", args: "DictConfig"):
- self.args = args
- self.task_run = task_run
-
- def __new__(cls, task_run: "TaskRun", args: "DictConfig") -> "TaskBuilder":
- """Get the correct TaskBuilder for this task run"""
- from mephisto.core.registry import get_blueprint_from_type
-
- if cls == TaskBuilder:
- # We are trying to construct an TaskBuilder, find what type to use and
- # create that instead
- correct_class = get_blueprint_from_type(task_run.task_type).TaskBuilderClass
- return super().__new__(correct_class)
- else:
- # We are constructing another instance directly
- return super().__new__(cls)
-
- @abstractmethod
- def build_in_dir(self, build_dir: str) -> None:
- """
- Build the server for the given task run into the provided directory
- """
- raise NotImplementedError()
-
-
-class TaskRunner(ABC):
- """
- Class to manage running a task of a specific type. Includes
- building the dependencies to a directory to be deployed to
- the server, and spawning threads that manage the process of
- passing agents through a task.
- """
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- self.args = args
- self.shared_state = shared_state
- self.task_run = task_run
- self.running_assignments: Dict[str, "Assignment"] = {}
- self.running_units: Dict[str, "Unit"] = {}
- self.running_onboardings: Dict[str, "OnboardingAgent"] = {}
- self.is_concurrent = False
- # TODO(102) populate some kind of local state for tasks that are being run
- # by this runner from the database.
-
- self.block_qualification = args.blueprint.get("block_qualification", None)
- if self.block_qualification is not None:
- find_or_create_qualification(task_run.db, self.block_qualification)
-
- def __new__(
- cls, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ) -> "TaskRunner":
- """Get the correct TaskRunner for this task run"""
- if cls == TaskRunner:
- from mephisto.core.registry import get_blueprint_from_type
-
- # We are trying to construct an AgentState, find what type to use and
- # create that instead
- correct_class = get_blueprint_from_type(task_run.task_type).TaskRunnerClass
- return super().__new__(correct_class)
- else:
- # We are constructing another instance directly
- return super().__new__(cls)
-
- def launch_onboarding(self, onboarding_agent: "OnboardingAgent") -> None:
- """
- Validate that onboarding is ready, then launch. Catch disconnect conditions
- """
- onboarding_id = onboarding_agent.get_agent_id()
- if onboarding_id in self.running_onboardings:
- print(f"Onboarding {onboarding_id} is already running")
- return
-
- print(f"Onboarding {onboarding_id} is launching with {onboarding_agent}")
-
- # At this point we're sure we want to run Onboarding
- self.running_onboardings[onboarding_id] = onboarding_agent
- try:
- self.run_onboarding(onboarding_agent)
- onboarding_agent.mark_done()
- except (AgentReturnedError, AgentTimeoutError, AgentDisconnectedError):
- self.cleanup_onboarding(onboarding_agent)
- except Exception as e:
- print(f"Unhandled exception in onboarding {onboarding_agent}: {repr(e)}")
- import traceback
-
- traceback.print_exc()
- self.cleanup_onboarding(onboarding_agent)
- del self.running_onboardings[onboarding_id]
- return
-
- def launch_unit(self, unit: "Unit", agent: "Agent") -> None:
- """
- Validate the unit is prepared to launch, then run it
- """
- if unit.db_id in self.running_units:
- print(f"Unit {unit.db_id} is already running")
- return
-
- print(f"Unit {unit.db_id} is launching with {agent}")
-
- # At this point we're sure we want to run the unit
- self.running_units[unit.db_id] = unit
- try:
- self.run_unit(unit, agent)
- except (AgentReturnedError, AgentTimeoutError, AgentDisconnectedError):
- # A returned Unit can be worked on again by someone else.
- if (
- unit.get_status() != AssignmentState.EXPIRED
- and unit.get_assigned_agent().db_id == agent.db_id
- ):
- unit.clear_assigned_agent()
- self.cleanup_unit(unit)
- except Exception as e:
- print(f"Unhandled exception in unit {unit}: {repr(e)}")
- import traceback
-
- traceback.print_exc()
- self.cleanup_unit(unit)
- del self.running_units[unit.db_id]
- return
-
- def launch_assignment(
- self, assignment: "Assignment", agents: List["Agent"]
- ) -> None:
- """
- Validate the assignment is prepared to launch, then run it
- """
- if assignment.db_id in self.running_assignments:
- print(f"Assignment {assignment.db_id} is already running")
- return
-
- print(f"Assignment {assignment.db_id} is launching with {agents}")
-
- # At this point we're sure we want to run the assignment
- self.running_assignments[assignment.db_id] = assignment
- try:
- self.run_assignment(assignment, agents)
- except (AgentReturnedError, AgentTimeoutError, AgentDisconnectedError) as e:
- # TODO(#99) if some operator flag is set for counting complete tasks, launch a
- # new assignment copied from the parameters of this one
- disconnected_agent_id = e.agent_id
- for agent in agents:
- if agent.db_id != e.agent_id:
- agent.update_status(AgentState.STATUS_PARTNER_DISCONNECT)
- else:
- # Must expire the disconnected unit so that
- # new workers aren't shown it
- agent.get_unit().expire()
- self.cleanup_assignment(assignment)
- except Exception as e:
- print(f"Unhandled exception in assignment {assignment}: {repr(e)}")
- import traceback
-
- traceback.print_exc()
- self.cleanup_assignment(assignment)
- del self.running_assignments[assignment.db_id]
- return
-
- @staticmethod
- def get_data_for_assignment(assignment: "Assignment") -> "InitializationData":
- """
- Finds the right data to get for the given assignment.
- """
- return assignment.get_assignment_data()
-
- @abstractmethod
- def get_init_data_for_agent(self, agent: "Agent"):
- """
- Return the data that an agent will need for their task.
- """
- raise NotImplementedError()
-
- def filter_units_for_worker(self, units: List["Unit"], worker: "Worker"):
- """
- Returns the list of Units that the given worker is eligible to work on.
-
- Some tasks may want more direct control of what units a worker is
- allowed to work on, so this method should be overridden by children
- classes.
- """
- return units
-
- # TaskRunners must implement either the unit or assignment versions of the
- # run and cleanup functions, depending on if the task is run at the assignment
- # level rather than on the the unit level.
-
- def run_onboarding(self, agent: "OnboardingAgent"):
- """
- Handle setup for any resources to run an onboarding task. This
- will be run in a background thread, and should be tolerant to being
- interrupted by cleanup_onboarding.
-
- Only required by tasks that want to implement onboarding
- """
- raise NotImplementedError()
-
- def cleanup_onboarding(self, agent: "OnboardingAgent"):
- """
- Handle cleaning up the resources that were being used to onboard
- the given agent.
- """
- raise NotImplementedError()
-
- def run_unit(self, unit: "Unit", agent: "Agent"):
- """
- Handle setup for any resources required to get this unit running.
- This will be run in a background thread, and should be tolerant to
- being interrupted by cleanup_unit.
-
- Only needs to be implemented by non-concurrent tasks
- """
- raise NotImplementedError()
-
- def cleanup_unit(self, unit: "Unit"):
- """
- Handle ensuring resources for a given assignment are cleaned up following
- a disconnect or other crash event
-
- Does not need to be implemented if the run_unit method is
- already error catching and handles its own cleanup
- """
- raise NotImplementedError()
-
- def run_assignment(self, assignment: "Assignment", agents: List["Agent"]):
- """
- Handle setup for any resources required to get this assignment running.
- This will be run in a background thread, and should be tolerant to
- being interrupted by cleanup_assignment.
-
- Only needs to be implemented by concurrent tasks
- """
- raise NotImplementedError()
-
- def cleanup_assignment(self, assignment: "Assignment"):
- """
- Handle ensuring resources for a given assignment are cleaned up following
- a disconnect or other crash event
-
- Does not need to be implemented if the run_assignment method is
- already error catching and handles its own cleanup
- """
- raise NotImplementedError()
-
-
-# TODO(#101) what is the best method for creating new ones of these for different task types
-# in ways that are supported by different backends? Perhaps abstract additional
-# methods into the required db interface? Move any file manipulations into a
-# extra_data_handler subcomponent of the MephistoDB class?
-class AgentState(ABC):
- """
- Class for holding state information about work by an Agent on a Unit, currently
- stored as current task work into a json file.
-
- Specific state implementations will need to be created for different Task Types,
- as different tasks store and load differing data.
- """
-
- # Possible Agent Status Values
- STATUS_NONE = "none"
- STATUS_ACCEPTED = "accepted"
- STATUS_ONBOARDING = "onboarding"
- STATUS_WAITING = "waiting"
- STATUS_IN_TASK = "in task"
- STATUS_COMPLETED = "completed"
- STATUS_DISCONNECT = "disconnect"
- STATUS_TIMEOUT = "timeout"
- STATUS_PARTNER_DISCONNECT = "partner disconnect"
- STATUS_EXPIRED = "expired"
- STATUS_RETURNED = "returned"
- STATUS_APPROVED = "approved"
- STATUS_SOFT_REJECTED = "soft_rejected"
- STATUS_REJECTED = "rejected"
-
- def __new__(cls, agent: Union["Agent", "OnboardingAgent"]) -> "AgentState":
- """Return the correct agent state for the given agent"""
- if cls == AgentState:
- from mephisto.data_model.agent import Agent
- from mephisto.core.registry import get_blueprint_from_type
-
- # We are trying to construct an AgentState, find what type to use and
- # create that instead
- if isinstance(agent, Agent):
- correct_class = get_blueprint_from_type(agent.task_type).AgentStateClass
- else:
- correct_class = get_blueprint_from_type(
- agent.task_type
- ).OnboardingAgentStateClass
- return super().__new__(correct_class)
- else:
- # We are constructing another instance directly
- return super().__new__(cls)
-
- @staticmethod
- def complete() -> List[str]:
- """Return all final Agent statuses which will not be updated by the supervisor"""
- return [
- AgentState.STATUS_COMPLETED,
- AgentState.STATUS_DISCONNECT,
- AgentState.STATUS_TIMEOUT,
- AgentState.STATUS_EXPIRED,
- AgentState.STATUS_RETURNED,
- AgentState.STATUS_SOFT_REJECTED,
- AgentState.STATUS_APPROVED,
- AgentState.STATUS_REJECTED,
- ]
-
- @staticmethod
- def valid() -> List[str]:
- """Return all valid Agent statuses"""
- # TODO(#97) write a test that ensures all AgentState statuses are here
- return [
- AgentState.STATUS_NONE,
- AgentState.STATUS_ACCEPTED,
- AgentState.STATUS_ONBOARDING,
- AgentState.STATUS_WAITING,
- AgentState.STATUS_IN_TASK,
- AgentState.STATUS_COMPLETED,
- AgentState.STATUS_DISCONNECT,
- AgentState.STATUS_TIMEOUT,
- AgentState.STATUS_PARTNER_DISCONNECT,
- AgentState.STATUS_EXPIRED,
- AgentState.STATUS_RETURNED,
- AgentState.STATUS_SOFT_REJECTED,
- AgentState.STATUS_APPROVED,
- AgentState.STATUS_REJECTED,
- ]
-
- # Implementations of an AgentState must implement the following:
-
- @abstractmethod
- def __init__(self, agent: "Agent"):
- """
- Create an AgentState to track the state of an agent's work on a Unit
-
- Implementations should initialize any required files for saving and
- loading state data somewhere.
-
- If said file already exists based on the given agent, load that data
- instead.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def set_init_state(self, data: Any) -> bool:
- """Set the initial state for this agent"""
- raise NotImplementedError()
-
- @abstractmethod
- def get_init_state(self) -> Optional[Any]:
- """
- Return the initial state for this agent,
- None if no such state exists
- """
- raise NotImplementedError()
-
- @abstractmethod
- def load_data(self) -> None:
- """
- Load stored data from a file to this object
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_data(self) -> Dict[str, Any]:
- """
- Return the currently stored data for this task in the format
- expected by any frontend displays
- """
- raise NotImplementedError()
-
- def get_parsed_data(self) -> Any:
- """
- Return the portion of the data that is relevant to a human
- who wants to parse or analyze the data
-
- Utility function to handle stripping the data of any
- context that is only important for reproducing the task
- exactly. By default is just `get_data`
- """
- return self.get_data()
-
- @abstractmethod
- def save_data(self) -> None:
- """
- Save the relevant data from this Unit to a file in the expected location
- """
- raise NotImplementedError()
-
- @abstractmethod
- def update_data(self, packet: "Packet") -> None:
- """
- Put new current Unit data into this AgentState
- """
- # TODO(#100) maybe refine the signature for this function once use cases
- # are fully scoped
-
- # Some use cases might just be appending new data, some
- # might instead prefer to maintain a final state.
-
- # Maybe the correct storage is of a series of actions taken
- # on this Unit? Static tasks only have 2 turns max, dynamic
- # ones may have multiple turns or steps.
- raise NotImplementedError()
-
- def get_task_start(self) -> Optional[float]:
- """
- Return the start time for this task, if it is available
- """
- return 0.0
-
- def get_task_end(self) -> Optional[float]:
- """
- Return the end time for this task, if it is available
- """
- return 0.0
-
-
-class OnboardingRequired(object):
- """
- Compositional class for blueprints that may have an onboarding step
- """
-
- @staticmethod
- def get_failed_qual(qual_name: str) -> str:
- """Returns the wrapper for a qualification to represent failing an onboarding"""
- return qual_name + "-failed"
-
- def init_onboarding_config(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- self.onboarding_qualification_name: Optional[str] = args.blueprint.get(
- "onboarding_qualification", None
- )
- self.onboarding_data = shared_state.onboarding_data
- self.use_onboarding = self.onboarding_qualification_name is not None
- self.onboarding_qualification_id = None
- if self.onboarding_qualification_name is not None:
- db = task_run.db
- found_qualifications = db.find_qualifications(
- self.onboarding_qualification_name
- )
- if len(found_qualifications) == 0:
- self.onboarding_qualification_id = db.make_qualification(
- self.onboarding_qualification_name
- )
- else:
- self.onboarding_qualification_id = found_qualifications[0].db_id
-
- # We need to keep a separate qualification for failed onboarding
- # to push to a crowd provider in order to prevent workers
- # who have failed from being shown our task
- self.onboarding_failed_name = self.get_failed_qual(
- self.onboarding_qualification_name
- )
- found_qualifications = db.find_qualifications(self.onboarding_failed_name)
- if len(found_qualifications) == 0:
- self.onboarding_failed_id = db.make_qualification(
- self.onboarding_failed_name
- )
- else:
- self.onboarding_failed_id = found_qualifications[0].db_id
-
- def get_onboarding_data(self, worker_id: str) -> Dict[str, Any]:
- """
- If the onboarding task on the frontend requires any specialized data, the blueprint
- should provide it for the user.
-
- As onboarding qualifies a worker for all tasks from this blueprint, this should
- generally be static data that can later be evaluated against.
- """
- return self.onboarding_data
-
- def validate_onboarding(
- self, worker: "Worker", onboarding_agent: "OnboardingAgent"
- ) -> bool:
- """
- Check the incoming onboarding data and evaluate if the worker
- has passed the qualification or not. Return True if the worker
- has qualified.
- """
- return True
-
-
-class Blueprint(ABC):
- """
- Configuration class for the various parts of building, launching,
- and running a task of a specific task. Provides utility functions
- for managing between the three main components, which are separated
- into separate classes in acknowledgement that some tasks may have
- particularly complicated processes for them
- """
-
- AgentStateClass: ClassVar[Type["AgentState"]]
- OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = AgentState # type: ignore
- TaskRunnerClass: ClassVar[Type["TaskRunner"]]
- TaskBuilderClass: ClassVar[Type["TaskBuilder"]]
- ArgsClass: ClassVar[Type["BlueprintArgs"]] = BlueprintArgs
- SharedStateClass: ClassVar[Type["SharedTaskState"]] = SharedTaskState
- supported_architects: ClassVar[List[str]]
- BLUEPRINT_TYPE: str
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- self.args = args
- self.shared_state = shared_state
- self.frontend_task_config = shared_state.task_config
-
- @classmethod
- def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
- """
- Assert that the provided arguments are valid. Should
- fail if a task launched with these arguments would
- not work
- """
- return
-
- def get_frontend_args(self) -> Dict[str, Any]:
- """
- Specifies what options should be fowarded
- to the client for use by the task's frontend
- """
- return self.frontend_task_config
-
- @abstractmethod
- def get_initialization_data(
- self,
- ) -> Union[Iterable["InitializationData"], AsyncIterator["InitializationData"]]:
- """
- Get all of the data used to initialize tasks from this blueprint.
- Can either be a simple iterable if all the assignments can
- be processed at once, or an AsyncIterator if the number
- of tasks is unknown or changes based on something running
- concurrently with the job.
- """
- raise NotImplementedError
diff --git a/mephisto/data_model/constants.py b/mephisto/data_model/constants/__init__.py
similarity index 100%
rename from mephisto/data_model/constants.py
rename to mephisto/data_model/constants/__init__.py
diff --git a/mephisto/data_model/constants/assignment_state.py b/mephisto/data_model/constants/assignment_state.py
new file mode 100644
index 000000000..975abbecb
--- /dev/null
+++ b/mephisto/data_model/constants/assignment_state.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import List
+
+
+class AssignmentState:
+ CREATED = "created"
+ LAUNCHED = "launched"
+ ASSIGNED = "assigned"
+ COMPLETED = "completed"
+ ACCEPTED = "accepted"
+ MIXED = "mixed"
+ REJECTED = "rejected"
+ SOFT_REJECTED = "soft_rejected"
+ EXPIRED = "expired"
+
+ @staticmethod
+ def valid() -> List[str]:
+ """Return all valid assignment statuses"""
+ # TODO(#97) write test to ensure all states are covered here
+ return [
+ AssignmentState.CREATED,
+ AssignmentState.LAUNCHED,
+ AssignmentState.ASSIGNED,
+ AssignmentState.COMPLETED,
+ AssignmentState.ACCEPTED,
+ AssignmentState.MIXED,
+ AssignmentState.REJECTED,
+ AssignmentState.SOFT_REJECTED,
+ AssignmentState.EXPIRED,
+ ]
+
+ @staticmethod
+ def incomplete() -> List[str]:
+ """Return all statuses that are considered incomplete"""
+ return [
+ AssignmentState.CREATED,
+ AssignmentState.LAUNCHED,
+ AssignmentState.ASSIGNED,
+ ]
+
+ @staticmethod
+ def payable() -> List[str]:
+ """Return all statuses that should be considered spent budget"""
+ return [
+ AssignmentState.LAUNCHED,
+ AssignmentState.ASSIGNED,
+ AssignmentState.COMPLETED,
+ AssignmentState.ACCEPTED,
+ AssignmentState.SOFT_REJECTED,
+ ]
+
+ @staticmethod
+ def valid_unit() -> List[str]:
+ """Return all statuses that are valid for a Unit"""
+ return [
+ AssignmentState.CREATED,
+ AssignmentState.LAUNCHED,
+ AssignmentState.ASSIGNED,
+ AssignmentState.COMPLETED,
+ AssignmentState.ACCEPTED,
+ AssignmentState.REJECTED,
+ AssignmentState.SOFT_REJECTED,
+ AssignmentState.EXPIRED,
+ ]
+
+ @staticmethod
+ def final_unit() -> List[str]:
+ """Return all statuses that are terminal for a Unit"""
+ return [
+ AssignmentState.ACCEPTED,
+ AssignmentState.EXPIRED,
+ AssignmentState.SOFT_REJECTED,
+ ]
+
+ @staticmethod
+ def final_agent() -> List[str]:
+ """Return all statuses that are terminal changes to a Unit's agent"""
+ return [
+ AssignmentState.COMPLETED,
+ AssignmentState.ACCEPTED,
+ AssignmentState.REJECTED,
+ AssignmentState.SOFT_REJECTED,
+ AssignmentState.EXPIRED,
+ ]
diff --git a/mephisto/data_model/crowd_provider.py b/mephisto/data_model/crowd_provider.py
index f92fbe090..2dba9d4de 100644
--- a/mephisto/data_model/crowd_provider.py
+++ b/mephisto/data_model/crowd_provider.py
@@ -4,135 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from abc import ABC, abstractmethod, abstractproperty
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from mephisto.data_model.blueprint import AgentState, SharedTaskState
-from mephisto.data_model.assignment import Unit
-from mephisto.data_model.requester import Requester
-from mephisto.data_model.worker import Worker
-from mephisto.data_model.agent import Agent
-
-from typing import List, Optional, Tuple, Dict, Any, ClassVar, Type, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
- from argparse import _ArgumentGroup as ArgumentGroup
-
-
-@dataclass
-class ProviderArgs:
- """Base class for arguments to configure Crowd Providers"""
-
- _provider_type: str = MISSING
- requester_name: str = MISSING
-
-
-class CrowdProvider(ABC):
- """
- Base class that defines the required functionality for
- the mephisto system to be able to interface with an
- external crowdsourcing vendor.
-
- Implementing the methods within, as well as supplying
- wrapped Unit, Requester, Worker, and Agent classes
- should ensure support for a vendor.
- """
-
- PROVIDER_TYPE = "__PROVIDER_BASE_CLASS__"
-
- UnitClass: ClassVar[Type[Unit]] = Unit
-
- RequesterClass: ClassVar[Type[Requester]] = Requester
-
- WorkerClass: ClassVar[Type[Worker]] = Worker
-
- AgentClass: ClassVar[Type[Agent]] = Agent
-
- ArgsClass: ClassVar[Type[ProviderArgs]] = ProviderArgs
-
- SUPPORTED_TASK_TYPES: ClassVar[List[str]]
-
- def __init__(self, db: "MephistoDB"):
- """
- Crowd provider classes should keep as much of their state
- as possible in their non-python datastore. This way
- the system can work even after shutdowns, and the
- state of the system can be managed or observed from
- other processes.
-
- In order to set up a datastore, init should check to see
- if one is already set (using get_datastore_for_provider)
- and use that one if available, otherwise make a new one
- and register it with the database.
- """
- self.db = db
- if db.has_datastore_for_provider(self.PROVIDER_TYPE):
- self.datastore = db.get_datastore_for_provider(self.PROVIDER_TYPE)
- else:
- self.datastore_root = db.get_db_path_for_provider(self.PROVIDER_TYPE)
- self.datastore = self.initialize_provider_datastore(self.datastore_root)
- db.set_datastore_for_provider(self.PROVIDER_TYPE, self.datastore)
-
- @classmethod
- def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
- """
- Assert that the provided arguments are valid. Should
- fail if a task launched with these arguments would
- not work
- """
- return
-
- @classmethod
- @abstractmethod
- def get_wrapper_js_path(cls):
- """
- Return the path to the `wrap_crowd_source.js` file for this
- provider to be deployed to the server
- """
- raise NotImplementedError
-
- @abstractmethod
- def initialize_provider_datastore(self, storage_path: str) -> Any:
- """
- Do whatever is required to initialize this provider insofar
- as setting up local or external state is required to ensure
- that this vendor is usable.
-
- Local data storage should be put into the given root path.
-
- This method should return the local data storage component that
- is required to do any object initialization, as it will be available
- from the MephistoDB in a db.get_provider_datastore(PROVIDER_TYPE).
- """
- raise NotImplementedError()
-
- @abstractmethod
- def setup_resources_for_task_run(
- self,
- task_run: "TaskRun",
- args: DictConfig,
- shared_state: "SharedTaskState",
- server_url: str,
- ) -> None:
- """
- Setup any required resources for managing any additional resources
- surrounding a specific task run.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def cleanup_resources_from_task_run(
- self, task_run: "TaskRun", server_url: str
- ) -> None:
- """
- Destroy any resources set up specifically for this task run
- """
- raise NotImplementedError()
-
- def cleanup_qualification(self, qualification_name: str) -> None:
- """
- Remove the linked qualification from the crowdprovider if it exists
- """
- return None
+from mephisto.abstractions.crowd_provider import *
+import warnings
+
+warnings.warn(
+ "Import of crowd provider content from `data_model` is going away soon. "
+ "Please replace all of your imports from mephisto.data_model.crowd_provider "
+ "to mephisto.abstractions.crowd_provider. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/data_model/database.py b/mephisto/data_model/database.py
index f8bd61b50..e4e69afe2 100644
--- a/mephisto/data_model/database.py
+++ b/mephisto/data_model/database.py
@@ -4,578 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-import os
-import sqlite3
-
-from abc import ABC, abstractmethod
-from mephisto.core.utils import get_data_dir
-from mephisto.core.registry import (
- get_crowd_provider_from_type,
- get_valid_provider_types,
+from mephisto.abstractions.database import *
+import warnings
+
+warnings.warn(
+    "Import of MephistoDB content from `data_model` is going away soon. "
+    "Please replace all of your imports from mephisto.data_model.database "
+    "to mephisto.abstractions.database. ",
+    PendingDeprecationWarning, stacklevel=2,  # blame the importer, not this shim
)
-from typing import Mapping, Optional, Any, List
-from mephisto.data_model.agent import Agent, OnboardingAgent
-from mephisto.data_model.assignment import Assignment, Unit
-from mephisto.data_model.project import Project
-from mephisto.data_model.requester import Requester
-from mephisto.data_model.task import Task, TaskRun
-from mephisto.data_model.worker import Worker
-from mephisto.data_model.qualification import Qualification, GrantedQualification
-
-# TODO(#101) investigate rate limiting against the db by caching locally where appropriate across the data model?
-# TODO(#101) investigate cursors for DB queries as the project scales
-
-
-class MephistoDBException(Exception):
- pass
-
-
-class EntryAlreadyExistsException(MephistoDBException):
- pass
-
-
-class EntryDoesNotExistException(MephistoDBException):
- pass
-
-
-class MephistoDB(ABC):
- """
- Provides the interface for all queries that are necessary for the Mephisto
- architecture to run as expected. All other databases should implement
- these methods to be used as the database that backs Mephisto.
-
- By default, we use a LocalMesphistoDB located at `mephisto/data/database.db`
- """
-
- def __init__(self, database_path=None):
- """Ensure the database is set up and ready to handle data"""
- if database_path is None:
- database_path = os.path.join(get_data_dir(), "database.db")
- self.db_path = database_path
- self.db_root = os.path.dirname(self.db_path)
- self.init_tables()
- self.__provider_datastores: Dict[str, Any] = {}
-
- def get_db_path_for_provider(self, provider_type) -> str:
- """Get the path to store data for a specific provider in"""
- database_root = os.path.dirname(self.db_path)
- provider_root = os.path.join(database_root, provider_type)
- os.makedirs(provider_root, exist_ok=True)
- return provider_root
-
- def has_datastore_for_provider(self, provider_type: str) -> bool:
- """Determine if a datastore has been registered for the given provider"""
- return provider_type in self.__provider_datastores
-
- def get_datastore_for_provider(self, provider_type: str) -> Any:
- """Get the provider datastore registered with this db"""
- if provider_type not in self.__provider_datastores:
- # Register this provider for usage now
- ProviderClass = get_crowd_provider_from_type(provider_type)
- provider = ProviderClass(self)
- return self.__provider_datastores.get(provider_type)
-
- def set_datastore_for_provider(self, provider_type: str, datastore: Any) -> None:
- """Set the provider datastore registered with this db"""
- self.__provider_datastores[provider_type] = datastore
-
- def delete_qualification(self, qualification_name: str) -> None:
- """
- Remove this qualification from all workers that have it, then delete the qualification
- """
- self._delete_qualification(qualification_name)
- for crowd_provider_name in get_valid_provider_types():
- ProviderClass = get_crowd_provider_from_type(crowd_provider_name)
- provider = ProviderClass(self)
- provider.cleanup_qualification(qualification_name)
-
- @abstractmethod
- def shutdown(self) -> None:
- """Do whatever is required to shut this server off"""
- raise NotImplementedError()
-
- @abstractmethod
- def init_tables(self) -> None:
- """
- Initialize any tables that may be required to run this database. If this is an expensive
- operation, check to see if they already exist before trying to initialize
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_project(self, project_name: str) -> str:
- """
- Create a new project with the given project name. Raise EntryAlreadyExistsException if a project
- with this name has already been created.
-
- Project names are permanent, as changing directories later is painful.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_project(self, project_id: str) -> Mapping[str, Any]:
- """
- Return project's fields by the given project_id, raise EntryDoesNotExistException if no id exists
- in projects
-
- See Project for the expected returned mapping's fields
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_projects(self, project_name: Optional[str] = None) -> List[Project]:
- """
- Try to find any project that matches the above. When called with no arguments,
- return all projects.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_task(
- self,
- task_name: str,
- task_type: str,
- project_id: Optional[str] = None,
- parent_task_id: Optional[str] = None,
- ) -> str:
- """
- Create a new task with the given task name. Raise EntryAlreadyExistsException if a task
- with this name has already been created.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_task(self, task_id: str) -> Mapping[str, Any]:
- """
- Return task's fields by task_id, raise EntryDoesNotExistException if no id exists
- in tasks
-
- See Task for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_tasks(
- self,
- task_name: Optional[str] = None,
- project_id: Optional[str] = None,
- parent_task_id: Optional[str] = None,
- ) -> List[Task]:
- """
- Try to find any task that matches the above. When called with no arguments,
- return all tasks.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def update_task(
- self,
- task_id: str,
- task_name: Optional[str] = None,
- project_id: Optional[str] = None,
- ) -> None:
- """
- Update the given task with the given parameters if possible, raise appropriate exception otherwise.
-
- Should only be runable if no runs have been created for this task
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_task_run(
- self,
- task_id: str,
- requester_id: str,
- init_params: str,
- provider_type: str,
- task_type: str,
- sandbox: bool = True,
- ) -> str:
- """
- Create a new task_run for the given task.
-
- Once a run is created, it should no longer be altered. The assignments and
- subassignments depend on the data set up within, as the launched task
- cannot be replaced and the requester can not be swapped mid-run.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_task_run(self, task_run_id: str) -> Mapping[str, Any]:
- """
- Return the given task_run's fields by task_run_id, raise EntryDoesNotExistException if no id exists
- in task_runs.
-
- See TaskRun for the expected fields to populate in the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_task_runs(
- self,
- task_id: Optional[str] = None,
- requester_id: Optional[str] = None,
- is_completed: Optional[bool] = None,
- ) -> List[TaskRun]:
- """
- Try to find any task_run that matches the above. When called with no arguments,
- return all task_runs.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def update_task_run(self, task_run_id: str, is_completed: bool):
- """
- Update a task run. At the moment, can only update completion status
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_assignment(
- self,
- task_id: str,
- task_run_id: str,
- requester_id: str,
- task_type: str,
- provider_type: str,
- sandbox: bool = True,
- ) -> str:
- """
- Create a new assignment for the given task
-
- Assignments should not be edited or altered once created
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_assignment(self, assignment_id: str) -> Mapping[str, Any]:
- """
- Return assignment's fields by assignment_id, raise EntryDoesNotExistException if
- no id exists in tasks
-
- See Assignment for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_assignments(
- self,
- task_run_id: Optional[str] = None,
- task_id: Optional[str] = None,
- requester_id: Optional[str] = None,
- task_type: Optional[str] = None,
- provider_type: Optional[str] = None,
- sandbox: Optional[bool] = None,
- ) -> List[Assignment]:
- """
- Try to find any task that matches the above. When called with no arguments,
- return all tasks.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_unit(
- self,
- task_id: str,
- task_run_id: str,
- requester_id: str,
- assignment_id: str,
- unit_index: int,
- pay_amount: float,
- provider_type: str,
- task_type: str,
- sandbox: bool = True,
- ) -> str:
- """
- Create a new unit with the given index. Raises EntryAlreadyExistsException
- if there is already a unit for the given assignment with the given index.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_unit(self, unit_id: str) -> Mapping[str, Any]:
- """
- Return unit's fields by unit_id, raise EntryDoesNotExistException
- if no id exists in units
-
- See unit for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_units(
- self,
- task_id: Optional[str] = None,
- task_run_id: Optional[str] = None,
- requester_id: Optional[str] = None,
- assignment_id: Optional[str] = None,
- unit_index: Optional[int] = None,
- provider_type: Optional[str] = None,
- task_type: Optional[str] = None,
- agent_id: Optional[str] = None,
- worker_id: Optional[str] = None,
- sandbox: Optional[bool] = None,
- status: Optional[str] = None,
- ) -> List[Unit]:
- """
- Try to find any unit that matches the above. When called with no arguments,
- return all units.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def clear_unit_agent_assignment(self, unit_id: str) -> None:
- """
- Update the given unit by removing the agent that is assigned to it, thus updating
- the status to assignable.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def update_unit(
- self, unit_id: str, agent_id: Optional[str] = None, status: Optional[str] = None
- ) -> None:
- """
- Update the given task with the given parameters if possible, raise appropriate exception otherwise.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_requester(self, requester_name: str, provider_type: str) -> str:
- """
- Create a new requester with the given name and provider type.
- Raises EntryAlreadyExistsException
- if there is already a requester with this name
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_requester(self, requester_id: str) -> Mapping[str, Any]:
- """
- Return requester's fields by requester_id, raise EntryDoesNotExistException
- if no id exists in requesters
-
- See requester for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_requesters(
- self, requester_name: Optional[str] = None, provider_type: Optional[str] = None
- ) -> List[Requester]:
- """
- Try to find any requester that matches the above. When called with no arguments,
- return all requesters.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_worker(self, worker_name: str, provider_type: str) -> str:
- """
- Create a new worker with the given name and provider type.
- Raises EntryAlreadyExistsException
- if there is already a worker with this name
-
- worker_name should be the unique identifier by which the crowd provider
- is using to keep track of this worker
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_worker(self, worker_id: str) -> Mapping[str, Any]:
- """
- Return worker's fields by worker_id, raise EntryDoesNotExistException
- if no id exists in workers
-
- See worker for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_workers(
- self, worker_name: Optional[str] = None, provider_type: Optional[str] = None
- ) -> List[Worker]:
- """
- Try to find any worker that matches the above. When called with no arguments,
- return all workers.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_agent(
- self,
- worker_id: str,
- unit_id: str,
- task_id: str,
- task_run_id: str,
- assignment_id: str,
- task_type: str,
- provider_type: str,
- ) -> str:
- """
- Create a new agent for the given worker id to assign to the given unit
- Raises EntryAlreadyExistsException
-
- Should update the unit's status to ASSIGNED and the assigned agent to
- this one.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_agent(self, agent_id: str) -> Mapping[str, Any]:
- """
- Return agent's fields by agent_id, raise EntryDoesNotExistException
- if no id exists in agents
-
- See Agent for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def update_agent(self, agent_id: str, status: Optional[str] = None) -> None:
- """
- Update the given task with the given parameters if possible, raise appropriate exception otherwise.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_agents(
- self,
- status: Optional[str] = None,
- unit_id: Optional[str] = None,
- worker_id: Optional[str] = None,
- task_id: Optional[str] = None,
- task_run_id: Optional[str] = None,
- assignment_id: Optional[str] = None,
- task_type: Optional[str] = None,
- provider_type: Optional[str] = None,
- ) -> List[Agent]:
- """
- Try to find any agent that matches the above. When called with no arguments,
- return all agents.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def new_onboarding_agent(
- self, worker_id: str, task_id: str, task_run_id: str, task_type: str
- ) -> str:
- """
- Create a new agent for the given worker id to assign to the given unit
- Raises EntryAlreadyExistsException
-
- Should update the unit's status to ASSIGNED and the assigned agent to
- this one.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_onboarding_agent(self, onboarding_agent_id: str) -> Mapping[str, Any]:
- """
- Return onboarding agent's fields by onboarding_agent_id, raise
- EntryDoesNotExistException if no id exists in onboarding_agents
-
- See OnboardingAgent for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def update_onboarding_agent(
- self, onboarding_agent_id: str, status: Optional[str] = None
- ) -> None:
- """
- Update the given onboarding agent with the given parameters if possible,
- raise appropriate exception otherwise.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_onboarding_agents(
- self,
- status: Optional[str] = None,
- worker_id: Optional[str] = None,
- task_id: Optional[str] = None,
- task_run_id: Optional[str] = None,
- task_type: Optional[str] = None,
- ) -> List[OnboardingAgent]:
- """
- Try to find any onboarding agent that matches the above. When called with no arguments,
- return all onboarding agents.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def make_qualification(self, qualification_name: str) -> str:
- """
- Make a new qualification, throws an error if a qualification by the given name
- already exists. Return the id for the qualification.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def find_qualifications(
- self, qualification_name: Optional[str] = None
- ) -> List[Qualification]:
- """
- Find a qualification. If no name is supplied, returns all qualifications.
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_qualification(self, qualification_id: str) -> Mapping[str, Any]:
- """
- Return qualification's fields by qualification_id, raise
- EntryDoesNotExistException if no id exists in qualifications
-
- See Qualification for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def _delete_qualification(self, qualification_name: str) -> None:
- """
- Remove this qualification from all workers that have it, then delete the qualification
- """
- raise NotImplementedError()
-
- @abstractmethod
- def grant_qualification(
- self, qualification_id: str, worker_id: str, value: int = 1
- ) -> None:
- """
- Grant a worker the given qualification. Update the qualification value if it
- already exists
- """
- raise NotImplementedError()
-
- @abstractmethod
- def check_granted_qualifications(
- self,
- qualification_id: Optional[str] = None,
- worker_id: Optional[str] = None,
- value: Optional[int] = None,
- ) -> List[GrantedQualification]:
- """
- Find granted qualifications that match the given specifications
- """
- raise NotImplementedError()
-
- @abstractmethod
- def get_granted_qualification(
- self, qualification_id: Optional[str] = None, worker_id: Optional[str] = None
- ) -> Mapping[str, Any]:
- """
- Return the granted qualification in the database between the given
- worker and qualification id
-
- See GrantedQualification for the expected fields for the returned mapping
- """
- raise NotImplementedError()
-
- @abstractmethod
- def revoke_qualification(self, qualification_id: str, worker_id: str) -> None:
- """
- Remove the given qualification from the given worker
- """
- raise NotImplementedError()
diff --git a/mephisto/data_model/project.py b/mephisto/data_model/project.py
index 3465fa13d..c5e878baf 100644
--- a/mephisto/data_model/project.py
+++ b/mephisto/data_model/project.py
@@ -9,7 +9,7 @@
from typing import List, Mapping, Any, Optional, TYPE_CHECKING
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
+ from mephisto.abstractions.database import MephistoDB
from mephisto.data_model.task import Task
diff --git a/mephisto/data_model/qualification.py b/mephisto/data_model/qualification.py
index a19ad96e2..322061d45 100644
--- a/mephisto/data_model/qualification.py
+++ b/mephisto/data_model/qualification.py
@@ -5,7 +5,7 @@
# LICENSE file in the root directory of this source tree.
from abc import ABC, abstractmethod, abstractstaticmethod
-from mephisto.core.registry import (
+from mephisto.operations.registry import (
get_crowd_provider_from_type,
get_valid_provider_types,
)
@@ -13,8 +13,8 @@
from typing import List, Optional, Mapping, Dict, TYPE_CHECKING, Any
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.worker import Worker
from argparse import _ArgumentGroup as ArgumentGroup
diff --git a/mephisto/data_model/requester.py b/mephisto/data_model/requester.py
index 622ca12cc..7cf078ad0 100644
--- a/mephisto/data_model/requester.py
+++ b/mephisto/data_model/requester.py
@@ -11,8 +11,8 @@
from typing import List, Optional, Mapping, Dict, TYPE_CHECKING, Any, Type, ClassVar
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
from argparse import _ArgumentGroup as ArgumentGroup
@@ -59,7 +59,7 @@ def __new__(
as you will instead be returned the correct Requester class according to
the crowdprovider associated with this Requester.
"""
- from mephisto.core.registry import get_crowd_provider_from_type
+ from mephisto.operations.registry import get_crowd_provider_from_type
if cls == Requester:
# We are trying to construct a Requester, find what type to use and
diff --git a/mephisto/data_model/task.py b/mephisto/data_model/task.py
index b47274436..0cbcd8c1a 100644
--- a/mephisto/data_model/task.py
+++ b/mephisto/data_model/task.py
@@ -7,32 +7,24 @@
import os
from shutil import copytree
-import json
from mephisto.data_model.project import Project
-from mephisto.data_model.requester import Requester
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.data_model.task_config import TaskConfig
-from mephisto.core.utils import (
- get_tasks_dir,
+from mephisto.operations.utils import (
get_dir_for_task,
ensure_user_confirm,
- get_dir_for_run,
)
from functools import reduce
-from omegaconf import OmegaConf
+from mephisto.data_model.task_run import *
-from typing import List, Optional, Tuple, Dict, cast, Mapping, TYPE_CHECKING, Any
+from typing import List, Optional, Mapping, TYPE_CHECKING, Any
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
+ from mephisto.abstractions.database import MephistoDB
from mephisto.data_model.assignment import Assignment
- from mephisto.data_model.blueprint import Blueprint, SharedTaskState
from mephisto.data_model.worker import Worker
from mephisto.data_model.unit import Unit
- from mephisto.data_model.crowd_provider import CrowdProvider
- from omegaconf import DictConfig
+ from mephisto.abstractions.crowd_provider import CrowdProvider
# TODO(#98) pull from utils, these are blueprints
@@ -178,300 +170,3 @@ def new(
def __repr__(self):
return f"Task-{self.task_name} [{self.task_type}]"
-
-
-class TaskRun:
- """
- This class tracks an individual run of a specific task, and handles state management
- for the set of assignments within
- """
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- self.db: "MephistoDB" = db
- if row is None:
- row = db.get_task_run(db_id)
- assert row is not None, f"Given db_id {db_id} did not exist in given db"
- self.db_id: str = row["task_run_id"]
- self.task_id = row["task_id"]
- self.requester_id = row["requester_id"]
- self.param_string = row["init_params"]
- try:
- self.args: "DictConfig" = OmegaConf.create(json.loads(self.param_string))
- except Exception as e:
- self.args = None
- print(e)
- self.start_time = row["creation_date"]
- self.provider_type = row["provider_type"]
- self.task_type = row["task_type"]
- self.sandbox = row["sandbox"]
- self.assignments_generator_done: bool = None
-
- # properties with deferred loading
- self.__is_completed = row["is_completed"]
- self.__has_assignments = False
- self.__task_config: Optional["TaskConfig"] = None
- self.__task: Optional["Task"] = None
- self.__requester: Optional["Requester"] = None
- self.__run_dir: Optional[str] = None
- self.__blueprint: Optional["Blueprint"] = None
- self.__crowd_provider: Optional["CrowdProvider"] = None
-
- def get_units(self) -> List["Unit"]:
- """
- Return the units associated with this task run.
- """
- return self.db.find_units(task_run_id=self.db_id)
-
- def get_valid_units_for_worker(self, worker: "Worker") -> List["Unit"]:
- """
- Get any units that the given worker could work on in this
- task run
- """
- config = self.get_task_config()
-
- if config.allowed_concurrent != 0 or config.maximum_units_per_worker:
- current_units = self.db.find_units(
- task_run_id=self.db_id,
- worker_id=worker.db_id,
- status=AssignmentState.ASSIGNED,
- )
- currently_active = len(current_units)
- if config.allowed_concurrent != 0:
- if currently_active >= config.allowed_concurrent:
- return [] # currently at the maximum number of concurrent units
- if config.maximum_units_per_worker != 0:
- currently_completed = len(
- self.db.find_units(
- task_id=self.task_id,
- worker_id=worker.db_id,
- status=AssignmentState.COMPLETED,
- )
- )
- if (
- currently_active + currently_completed
- >= config.maximum_units_per_worker
- ):
- return [] # Currently at the maximum number of units for this task
-
- task_units: List["Unit"] = self.get_units()
- unit_assigns: Dict[str, List["Unit"]] = {}
- for unit in task_units:
- assignment_id = unit.assignment_id
- if assignment_id not in unit_assigns:
- unit_assigns[assignment_id] = []
- unit_assigns[assignment_id].append(unit)
-
- # Cannot pair with self
- units: List["Unit"] = []
- for unit_set in unit_assigns.values():
- is_self_set = map(lambda u: u.worker_id == worker.db_id, unit_set)
- if not any(is_self_set):
- units += unit_set
- valid_units = [u for u in units if u.get_status() == AssignmentState.LAUNCHED]
-
- # Should load cached blueprint for SharedTaskState
- blueprint = self.get_blueprint()
- ret_units = [
- u
- for u in valid_units
- if blueprint.shared_state.worker_can_do_unit(worker, u)
- ]
-
- return ret_units
-
- def clear_reservation(self, unit: "Unit") -> None:
- """
- Remove the holder used to reserve a unit
- """
- file_name = f"unit_res_{unit.db_id}"
- write_dir = os.path.join(self.get_run_dir(), "reservations")
- if os.path.exists(os.path.join(write_dir, file_name)):
- os.unlink(os.path.join(write_dir, file_name))
-
- def reserve_unit(self, unit: "Unit") -> Optional["Unit"]:
- """
- 'Atomically' reserve a unit by writing to the filesystem. If
- the file creation fails, return none
- """
- file_name = f"unit_res_{unit.db_id}"
- write_dir = os.path.join(self.get_run_dir(), "reservations")
- os.makedirs(write_dir, exist_ok=True)
- try:
- with open(os.path.join(write_dir, file_name), "x") as res_file:
- pass # Creating the file is sufficient
- except FileExistsError:
- print(os.path.join(write_dir, file_name), " existed")
- return None
- return unit
-
- def get_blueprint(
- self,
- args: Optional["DictConfig"] = None,
- shared_state: Optional["SharedTaskState"] = None,
- ) -> "Blueprint":
- """Return the runner associated with this task run"""
- from mephisto.core.registry import get_blueprint_from_type
- from mephisto.data_model.blueprint import SharedTaskState
-
- if self.__blueprint is None:
- cache = False
- if args is None:
- args = self.args
- else:
- cache = True
- if shared_state is None:
- shared_state = SharedTaskState()
-
- BlueprintClass = get_blueprint_from_type(self.task_type)
- if not cache:
- return BlueprintClass(self, args, shared_state)
- self.__blueprint = BlueprintClass(self, args, shared_state)
- return self.__blueprint
-
- def get_provider(self) -> "CrowdProvider":
- """Return the crowd provider used to launch this task"""
- from mephisto.core.registry import get_crowd_provider_from_type
-
- if self.__crowd_provider is None:
- CrowdProviderClass = get_crowd_provider_from_type(self.provider_type)
- self.__crowd_provider = CrowdProviderClass(self.db)
- return self.__crowd_provider
-
- def get_task(self) -> "Task":
- """Return the task used to initialize this run"""
- if self.__task is None:
- self.__task = Task(self.db, self.task_id)
- return self.__task
-
- def get_task_config(self) -> "TaskConfig":
- if self.__task_config is None:
- self.__task_config = TaskConfig(self)
- return self.__task_config
-
- def get_requester(self) -> Requester:
- """
- Return the requester that started this task.
- """
- if self.__requester is None:
- self.__requester = Requester(self.db, self.requester_id)
- return self.__requester
-
- def get_has_assignments(self) -> bool:
- """See if this task run has any assignments launched yet"""
- if not self.__has_assignments:
- if len(self.get_assignments()) > 0:
- self.__has_assignments = True
- return self.__has_assignments
-
- def get_assignments(self, status: Optional[str] = None) -> List["Assignment"]:
- """
- Get assignments for this run, optionally filtering by their
- current status
- """
- assert (
- status is None or status in AssignmentState.valid()
- ), "Invalid assignment status"
- assignments = self.db.find_assignments(task_run_id=self.db_id)
- if status is not None:
- assignments = [a for a in assignments if a.get_status() == status]
- return assignments
-
- def get_assignment_statuses(self) -> Dict[str, int]:
- """
- Get the statistics for all of the assignments for this run.
- """
- assigns = self.get_assignments()
- assigns_with_status = [(x, x.get_status()) for x in assigns]
- return {
- status: len(
- [x for x, had_status in assigns_with_status if had_status == status]
- )
- for status in AssignmentState.valid()
- }
-
- def update_completion_progress(self, task_launcher=None, status=None) -> None:
- """ Flag the task run that the assignments' generator has finished """
- if task_launcher:
- if task_launcher.get_assignments_are_all_created():
- self.assignments_generator_done = True
- if status:
- self.assignments_generator_done = status
-
- def get_is_completed(self) -> bool:
- """get the completion status of this task"""
- self.sync_completion_status()
- return self.__is_completed
-
- def sync_completion_status(self) -> None:
- """
- Update the is_complete status for this task run based on completion
- of subassignments. If this task run has no subassignments yet, it
- is not complete
- """
- # TODO(#99) revisit when/if it's possible to add tasks to a completed run
- if not self.__is_completed and self.get_has_assignments():
- statuses = self.get_assignment_statuses()
- has_incomplete = False
- for status in AssignmentState.incomplete():
- if statuses[status] > 0:
- has_incomplete = True
- if not has_incomplete and self.assignments_generator_done is not False:
- self.db.update_task_run(self.db_id, is_completed=True)
- self.__is_completed = True
-
- def get_run_dir(self) -> str:
- """
- Return the directory where the data from this run is stored
- """
- if self.__run_dir is None:
- task = self.get_task()
- project = task.get_project()
- if project is None:
- self.__run_dir = get_dir_for_run(self)
- else:
- self.__run_dir = get_dir_for_run(self, project.project_name)
- os.makedirs(self.__run_dir, exist_ok=True)
- return self.__run_dir
-
- def get_total_spend(self) -> float:
- """
- Return the total amount spent on this run, based on any assignments
- that are still in a payable state.
- """
- assigns = self.get_assignments()
- total_amount = 0.0
- for assign in assigns:
- total_amount += assign.get_cost_of_statuses(AssignmentState.payable())
- return total_amount
-
- def to_dict(self) -> Dict[str, Any]:
- """Return a dict containing any important information about this task run"""
- return {
- "task_run_id": self.db_id,
- "task_id": self.task_id,
- "task_name": self.get_task().task_name,
- "task_type": self.task_type,
- "start_time": self.start_time,
- "params": self.get_task_config().args,
- "param_string": self.param_string,
- "task_status": self.get_assignment_statuses(),
- "sandbox": self.get_requester().is_sandbox(),
- }
-
- @staticmethod
- def new(
- db: "MephistoDB", task: Task, requester: Requester, param_string: str
- ) -> "TaskRun":
- """
- Create a new run for the given task with the given params
- """
- db_id = db.new_task_run(
- task.db_id,
- requester.db_id,
- param_string,
- requester.provider_type,
- task.task_type,
- )
- return TaskRun(db, db_id)
diff --git a/mephisto/data_model/task_config.py b/mephisto/data_model/task_config.py
index fc2dfc347..b5c35a869 100644
--- a/mephisto/data_model/task_config.py
+++ b/mephisto/data_model/task_config.py
@@ -15,7 +15,7 @@
import shlex
if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
+ from mephisto.data_model.task_run import TaskRun
from argparse import _ArgumentGroup as ArgumentGroup
@@ -115,7 +115,7 @@ def __init__(self, task_run: "TaskRun"):
@classmethod
def get_mock_params(cls) -> str:
"""Returns a param string with default / mock arguments to use for testing"""
- from mephisto.core.hydra_config import MephistoConfig
+ from mephisto.operations.hydra_config import MephistoConfig
return OmegaConf.structured(
MephistoConfig(
diff --git a/mephisto/data_model/task_run.py b/mephisto/data_model/task_run.py
new file mode 100644
index 000000000..7082bf1c3
--- /dev/null
+++ b/mephisto/data_model/task_run.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import os
+import json
+
+from mephisto.data_model.requester import Requester
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.data_model.task_config import TaskConfig
+from mephisto.operations.utils import get_dir_for_run
+
+from omegaconf import OmegaConf
+
+from typing import List, Optional, Dict, Mapping, TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.assignment import Assignment
+ from mephisto.abstractions.blueprint import Blueprint, SharedTaskState
+ from mephisto.data_model.worker import Worker
+ from mephisto.data_model.unit import Unit
+ from mephisto.abstractions.crowd_provider import CrowdProvider
+ from mephisto.data_model.task import Task
+ from omegaconf import DictConfig
+
+
+class TaskRun:
+    """
+    This class tracks an individual run of a specific task, and handles state management
+    for the set of assignments within
+    """
+
+    def __init__(
+        self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+    ):
+        self.db: "MephistoDB" = db
+        if row is None:
+            row = db.get_task_run(db_id)
+        assert row is not None, f"Given db_id {db_id} did not exist in given db"
+        self.db_id: str = row["task_run_id"]
+        self.task_id = row["task_id"]
+        self.requester_id = row["requester_id"]
+        self.param_string = row["init_params"]
+        self.args: Optional["DictConfig"] = None
+        try:
+            self.args = OmegaConf.create(json.loads(self.param_string))
+        except Exception as e:
+            print(e)
+        self.start_time = row["creation_date"]
+        self.provider_type = row["provider_type"]
+        self.task_type = row["task_type"]
+        self.sandbox = row["sandbox"]
+        # Tri-state flag: sync_completion_status only blocks completion on False
+        self.assignments_generator_done: Optional[bool] = None
+        # properties with deferred loading
+        self.__is_completed = row["is_completed"]
+        self.__has_assignments = False
+        self.__task_config: Optional["TaskConfig"] = None
+        self.__task: Optional["Task"] = None
+        self.__requester: Optional["Requester"] = None
+        self.__run_dir: Optional[str] = None
+        self.__blueprint: Optional["Blueprint"] = None
+        self.__crowd_provider: Optional["CrowdProvider"] = None
+
+    def get_units(self) -> List["Unit"]:
+        """
+        Return the units associated with this task run.
+        """
+        return self.db.find_units(task_run_id=self.db_id)
+
+    def get_valid_units_for_worker(self, worker: "Worker") -> List["Unit"]:
+        """
+        Get any units that the given worker could work on in this
+        task run
+        """
+        config = self.get_task_config()
+
+        if config.allowed_concurrent != 0 or config.maximum_units_per_worker:
+            current_units = self.db.find_units(
+                task_run_id=self.db_id,
+                worker_id=worker.db_id,
+                status=AssignmentState.ASSIGNED,
+            )
+            currently_active = len(current_units)
+            if config.allowed_concurrent != 0:
+                if currently_active >= config.allowed_concurrent:
+                    return []  # currently at the maximum number of concurrent units
+            if config.maximum_units_per_worker != 0:
+                currently_completed = len(
+                    self.db.find_units(
+                        task_id=self.task_id,
+                        worker_id=worker.db_id,
+                        status=AssignmentState.COMPLETED,
+                    )
+                )
+                if (
+                    currently_active + currently_completed
+                    >= config.maximum_units_per_worker
+                ):
+                    return []  # Currently at the maximum number of units for this task
+
+        task_units: List["Unit"] = self.get_units()
+        unit_assigns: Dict[str, List["Unit"]] = {}
+        for unit in task_units:
+            assignment_id = unit.assignment_id
+            if assignment_id not in unit_assigns:
+                unit_assigns[assignment_id] = []
+            unit_assigns[assignment_id].append(unit)
+
+        # Cannot pair with self
+        units: List["Unit"] = []
+        for unit_set in unit_assigns.values():
+            # Skip assignments where this worker already holds one of the units
+            if not any(u.worker_id == worker.db_id for u in unit_set):
+                units += unit_set
+        valid_units = [u for u in units if u.get_status() == AssignmentState.LAUNCHED]
+
+        # Should load cached blueprint for SharedTaskState
+        blueprint = self.get_blueprint()
+        ret_units = [
+            u
+            for u in valid_units
+            if blueprint.shared_state.worker_can_do_unit(worker, u)
+        ]
+
+        return ret_units
+
+    def clear_reservation(self, unit: "Unit") -> None:
+        """Remove the holder file used to reserve a unit, if one exists"""
+        write_dir = os.path.join(self.get_run_dir(), "reservations")
+        try:
+            # Unlink directly: checking existence first races with other callers
+            os.unlink(os.path.join(write_dir, f"unit_res_{unit.db_id}"))
+        except FileNotFoundError:
+            pass  # Nothing was reserved, or the holder was already removed
+
+    def reserve_unit(self, unit: "Unit") -> Optional["Unit"]:
+        """
+        'Atomically' reserve a unit by writing to the filesystem. If
+        the file creation fails, return none
+        """
+        file_name = f"unit_res_{unit.db_id}"
+        write_dir = os.path.join(self.get_run_dir(), "reservations")
+        os.makedirs(write_dir, exist_ok=True)
+        try:
+            with open(os.path.join(write_dir, file_name), "x"):
+                pass  # Creating the file is sufficient
+        except FileExistsError:
+            print(os.path.join(write_dir, file_name), " existed")
+            return None
+        return unit
+
+    def get_blueprint(
+        self,
+        args: Optional["DictConfig"] = None,
+        shared_state: Optional["SharedTaskState"] = None,
+    ) -> "Blueprint":
+        """Return the blueprint associated with this task run"""
+        from mephisto.operations.registry import get_blueprint_from_type
+        from mephisto.abstractions.blueprint import SharedTaskState
+
+        if self.__blueprint is None:
+            cache = False
+            if args is None:
+                args = self.args
+            else:
+                cache = True
+            if shared_state is None:
+                shared_state = SharedTaskState()
+
+            BlueprintClass = get_blueprint_from_type(self.task_type)
+            if not cache:  # only blueprints built from explicit args are cached
+                return BlueprintClass(self, args, shared_state)
+            self.__blueprint = BlueprintClass(self, args, shared_state)
+        return self.__blueprint
+
+    def get_provider(self) -> "CrowdProvider":
+        """Return the crowd provider used to launch this task"""
+        from mephisto.operations.registry import get_crowd_provider_from_type
+
+        if self.__crowd_provider is None:
+            CrowdProviderClass = get_crowd_provider_from_type(self.provider_type)
+            self.__crowd_provider = CrowdProviderClass(self.db)
+        return self.__crowd_provider
+
+    def get_task(self) -> "Task":
+        """Return the task used to initialize this run"""
+        if self.__task is None:
+            from mephisto.data_model.task import Task
+
+            self.__task = Task(self.db, self.task_id)
+        return self.__task
+
+    def get_task_config(self) -> "TaskConfig":
+        if self.__task_config is None:
+            self.__task_config = TaskConfig(self)
+        return self.__task_config
+
+    def get_requester(self) -> Requester:
+        """
+        Return the requester that started this task.
+        """
+        if self.__requester is None:
+            self.__requester = Requester(self.db, self.requester_id)
+        return self.__requester
+
+    def get_has_assignments(self) -> bool:
+        """See if this task run has any assignments launched yet"""
+        if not self.__has_assignments:
+            if len(self.get_assignments()) > 0:
+                self.__has_assignments = True
+        return self.__has_assignments
+
+    def get_assignments(self, status: Optional[str] = None) -> List["Assignment"]:
+        """
+        Get assignments for this run, optionally filtering by their
+        current status
+        """
+        assert (
+            status is None or status in AssignmentState.valid()
+        ), "Invalid assignment status"
+        assignments = self.db.find_assignments(task_run_id=self.db_id)
+        if status is not None:
+            assignments = [a for a in assignments if a.get_status() == status]
+        return assignments
+
+    def get_assignment_statuses(self) -> Dict[str, int]:
+        """
+        Get the statistics for all of the assignments for this run, as a
+        mapping from every valid assignment status to its assignment count.
+        """
+        # Single pass over the assignments rather than one scan per status
+        status_counts = {status: 0 for status in AssignmentState.valid()}
+        for assign in self.get_assignments():
+            status = assign.get_status()
+            if status in status_counts:
+                status_counts[status] += 1
+        return status_counts
+
+    def update_completion_progress(self, task_launcher=None, status=None) -> None:
+        """Flag whether the assignments' generator for this task run has finished"""
+        if task_launcher:
+            if task_launcher.get_assignments_are_all_created():
+                self.assignments_generator_done = True
+        if status is not None:  # an explicit False must be recorded too
+            self.assignments_generator_done = status
+
+    def get_is_completed(self) -> bool:
+        """get the completion status of this task"""
+        self.sync_completion_status()
+        return self.__is_completed
+
+    def sync_completion_status(self) -> None:
+        """
+        Update the is_complete status for this task run based on completion
+        of subassignments. If this task run has no subassignments yet, it
+        is not complete
+        """
+        # TODO(#99) revisit when/if it's possible to add tasks to a completed run
+        if not self.__is_completed and self.get_has_assignments():
+            statuses = self.get_assignment_statuses()
+            has_incomplete = False
+            for status in AssignmentState.incomplete():
+                if statuses[status] > 0:
+                    has_incomplete = True
+            if not has_incomplete and self.assignments_generator_done is not False:
+                self.db.update_task_run(self.db_id, is_completed=True)
+                self.__is_completed = True
+
+    def get_run_dir(self) -> str:
+        """
+        Return the directory where the data from this run is stored
+        """
+        if self.__run_dir is None:
+            task = self.get_task()
+            project = task.get_project()
+            if project is None:
+                self.__run_dir = get_dir_for_run(self)
+            else:
+                self.__run_dir = get_dir_for_run(self, project.project_name)
+            os.makedirs(self.__run_dir, exist_ok=True)
+        return self.__run_dir
+
+    def get_total_spend(self) -> float:
+        """
+        Return the total amount spent on this run, based on any assignments
+        that are still in a payable state.
+        """
+        assigns = self.get_assignments()
+        total_amount = 0.0
+        for assign in assigns:
+            total_amount += assign.get_cost_of_statuses(AssignmentState.payable())
+        return total_amount
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a dict containing any important information about this task run"""
+        return {
+            "task_run_id": self.db_id,
+            "task_id": self.task_id,
+            "task_name": self.get_task().task_name,
+            "task_type": self.task_type,
+            "start_time": self.start_time,
+            "params": self.get_task_config().args,
+            "param_string": self.param_string,
+            "task_status": self.get_assignment_statuses(),
+            "sandbox": self.get_requester().is_sandbox(),
+        }
+
+    @staticmethod
+    def new(
+        db: "MephistoDB", task: "Task", requester: Requester, param_string: str
+    ) -> "TaskRun":
+        """
+        Create a new run for the given task with the given params
+        """
+        db_id = db.new_task_run(
+            task.db_id,
+            requester.db_id,
+            param_string,
+            requester.provider_type,
+            task.task_type,
+        )
+        return TaskRun(db, db_id)
diff --git a/mephisto/data_model/test/architect_tester.py b/mephisto/data_model/test/architect_tester.py
index 8152804ee..834e68c10 100644
--- a/mephisto/data_model/test/architect_tester.py
+++ b/mephisto/data_model/test/architect_tester.py
@@ -11,14 +11,14 @@
import os
import shutil
import requests
-from mephisto.data_model.architect import Architect
-from mephisto.data_model.task import TaskRun
+from mephisto.abstractions.architect import Architect
+from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.test.utils import get_test_task_run
-from mephisto.data_model.database import MephistoDB
-from mephisto.data_model.blueprint import SharedTaskState
-from mephisto.server.blueprints.mock.mock_task_builder import MockTaskBuilder
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.core.hydra_config import MephistoConfig
+from mephisto.abstractions.database import MephistoDB
+from mephisto.abstractions.blueprint import SharedTaskState
+from mephisto.abstractions.blueprints.mock.mock_task_builder import MockTaskBuilder
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.operations.hydra_config import MephistoConfig
from omegaconf import OmegaConf
EMPTY_STATE = SharedTaskState()
diff --git a/mephisto/data_model/test/blueprint_tester.py b/mephisto/data_model/test/blueprint_tester.py
index c3eb174ae..222806aa0 100644
--- a/mephisto/data_model/test/blueprint_tester.py
+++ b/mephisto/data_model/test/blueprint_tester.py
@@ -12,14 +12,19 @@
import shutil
import threading
import time
-from mephisto.data_model.blueprint import Blueprint, AgentState, TaskRunner, TaskBuilder
-from mephisto.core.local_database import LocalMephistoDB
+from mephisto.abstractions.blueprint import (
+ Blueprint,
+ AgentState,
+ TaskRunner,
+ TaskBuilder,
+)
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
from mephisto.data_model.assignment import Assignment
-from mephisto.data_model.task import TaskRun
+from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.test.utils import get_test_task_run
-from mephisto.providers.mock.mock_agent import MockAgent
+from mephisto.abstractions.providers.mock.mock_agent import MockAgent
from mephisto.data_model.agent import Agent
-from mephisto.core.hydra_config import MephistoConfig
+from mephisto.operations.hydra_config import MephistoConfig
from omegaconf import OmegaConf
diff --git a/mephisto/data_model/test/crowd_provider_tester.py b/mephisto/data_model/test/crowd_provider_tester.py
index 0b53fa351..3d4a80d9a 100644
--- a/mephisto/data_model/test/crowd_provider_tester.py
+++ b/mephisto/data_model/test/crowd_provider_tester.py
@@ -13,14 +13,14 @@
import shutil
from mephisto.data_model.requester import Requester
from mephisto.data_model.worker import Worker
-from mephisto.data_model.database import (
+from mephisto.abstractions.database import (
MephistoDB,
MephistoDBException,
EntryAlreadyExistsException,
EntryDoesNotExistException,
)
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.data_model.crowd_provider import CrowdProvider
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.abstractions.crowd_provider import CrowdProvider
class CrowdProviderTests(unittest.TestCase):
diff --git a/mephisto/data_model/test/data_model_database_tester.py b/mephisto/data_model/test/data_model_database_tester.py
index 7127f1dbe..34c955a71 100644
--- a/mephisto/data_model/test/data_model_database_tester.py
+++ b/mephisto/data_model/test/data_model_database_tester.py
@@ -17,19 +17,20 @@
get_test_unit,
get_test_agent,
)
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE
+from mephisto.abstractions.providers.mock.provider_type import PROVIDER_TYPE
from mephisto.data_model.constants import NO_PROJECT_NAME
from mephisto.data_model.agent import Agent, OnboardingAgent
-from mephisto.data_model.blueprint import AgentState
+from mephisto.abstractions.blueprint import AgentState
from mephisto.data_model.assignment import Assignment, Unit
-from mephisto.data_model.assignment_state import AssignmentState
+from mephisto.data_model.constants.assignment_state import AssignmentState
from mephisto.data_model.project import Project
from mephisto.data_model.requester import Requester
-from mephisto.data_model.task import Task, TaskRun
+from mephisto.data_model.task import Task
+from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.task_config import TaskConfig
from mephisto.data_model.qualification import Qualification
from mephisto.data_model.worker import Worker
-from mephisto.data_model.database import (
+from mephisto.abstractions.database import (
MephistoDB,
MephistoDBException,
EntryAlreadyExistsException,
diff --git a/mephisto/data_model/test/utils.py b/mephisto/data_model/test/utils.py
index 6487b3871..ab9ed30c0 100644
--- a/mephisto/data_model/test/utils.py
+++ b/mephisto/data_model/test/utils.py
@@ -6,7 +6,7 @@
from typing import Optional, Tuple
-from mephisto.data_model.database import (
+from mephisto.abstractions.database import (
MephistoDB,
MephistoDBException,
EntryAlreadyExistsException,
@@ -17,15 +17,16 @@
from mephisto.data_model.assignment import Unit, Assignment
from mephisto.data_model.task_config import TaskConfig
from mephisto.data_model.requester import Requester
-from mephisto.data_model.task import Task, TaskRun
+from mephisto.data_model.task import Task
+from mephisto.data_model.task_run import TaskRun
from omegaconf import OmegaConf
import json
-from mephisto.providers.mock.mock_provider import MockProviderArgs
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprintArgs
-from mephisto.server.architects.mock_architect import MockArchitectArgs
+from mephisto.abstractions.providers.mock.mock_provider import MockProviderArgs
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprintArgs
+from mephisto.abstractions.architects.mock_architect import MockArchitectArgs
from mephisto.data_model.task_config import TaskConfigArgs
-from mephisto.core.hydra_config import MephistoConfig
+from mephisto.operations.hydra_config import MephistoConfig
MOCK_TASK_ARGS = TaskConfigArgs(
task_title="title",
diff --git a/mephisto/data_model/unit.py b/mephisto/data_model/unit.py
new file mode 100644
index 000000000..db27cb7a5
--- /dev/null
+++ b/mephisto/data_model/unit.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from abc import ABC
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.data_model.task import Task
+from mephisto.data_model.task_run import TaskRun
+from mephisto.data_model.agent import Agent
+from mephisto.abstractions.blueprint import AgentState
+from mephisto.data_model.requester import Requester
+from typing import Optional, Mapping, Dict, Any, Type, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.worker import Worker
+ from mephisto.abstractions.crowd_provider import CrowdProvider
+ from mephisto.data_model.assignment import Assignment
+
+import os
+
+
+class Unit(ABC):
+    """
+    This class tracks the status of an individual worker's contribution to a
+    higher level assignment. It is the smallest 'unit' of work to complete
+    the assignment, and this class is only responsible for checking
+    the status of that work itself being done.
+
+    It should be extended for usage with a specific crowd provider
+    """
+
+    def __init__(
+        self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+    ):
+        self.db: "MephistoDB" = db
+        if row is None:
+            row = db.get_unit(db_id)
+        assert row is not None, f"Given db_id {db_id} did not exist in given db"
+        self.db_id: str = row["unit_id"]
+        self.assignment_id = row["assignment_id"]
+        self.unit_index = row["unit_index"]
+        self.pay_amount = row["pay_amount"]
+        self.agent_id = row["agent_id"]
+        self.provider_type = row["provider_type"]
+        self.db_status = row["status"]
+        self.task_type = row["task_type"]
+        self.task_id = row["task_id"]
+        self.task_run_id = row["task_run_id"]
+        self.sandbox = row["sandbox"]
+        self.requester_id = row["requester_id"]
+        self.worker_id = row["worker_id"]
+
+        # Deferred loading of related entities
+        self.__task: Optional["Task"] = None
+        self.__task_run: Optional["TaskRun"] = None
+        self.__assignment: Optional["Assignment"] = None
+        self.__requester: Optional["Requester"] = None
+        self.__agent: Optional["Agent"] = None
+        self.__worker: Optional["Worker"] = None
+
+    def __new__(
+        cls, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
+    ) -> "Unit":
+        """
+        The new method is overridden to be able to automatically generate
+        the expected Unit class without needing to specifically find it
+        for a given db_id. As such it is impossible to create a Unit
+        as you will instead be returned the correct Unit class according to
+        the crowdprovider associated with this Unit.
+        """
+        if cls == Unit:
+            # We are trying to construct a Unit, find what type to use and
+            # create that instead
+            from mephisto.operations.registry import get_crowd_provider_from_type
+
+            if row is None:
+                row = db.get_unit(db_id)
+            assert row is not None, f"Given db_id {db_id} did not exist in given db"
+            correct_class = get_crowd_provider_from_type(row["provider_type"]).UnitClass
+            return super().__new__(correct_class)
+        else:
+            # We are constructing another instance directly
+            return super().__new__(cls)
+
+    def get_crowd_provider_class(self) -> Type["CrowdProvider"]:
+        """Get the CrowdProvider class that manages this Unit"""
+        from mephisto.operations.registry import get_crowd_provider_from_type
+
+        return get_crowd_provider_from_type(self.provider_type)
+
+    def get_assignment_data(self) -> Optional[Dict[str, Any]]:
+        """Return the specific assignment data for this assignment"""
+        return self.get_assignment().get_assignment_data()
+
+    def sync_status(self) -> None:
+        """
+        Ensure that the queried status from this unit and the db status
+        are up to date
+        """
+        # TODO(102) this will need to be run periodically/on crashes
+        # to sync any lost state
+        self.set_db_status(self.get_status())
+
+    def get_db_status(self) -> str:
+        """
+        Return the status as currently stored in the database
+        """
+        if self.db_status in AssignmentState.final_unit():
+            return self.db_status
+        row = self.db.get_unit(self.db_id)
+        assert row is not None, f"Unit {self.db_id} stopped existing in the db..."
+        return row["status"]
+
+    def set_db_status(self, status: str) -> None:
+        """
+        Set the status reflected in the database for this Unit
+        """
+        assert (
+            status in AssignmentState.valid_unit()
+        ), f"{status} not valid Assignment Status, not in {AssignmentState.valid_unit()}"
+        self.db_status = status
+        self.db.update_unit(self.db_id, status=status)
+
+    def get_assignment(self) -> "Assignment":
+        """
+        Return the assignment that this Unit is part of.
+        """
+        if self.__assignment is None:
+            from mephisto.data_model.assignment import Assignment
+
+            self.__assignment = Assignment(self.db, self.assignment_id)
+        return self.__assignment
+
+    def get_task_run(self) -> TaskRun:
+        """
+        Return the task run that this Unit is part of
+        """
+        if self.__task_run is None:
+            if self.__assignment is not None:
+                self.__task_run = self.__assignment.get_task_run()
+            else:
+                self.__task_run = TaskRun(self.db, self.task_run_id)
+        return self.__task_run
+
+    def get_task(self) -> Task:
+        """
+        Return the task that this Unit is part of
+        """
+        if self.__task is None:
+            if self.__assignment is not None:
+                self.__task = self.__assignment.get_task()
+            elif self.__task_run is not None:
+                self.__task = self.__task_run.get_task()
+            else:
+                self.__task = Task(self.db, self.task_id)
+        return self.__task
+
+    def get_requester(self) -> "Requester":
+        """
+        Return the requester who offered this Unit
+        """
+        if self.__requester is None:
+            if self.__assignment is not None:
+                self.__requester = self.__assignment.get_requester()
+            elif self.__task_run is not None:
+                self.__requester = self.__task_run.get_requester()
+            else:
+                self.__requester = Requester(self.db, self.requester_id)
+        return self.__requester
+
+    def clear_assigned_agent(self) -> None:
+        """Clear the agent that is assigned to this unit"""
+        self.db.clear_unit_agent_assignment(self.db_id)
+        self.agent_id = None
+        self.__agent = None
+
+    def get_assigned_agent(self) -> Optional[Agent]:
+        """
+        Get the agent assigned to this Unit if there is one, else return None
+        """
+        # In these statuses, we know the agent isn't changing anymore, and thus will
+        # not need to be re-queried
+        # TODO(#97) add test to ensure this behavior/assumption holds always
+        if self.db_status in AssignmentState.final_unit():
+            if self.agent_id is None:
+                return None
+            return Agent(self.db, self.agent_id)
+
+        # Query the database to get the most up-to-date assignment, as this can
+        # change after instantiation if the Unit status isn't final
+        # TODO(#101) this may not be particularly efficient
+        row = self.db.get_unit(self.db_id)
+        assert row is not None, f"Unit {self.db_id} stopped existing in the db..."
+        agent_id = row["agent_id"]
+        if agent_id is not None:
+            return Agent(self.db, agent_id)
+        return None
+
+    @staticmethod
+    def _register_unit(
+        db: "MephistoDB",
+        assignment: "Assignment",
+        index: int,
+        pay_amount: float,
+        provider_type: str,
+    ) -> "Unit":
+        """
+        Create an entry for this unit in the database
+        """
+        db_id = db.new_unit(
+            assignment.task_id,
+            assignment.task_run_id,
+            assignment.requester_id,
+            assignment.db_id,
+            index,
+            pay_amount,
+            provider_type,
+            assignment.task_type,
+        )
+        return Unit(db, db_id)
+
+    def get_pay_amount(self) -> float:
+        """
+        Return the amount that this Unit is costing against the budget,
+        calculating additional fees as relevant
+        """
+        return self.pay_amount
+
+    # Children classes may need to override the following
+
+    def get_status(self) -> str:
+        """
+        Get the status of this unit, as determined by whether there's
+        a worker working on it at the moment, and any other possible states. Should
+        return one of UNIT_STATUSES
+
+        Accurate status is crowd-provider dependent, and thus this method should be
+        defined in the child class to ensure that the local record matches
+        the ground truth in the provider
+        """
+        db_status = self.db_status
+        computed_status = AssignmentState.LAUNCHED
+
+        agent = self.get_assigned_agent()
+        if agent is None:
+            # No agent assigned: report the status currently stored in the db
+            row = self.db.get_unit(self.db_id)
+            assert row is not None, f"Unit {self.db_id} stopped existing in the db..."
+            computed_status = row["status"]
+        else:
+            agent_status = agent.get_status()
+            if agent_status == AgentState.STATUS_NONE:
+                computed_status = AssignmentState.LAUNCHED
+            elif agent_status in [
+                AgentState.STATUS_ACCEPTED,
+                AgentState.STATUS_ONBOARDING,
+                AgentState.STATUS_PARTNER_DISCONNECT,
+                AgentState.STATUS_WAITING,
+                AgentState.STATUS_IN_TASK,
+            ]:
+                computed_status = AssignmentState.ASSIGNED
+            elif agent_status in [AgentState.STATUS_COMPLETED]:
+                computed_status = AssignmentState.COMPLETED
+            elif agent_status in [AgentState.STATUS_SOFT_REJECTED]:
+                computed_status = AssignmentState.SOFT_REJECTED
+            elif agent_status in [AgentState.STATUS_EXPIRED]:
+                computed_status = AssignmentState.EXPIRED
+            elif agent_status in [
+                AgentState.STATUS_DISCONNECT,
+                AgentState.STATUS_RETURNED,
+            ]:
+                computed_status = AssignmentState.ASSIGNED
+            elif agent_status == AgentState.STATUS_APPROVED:
+                computed_status = AssignmentState.ACCEPTED
+            elif agent_status == AgentState.STATUS_REJECTED:
+                computed_status = AssignmentState.REJECTED
+
+        if computed_status != db_status:
+            self.set_db_status(computed_status)
+
+        return computed_status
+
+    # Children classes should implement the below methods
+
+    def launch(self, task_url: str) -> None:
+        """
+        Make this Unit available on the crowdsourcing vendor. Depending on
+        the task type, this could mean a number of different setup steps.
+
+        Some crowd providers require setting up a configuration for the
+        very first launch, and this method should call a helper to manage
+        that step if necessary.
+        """
+        raise NotImplementedError()
+
+    def expire(self) -> float:
+        """
+        Expire this unit, removing it from being workable on the vendor.
+        Return the maximum time needed to wait before we know it's taken down.
+        """
+        raise NotImplementedError()
+
+    def is_expired(self) -> bool:
+        """Determine if this unit is expired as according to the vendor."""
+        raise NotImplementedError()
+
+    @staticmethod
+    def new(
+        db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
+    ) -> "Unit":
+        """
+        Create a Unit for the given assignment
+
+        Implementation should return the result of _register_unit when sure the unit
+        can be successfully created to have it put into the db.
+        """
+        raise NotImplementedError()
diff --git a/mephisto/data_model/worker.py b/mephisto/data_model/worker.py
index 948b2194f..ec2095603 100644
--- a/mephisto/data_model/worker.py
+++ b/mephisto/data_model/worker.py
@@ -6,19 +6,19 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
-from mephisto.data_model.blueprint import AgentState
+from mephisto.abstractions.blueprint import AgentState
from typing import Any, List, Optional, Mapping, Tuple, Dict, Type, Tuple, TYPE_CHECKING
-from mephisto.core.logger_core import get_logger
+from mephisto.operations.logger_core import get_logger
logger = get_logger(name=__name__, verbose=True, level="info")
if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
+ from mephisto.abstractions.database import MephistoDB
from mephisto.data_model.agent import Agent
from mephisto.data_model.assignment import Unit
from mephisto.data_model.requester import Requester
- from mephisto.data_model.task import TaskRun
+ from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.qualification import GrantedQualification
from argparse import _ArgumentGroup as ArgumentGroup
@@ -64,7 +64,7 @@ def __new__(
as you will instead be returned the correct Worker class according to
the crowdprovider associated with this Worker.
"""
- from mephisto.core.registry import get_crowd_provider_from_type
+ from mephisto.operations.registry import get_crowd_provider_from_type
if cls == Worker:
# We are trying to construct a Worker, find what type to use and
diff --git a/mephisto/operations/config_handler.py b/mephisto/operations/config_handler.py
new file mode 100644
index 000000000..0dc0b0ec2
--- /dev/null
+++ b/mephisto/operations/config_handler.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import yaml
+from typing import Dict, Any
+
+CORE_SECTION = "core"
+DATA_STORAGE_KEY = "main_data_directory"
+
+DEFAULT_CONFIG_FOLDER = os.path.expanduser("~/.mephisto/")
+DEFAULT_CONFIG_FILE = os.path.join(DEFAULT_CONFIG_FOLDER, "config.yml")
+OLD_DATA_CONFIG_LOC = os.path.join(
+ os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "DATA_LOC"
+)
+
+
+def get_config() -> Dict[str, Any]:
+    """Get the data out of the YAML config file (empty dict if the file is empty)"""
+    with open(DEFAULT_CONFIG_FILE, "r") as config_file:
+        return yaml.safe_load(config_file.read().strip()) or {}
+
+
+def write_config(config_data: Dict[str, Any]):
+    """Overwrite the config yaml with a YAML dump of the given dictionary"""
+    with open(DEFAULT_CONFIG_FILE, "w") as yaml_fp:
+        yaml.dump(config_data, yaml_fp)
+
+
+def init_config() -> None:
+    """Ensure the config folder and YAML file exist, migrating any old DATA_LOC file"""
+    os.makedirs(DEFAULT_CONFIG_FOLDER, exist_ok=True)  # no exists()/mkdir() race
+
+    if os.path.exists(OLD_DATA_CONFIG_LOC):
+        print(
+            f"We are migrating Mephisto's configuration to a YAML file stored at {DEFAULT_CONFIG_FILE}"
+        )
+        with open(OLD_DATA_CONFIG_LOC, "r") as data_dir_file:
+            loaded_data_dir = data_dir_file.read().strip()
+        with open(DEFAULT_CONFIG_FILE, "w") as config_file:
+            config_file.write(
+                yaml.dump({CORE_SECTION: {DATA_STORAGE_KEY: loaded_data_dir}})
+            )
+        print(f"Removing DATA_LOC configuration file from {OLD_DATA_CONFIG_LOC}")
+        os.unlink(OLD_DATA_CONFIG_LOC)
+    elif not os.path.exists(DEFAULT_CONFIG_FILE):
+        with open(DEFAULT_CONFIG_FILE, "w") as config_fp:
+            config_fp.write(yaml.dump({CORE_SECTION: {}}))
+
+
+def add_config_arg(section: str, key: str, value: Any) -> None:
+    """Set `key` to `value` under `section` of the YAML config, replacing any old value"""
+    current = get_config()
+    # Create the section on first use, then store the value inside it
+    section_data = current.setdefault(section, {})
+    section_data[key] = value
+    write_config(current)
+
+
+def get_config_arg(section: str, key: str) -> Any:
+    """Fetch `key` from `section` of the YAML config; None if either is missing"""
+    section_data = get_config().get(section, {})
+    return section_data.get(key)
diff --git a/mephisto/operations/hydra_config.py b/mephisto/operations/hydra_config.py
new file mode 100644
index 000000000..b420176cc
--- /dev/null
+++ b/mephisto/operations/hydra_config.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from hydra.core.config_store import ConfigStoreWithProvider
+from mephisto.abstractions.blueprint import BlueprintArgs
+from mephisto.abstractions.architect import ArchitectArgs
+from mephisto.abstractions.crowd_provider import ProviderArgs
+from mephisto.data_model.task_config import TaskConfigArgs
+from dataclasses import dataclass, field
+from omegaconf import MISSING
+from typing import List, Any
+
+# Shared config store handle, created with "mephisto" as its provider argument;
+# every register/store helper in this module writes through it
+config = ConfigStoreWithProvider("mephisto")
+
+
+@dataclass
+class MephistoConfig:
+    """Structured config grouping the blueprint, provider, architect and task args."""
+
+    # default_factory builds fresh args objects per MephistoConfig instead of
+    # sharing single instances created at class-definition time. Assuming the
+    # args classes are regular (unhashable) dataclasses, Python 3.11+ would
+    # also reject instance defaults with a ValueError.
+    blueprint: BlueprintArgs = field(default_factory=BlueprintArgs)
+    provider: ProviderArgs = field(default_factory=ProviderArgs)
+    architect: ArchitectArgs = field(default_factory=ArchitectArgs)
+    task: TaskConfigArgs = field(default_factory=TaskConfigArgs)
+
+
+@dataclass
+class RunScriptConfig:
+    """Structured config for a run script; nests the mephisto config under `mephisto`."""
+
+    # default_factory avoids sharing one MephistoConfig instance between all
+    # RunScriptConfig objects (and the dataclass mutable-default check on 3.11+)
+    mephisto: MephistoConfig = field(default_factory=MephistoConfig)
+
+
+def register_abstraction_config(name: str, node: Any, abstraction_type: str):
+    """Store `node` as `name` in the config group belonging to the given abstraction type"""
+    group_name = f"mephisto/{abstraction_type}"
+    config.store(name=name, node=node, group=group_name, package="_group_")
+
+
+def initialize_named_configs():
+    """
+    Store the core MephistoConfig structure in the config store, as
+    "base_mephisto_config" in the "mephisto" group. Must be done in __init__
+    """
+    base_entry = {
+        "name": "base_mephisto_config",
+        "node": MephistoConfig,
+        "group": "mephisto",
+        "package": "_group_",
+    }
+    config.store(**base_entry)
+
+
+def register_script_config(name: str, module: Any):
+    """Store `module` in the config store under `name`, with no group or package given"""
+    config.store(node=module, name=name)
diff --git a/mephisto/operations/logger_core.py b/mephisto/operations/logger_core.py
new file mode 100644
index 000000000..85a1844e1
--- /dev/null
+++ b/mephisto/operations/logger_core.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+# Cache of the loggers already configured by get_logger, keyed by name
+loggers = {}
+
+
+def get_logger(
+    name: str, verbose: bool = True, log_file: str = None, level: str = "info"
+) -> logging.Logger:
+    """
+    Gets the logger that corresponds to each module, configuring it on first use.
+
+    Parameters:
+        name (string): the module name (__name__).
+        verbose (bool): kept for backward compatibility; the effective level
+            is always the one given by `level`.
+        log_file (string): path for saving logs locally. When None, logs go
+            to a stream handler instead.
+        level (string): logging level. Values options: [info, debug, warning, error, critical].
+
+    Returns:
+        logger (logging.Logger): the corresponding logger to the given module name.
+
+    Raises:
+        KeyError: if `level` is not one of the supported options.
+    """
+    # RotatingFileHandler lives in logging.handlers, a submodule that a plain
+    # `import logging` does not load
+    from logging.handlers import RotatingFileHandler
+
+    cached = loggers.get(name)
+    if cached is not None:
+        # Already configured: return as-is so handlers are never added twice
+        return cached
+
+    level_dict = {
+        "info": logging.INFO,
+        "debug": logging.DEBUG,
+        "warning": logging.WARNING,
+        "error": logging.ERROR,
+        "critical": logging.CRITICAL,
+    }
+
+    logger = logging.getLogger(name)
+    logger.setLevel(level_dict[level.lower()])
+
+    if log_file is None:
+        handler = logging.StreamHandler()
+    else:
+        handler = RotatingFileHandler(log_file)
+    handler.setFormatter(
+        logging.Formatter(
+            "[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)5s - %(message)s",
+            "%m-%d %H:%M:%S",
+        )
+    )
+
+    logger.addHandler(handler)
+    loggers[name] = logger
+    return logger
diff --git a/mephisto/operations/operator.py b/mephisto/operations/operator.py
new file mode 100644
index 000000000..6558f2b5a
--- /dev/null
+++ b/mephisto/operations/operator.py
@@ -0,0 +1,418 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+import shutil
+import json
+import os
+import tempfile
+import time
+import threading
+import shlex
+import traceback
+
+from argparse import ArgumentParser
+
+from mephisto.operations.supervisor import Supervisor, Job
+
+from typing import Dict, Optional, List, Any, Tuple, NamedTuple, Type, TYPE_CHECKING
+from mephisto.data_model.task_config import TaskConfig
+from mephisto.data_model.task_run import TaskRun
+from mephisto.data_model.requester import Requester
+from mephisto.abstractions.blueprint import OnboardingRequired, SharedTaskState
+from mephisto.abstractions.database import MephistoDB, EntryDoesNotExistException
+from mephisto.data_model.qualification import make_qualification_dict, QUAL_NOT_EXIST
+from mephisto.operations.task_launcher import TaskLauncher
+from mephisto.operations.registry import (
+ get_blueprint_from_type,
+ get_crowd_provider_from_type,
+ get_architect_from_type,
+)
+from mephisto.operations.utils import get_mock_requester
+
+from mephisto.operations.logger_core import get_logger
+from omegaconf import DictConfig, OmegaConf
+
+# Module-level logger for the operator, requested at the "info" level
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+if TYPE_CHECKING:
+ from mephisto.data_model.agent import Agent
+ from mephisto.abstractions.blueprint import Blueprint, TaskRunner
+ from mephisto.abstractions.crowd_provider import CrowdProvider
+ from mephisto.abstractions.architect import Architect
+ from argparse import Namespace
+
+
+class TrackedRun(NamedTuple):
+ """The objects an Operator keeps for each task run it has launched."""
+
+ # The launched task run; polled for completion by the tracking thread
+ task_run: TaskRun
+ # Architect deployed for the run; shut down once the run is complete
+ architect: "Architect"
+ # Runner created from the blueprint's TaskRunnerClass for this run
+ task_runner: "TaskRunner"
+ # Launcher that created the assignments and launched the units
+ task_launcher: TaskLauncher
+ # Supervisor job registered for the run; passed to Supervisor.shutdown_job
+ job: Job
+
+
+class Operator:
+ """
+ Acting as the controller behind the curtain, the Operator class
+ is responsible for managing the knobs, switches, and dials
+ of the rest of the Mephisto architecture.
+
+ Most convenience scripts for using Mephisto will use an Operator
+ to get the job done, though this class itself is also a
+ good model to use to understand how the underlying
+ architecture works in order to build custom jobs or workflows.
+ """
+
+ def __init__(self, db: "MephistoDB"):
+     """Create the supervisor for `db` and start the thread that tracks launched runs"""
+     self.db = db
+     self.supervisor = Supervisor(db)
+     self._task_runs_tracked: Dict[str, TrackedRun] = {}
+     self.is_shutdown = False
+     tracker = threading.Thread(
+         name="Operator-tracking-thread", target=self._track_and_kill_runs
+     )
+     self._run_tracker_thread = tracker
+     tracker.start()
+
+ @staticmethod
+ def _get_baseline_argparser() -> ArgumentParser:
+     """Build a parser holding the required baseline arguments to launch a job"""
+     # (flag, destination attribute, help text) for each required argument
+     required_args = (
+         ("--blueprint-type", "blueprint_type", "Name of the blueprint to launch"),
+         (
+             "--architect-type",
+             "architect_type",
+             "Name of the architect to launch with",
+         ),
+         (
+             "--requester-name",
+             "requester_name",
+             "Identifier for the requester to launch as",
+         ),
+     )
+     parser = ArgumentParser()
+     for flag, dest, help_text in required_args:
+         parser.add_argument(flag, dest=dest, help=help_text, required=True)
+     return parser
+
+ def get_running_task_runs(self):
+     """Return a shallow copy of the tracked runs, keyed by task run id"""
+     return dict(self._task_runs_tracked)
+
+ def parse_and_launch_run(
+     self,
+     arg_list: Optional[List[str]] = None,
+     extra_args: Optional[Dict[str, Any]] = None,
+ ) -> Optional[str]:
+     """
+     Deprecated entry point: always raises, pointing callers at the
+     Hydra migration docs. The arguments are ignored.
+     """
+     deprecation_message = (
+         "Operator.parse_and_launch_run has been deprecated in favor "
+         "of using Hydra for argument configuration. See the docs at "
+         "https://github.com/facebookresearch/Mephisto/blob/master/docs/hydra_migration.md "
+         "in order to upgrade."
+     )
+     raise Exception(deprecation_message)
+
+ def validate_and_run_config_or_die(
+     self, run_config: DictConfig, shared_state: Optional[SharedTaskState] = None
+ ) -> str:
+     """
+     Validate the given run config, create a task run for it and launch it.
+
+     Args:
+         run_config: config with the `provider`, `blueprint`, `architect`
+             and `task` sections read below.
+         shared_state: state handed to the blueprint, architect, provider
+             and task runner. A fresh SharedTaskState is used when omitted.
+
+     Returns:
+         The db_id of the newly created and launched task run.
+
+     Raises:
+         EntryDoesNotExistException: no requester has the configured name
+             (other than the special "MOCK_REQUESTER").
+         AssertionError: the requester's provider type differs from the
+             configured one.
+         Exception: anything raised while building or deploying the run is
+             logged and re-raised after trying to shut down the architect.
+     """
+     if shared_state is None:
+         shared_state = SharedTaskState()
+
+     # First try to find the requester:
+     requester_name = run_config.provider.requester_name
+     requesters = self.db.find_requesters(requester_name=requester_name)
+     if len(requesters) == 0:
+         if run_config.provider.requester_name == "MOCK_REQUESTER":
+             requesters = [get_mock_requester(self.db)]
+         else:
+             raise EntryDoesNotExistException(
+                 f"No requester found with name {requester_name}"
+             )
+     requester = requesters[0]
+     requester_id = requester.db_id
+     provider_type = requester.provider_type
+     assert provider_type == run_config.provider._provider_type, (
+         f"Found requester for name {requester_name} is not "
+         f"of the specified type {run_config.provider._provider_type}, "
+         f"but is instead {provider_type}."
+     )
+
+     # Next get the abstraction classes, and run validation
+     # before anything is actually created in the database
+     blueprint_type = run_config.blueprint._blueprint_type
+     architect_type = run_config.architect._architect_type
+     BlueprintClass = get_blueprint_from_type(blueprint_type)
+     ArchitectClass = get_architect_from_type(architect_type)
+     CrowdProviderClass = get_crowd_provider_from_type(provider_type)
+
+     BlueprintClass.assert_task_args(run_config, shared_state)
+     ArchitectClass.assert_task_args(run_config, shared_state)
+     CrowdProviderClass.assert_task_args(run_config, shared_state)
+
+     # Find an existing task or create a new one
+     task_name = run_config.task.get("task_name", None)
+     if task_name is None:
+         task_name = blueprint_type
+         logger.warning(
+             f"Task is using the default blueprint name {task_name} as a name, "
+             "as no task_name is provided"
+         )
+     tasks = self.db.find_tasks(task_name=task_name)
+     if len(tasks) == 0:
+         task_id = self.db.new_task(task_name, blueprint_type)
+     else:
+         task_id = tasks[0].db_id
+
+     logger.info(f"Creating a task run under task name: {task_name}")
+
+     # Create a new task run
+     new_run_id = self.db.new_task_run(
+         task_id,
+         requester_id,
+         json.dumps(OmegaConf.to_container(run_config, resolve=True)),
+         provider_type,
+         blueprint_type,
+         requester.is_sandbox(),
+     )
+     task_run = TaskRun(self.db, new_run_id)
+
+     # Stays None until the architect is constructed, so the error handler
+     # below only attempts a shutdown when there is an architect to shut down
+     architect = None
+     try:
+         # Register the blueprint with args to the task run,
+         # ensure cached
+         blueprint = BlueprintClass(task_run, run_config, shared_state)
+         task_run.get_blueprint(args=run_config, shared_state=shared_state)
+
+         # If anything fails after here, we have to cleanup the architect
+         build_dir = os.path.join(task_run.get_run_dir(), "build")
+         os.makedirs(build_dir, exist_ok=True)
+         architect = ArchitectClass(
+             self.db, run_config, shared_state, task_run, build_dir
+         )
+
+         # Setup and deploy the server
+         architect.prepare()
+         task_url = architect.deploy()
+
+         # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
+         # happen after everything has already been reviewed, this way it's possible to
+         # retrieve the exact build directory to review a task for real
+         architect.cleanup()
+
+         # Create the backend runner
+         task_runner = BlueprintClass.TaskRunnerClass(
+             task_run, run_config, shared_state
+         )
+
+         # Small hack for auto appending block qualification
+         existing_qualifications = shared_state.qualifications
+         if run_config.blueprint.get("block_qualification", None) is not None:
+             existing_qualifications.append(
+                 make_qualification_dict(
+                     run_config.blueprint.block_qualification, QUAL_NOT_EXIST, None
+                 )
+             )
+         if run_config.blueprint.get("onboarding_qualification", None) is not None:
+             existing_qualifications.append(
+                 make_qualification_dict(
+                     OnboardingRequired.get_failed_qual(
+                         run_config.blueprint.onboarding_qualification
+                     ),
+                     QUAL_NOT_EXIST,
+                     None,
+                 )
+             )
+         shared_state.qualifications = existing_qualifications
+
+         # Register the task with the provider
+         provider = CrowdProviderClass(self.db)
+         provider.setup_resources_for_task_run(
+             task_run, run_config, shared_state, task_url
+         )
+
+         initialization_data_array = blueprint.get_initialization_data()
+
+         # Link the job together
+         job = self.supervisor.register_job(
+             architect, task_runner, provider, existing_qualifications
+         )
+         if self.supervisor.sending_thread is None:
+             self.supervisor.launch_sending_thread()
+     except (KeyboardInterrupt, Exception):
+         logger.error(
+             "Encountered error while launching run, shutting down", exc_info=True
+         )
+         if architect is not None:
+             try:
+                 architect.shutdown()
+             except (KeyboardInterrupt, Exception) as architect_exception:
+                 logger.exception(
+                     f"Could not shut down architect: {architect_exception}",
+                     exc_info=True,
+                 )
+         # Re-raise the launch error itself, not a failure of the cleanup
+         raise
+
+     launcher = TaskLauncher(self.db, task_run, initialization_data_array)
+     launcher.create_assignments()
+     launcher.launch_units(task_url)
+
+     self._task_runs_tracked[task_run.db_id] = TrackedRun(
+         task_run=task_run,
+         task_launcher=launcher,
+         task_runner=task_runner,
+         architect=architect,
+         job=job,
+     )
+     task_run.update_completion_progress(status=False)
+
+     return task_run.db_id
+
+ def _track_and_kill_runs(self):
+     """
+     Body of the background tracking thread: until the operator is
+     shut down, poll every tracked run every two seconds and tear
+     down the job, architect and launcher of each completed run.
+     """
+     while not self.is_shutdown:
+         # Iterate a snapshot, since finished runs are deleted while looping
+         for tracked in list(self._task_runs_tracked.values()):
+             run = tracked.task_run
+             run.update_completion_progress(task_launcher=tracked.task_launcher)
+             if run.get_is_completed():
+                 self.supervisor.shutdown_job(tracked.job)
+                 tracked.architect.shutdown()
+                 tracked.task_launcher.shutdown()
+                 del self._task_runs_tracked[run.db_id]
+         time.sleep(2)
+
+ def shutdown(self, skip_input=True):
+     """
+     Shut the operator down: expire the units of all tracked runs, wait
+     for the runs to complete (shutting down each architect as its run
+     finishes), then shut down the supervisor and join the tracking thread.
+
+     A KeyboardInterrupt or SystemExit while waiting skips the wait and
+     shuts down the architects of the outstanding runs immediately.
+
+     `skip_input` is accepted for interface compatibility and is not used.
+     """
+     logger.info("operator shutting down")
+     self.is_shutdown = True
+     # Work on a snapshot: the tracking thread may still be finishing an
+     # iteration and deleting entries, and iterating the live dict view
+     # while it changes size raises a RuntimeError
+     remaining_runs = list(self._task_runs_tracked.values())
+     for tracked_run in remaining_runs:
+         logger.info("expiring units")
+         tracked_run.task_launcher.shutdown()
+         tracked_run.task_launcher.expire_units()
+     try:
+         while len(remaining_runs) > 0:
+             next_runs = []
+             for tracked_run in remaining_runs:
+                 if tracked_run.task_run.get_is_completed():
+                     tracked_run.architect.shutdown()
+                 else:
+                     next_runs.append(tracked_run)
+             if len(next_runs) > 0:
+                 # Report the runs still outstanding, not the ones just checked
+                 logger.info(
+                     f"Waiting on {len(next_runs)} task runs, Ctrl-C ONCE to FORCE QUIT"
+                 )
+                 time.sleep(30)
+             remaining_runs = next_runs
+     except Exception as e:
+         logger.exception(
+             f"Encountered problem during shutting down {e}", exc_info=True
+         )
+         traceback.print_exc()
+     except (KeyboardInterrupt, SystemExit):
+         logger.info(
+             "Skipping waiting for outstanding task completions, shutting down servers now!"
+         )
+         for tracked_run in remaining_runs:
+             tracked_run.architect.shutdown()
+     finally:
+         self.supervisor.shutdown()
+         self._run_tracker_thread.join()
+
+ def validate_and_run_config(
+     self, run_config: DictConfig, shared_state: Optional[SharedTaskState] = None
+ ) -> Optional[str]:
+     """
+     Non-throwing variant of validate_and_run_config_or_die: logs any
+     failure and returns None instead of raising. Generally for use
+     in scripts.
+     """
+     try:
+         launched_run_id = self.validate_and_run_config_or_die(
+             run_config=run_config, shared_state=shared_state
+         )
+     except (KeyboardInterrupt, Exception):
+         logger.error("Ran into error while launching run: ", exc_info=True)
+         return None
+     return launched_run_id
+
+ def parse_and_launch_run_wrapper(
+     self,
+     arg_list: Optional[List[str]] = None,
+     extra_args: Optional[Dict[str, Any]] = None,
+ ) -> Optional[str]:
+     """
+     Deprecated entry point: always raises, pointing callers at the
+     Hydra migration docs. The arguments are ignored.
+     """
+     deprecation_message = (
+         "Operator.parse_and_launch_run_wrapper has been deprecated in favor "
+         "of using Hydra for argument configuration. See the docs at "
+         "https://github.com/facebookresearch/Mephisto/blob/master/docs/hydra_migration.md "
+         "in order to upgrade."
+     )
+     raise Exception(deprecation_message)
+
+ def print_run_details(self):
+     """Log the id of every task run that is currently being tracked"""
+     # TODO(#93) parse these tasks and get the full details
+     for run_id in self.get_running_task_runs().keys():
+         logger.info(f"Operator running task ID = {run_id}")
+
+ def wait_for_runs_then_shutdown(
+     self, skip_input=False, log_rate: Optional[int] = None
+ ) -> None:
+     """
+     Wait for task_runs to complete, and then shutdown.
+
+     Set log_rate to get print statements of currently running tasks
+     at the specified interval (in seconds).
+
+     When an exception interrupts the wait and skip_input is False, the
+     traceback is printed and the user is asked whether to shut down;
+     with skip_input True the exception is passed on without asking.
+     The operator is always shut down before this method returns.
+     """
+     # NOTE: `traceback` must come from the module-level import. A local
+     # `import traceback` anywhere in this function would make the name
+     # local to the whole function, so the first print_exc() below would
+     # raise UnboundLocalError before the user is ever prompted.
+     try:
+         try:
+             last_log = 0.0
+             while len(self.get_running_task_runs()) > 0:
+                 if log_rate is not None:
+                     if time.time() - last_log > log_rate:
+                         last_log = time.time()
+                         self.print_run_details()
+                 time.sleep(10)
+
+         except Exception as e:
+             if skip_input:
+                 raise e
+
+             traceback.print_exc()
+             should_quit = input(
+                 "The above exception happened while running a task, do "
+                 "you want to shut down? (y)/n: "
+             )
+             if should_quit not in ["n", "N", "no", "No"]:
+                 raise e
+
+     except Exception:
+         traceback.print_exc()
+     except (KeyboardInterrupt, SystemExit):
+         logger.exception(
+             "Cleaning up after keyboard interrupt, please wait!", exc_info=True
+         )
+     finally:
+         self.shutdown()
diff --git a/mephisto/operations/registry.py b/mephisto/operations/registry.py
new file mode 100644
index 000000000..d8ea8e515
--- /dev/null
+++ b/mephisto/operations/registry.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union, Type, Dict, Any, List, TYPE_CHECKING
+from mephisto.operations.utils import get_root_dir, get_provider_dir
+from mephisto.operations.hydra_config import register_abstraction_config
+import importlib
+import os
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.blueprint import Blueprint
+ from mephisto.abstractions.crowd_provider import CrowdProvider
+ from mephisto.abstractions.architect import Architect
+
+
+# Registries of the known abstraction classes, keyed by their type name
+# (BLUEPRINT_TYPE / ARCHITECT_TYPE / PROVIDER_TYPE); filled in by the
+# register_mephisto_abstraction decorator
+BLUEPRINTS: Dict[str, Type["Blueprint"]] = {}
+ARCHITECTS: Dict[str, Type["Architect"]] = {}
+PROVIDERS: Dict[str, Type["CrowdProvider"]] = {}
+
+
+def register_mephisto_abstraction():
+    """
+    Build the class decorator that records a Blueprint, Architect or
+    CrowdProvider subclass in the matching registry and stores its
+    ArgsClass in the config store.
+    """
+
+    def register_cls(
+        base_class: Union[Type["Blueprint"], Type["Architect"], Type["CrowdProvider"]]
+    ):
+        from mephisto.abstractions.blueprint import Blueprint
+        from mephisto.abstractions.crowd_provider import CrowdProvider
+        from mephisto.abstractions.architect import Architect
+
+        # (abstraction base, registry, attribute holding the name, config key),
+        # checked in this order
+        abstraction_specs = (
+            (Blueprint, BLUEPRINTS, "BLUEPRINT_TYPE", "blueprint"),
+            (Architect, ARCHITECTS, "ARCHITECT_TYPE", "architect"),
+            (CrowdProvider, PROVIDERS, "PROVIDER_TYPE", "provider"),
+        )
+        for abstraction, registry, name_attr, type_key in abstraction_specs:
+            if issubclass(base_class, abstraction):
+                name = getattr(base_class, name_attr)
+                registry[name] = base_class
+                break
+        else:
+            raise AssertionError(
+                f"Provided class {base_class} not a child of one of the mephisto "
+                "abstractions, expected one of Blueprint, Architect, or CrowdProvider."
+            )
+        register_abstraction_config(
+            name=name, node=base_class.ArgsClass, abstraction_type=type_key
+        )
+        return base_class
+
+    return register_cls
+
+
+def uses_mephisto(module: Any):
+    """
+    Placeholder for marking a module as one that defines classes for
+    Mephisto abstractions; currently does nothing. Meant to be put in
+    the __init__.py of the base module.
+    """
+    # TODO register the module and file path to the local mephisto registry file
+
+
+def _import_suffixed_modules(directory: str, suffix: str, package: str) -> None:
+    """Import each module in `directory` whose filename ends with `suffix`, as `package.<module>`"""
+    for filename in os.listdir(directory):
+        if filename.endswith(suffix):
+            # Drop only the trailing ".py" extension to get the module name
+            module_name = filename[: -len(".py")]
+            importlib.import_module(f"{package}.{module_name}")
+
+
+def fill_registries():
+    """
+    Ensure that all of the required modules are picked up by the mephisto server.
+
+    Imports every `*provider.py` module found one folder below the provider
+    directory, every `*architect.py` module in the architects folder, and
+    every `*blueprint.py` module found one folder below the blueprints folder.
+    """
+    # TODO pick up on local file changes such that Mephisto won't need to be
+    # restarted to add new abstractions
+
+    # TODO pass through all of the use_mephisto modules in the local registry file
+    # to ensure that all of the modules are added
+
+    # TODO(WISH) these can be made recursive finds to pass through subfolders
+    # Import Mephisto CrowdProviders
+    provider_root = get_provider_dir()
+    for dir_name in os.listdir(provider_root):
+        provider_dir = os.path.join(provider_root, dir_name)
+        if os.path.isdir(provider_dir):
+            _import_suffixed_modules(
+                provider_dir,
+                "provider.py",
+                f"mephisto.abstractions.providers.{dir_name}",
+            )
+
+    # Import Mephisto Architects
+    architect_root = os.path.join(
+        get_root_dir(), "mephisto", "abstractions", "architects"
+    )
+    _import_suffixed_modules(
+        architect_root, "architect.py", "mephisto.abstractions.architects"
+    )
+
+    # Import Mephisto Blueprints
+    blueprint_root = os.path.join(
+        get_root_dir(), "mephisto", "abstractions", "blueprints"
+    )
+    for dir_name in os.listdir(blueprint_root):
+        blueprint_dir = os.path.join(blueprint_root, dir_name)
+        if os.path.isdir(blueprint_dir):
+            _import_suffixed_modules(
+                blueprint_dir,
+                "blueprint.py",
+                f"mephisto.abstractions.blueprints.{dir_name}",
+            )
+
+
+def get_crowd_provider_from_type(provider_type: str) -> Type["CrowdProvider"]:
+    """Look up the registered crowd provider class for the given type name"""
+    if provider_type not in PROVIDERS:
+        raise NotImplementedError(
+            f"Missing provider type {provider_type}, is it registered?"
+        )
+    return PROVIDERS[provider_type]
+
+
+def get_blueprint_from_type(task_type: str) -> Type["Blueprint"]:
+    """Look up the registered blueprint class for the given type name"""
+    if task_type not in BLUEPRINTS:
+        raise NotImplementedError(
+            f"Missing blueprint type {task_type}, is it registered?"
+        )
+    return BLUEPRINTS[task_type]
+
+
+def get_architect_from_type(architect_type: str) -> Type["Architect"]:
+    """Look up the registered architect class for the given type name"""
+    if architect_type not in ARCHITECTS:
+        raise NotImplementedError(
+            f"Missing architect type {architect_type}, is it registered?"
+        )
+    return ARCHITECTS[architect_type]
+
+
+def get_valid_provider_types() -> List[str]:
+    """
+    List the names of the provider types that are currently
+    registered with the mephisto framework
+    """
+    return list(PROVIDERS)
+
+
+def get_valid_blueprint_types() -> List[str]:
+    """
+    List the names of the blueprint types that are currently
+    registered with the mephisto framework
+    """
+    return list(BLUEPRINTS)
+
+
+def get_valid_architect_types() -> List[str]:
+    """
+    List the names of the architect types that are currently
+    registered with the mephisto framework
+    """
+    return list(ARCHITECTS)
diff --git a/mephisto/operations/supervisor.py b/mephisto/operations/supervisor.py
new file mode 100644
index 000000000..9ac75a3f5
--- /dev/null
+++ b/mephisto/operations/supervisor.py
@@ -0,0 +1,855 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import threading
+from queue import PriorityQueue, Empty
+import time
+from mephisto.data_model.packet import (
+ Packet,
+ PACKET_TYPE_ALIVE,
+ PACKET_TYPE_AGENT_ACTION,
+ PACKET_TYPE_NEW_AGENT,
+ PACKET_TYPE_NEW_WORKER,
+ PACKET_TYPE_REQUEST_AGENT_STATUS,
+ PACKET_TYPE_RETURN_AGENT_STATUS,
+ PACKET_TYPE_INIT_DATA,
+ PACKET_TYPE_GET_INIT_DATA,
+ PACKET_TYPE_PROVIDER_DETAILS,
+ PACKET_TYPE_SUBMIT_ONBOARDING,
+ PACKET_TYPE_REQUEST_ACTION,
+ PACKET_TYPE_UPDATE_AGENT_STATUS,
+)
+from mephisto.data_model.worker import Worker
+from mephisto.data_model.qualification import worker_is_qualified
+from mephisto.data_model.agent import Agent, OnboardingAgent
+from mephisto.abstractions.blueprint import OnboardingRequired, AgentState
+from mephisto.operations.registry import get_crowd_provider_from_type
+from mephisto.abstractions.channel import Channel, STATUS_CHECK_TIME
+
+from dataclasses import dataclass
+
+from typing import Dict, Set, Optional, List, Any, Union, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.data_model.assignment import Assignment, Unit
+ from mephisto.abstractions.database import MephistoDB
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.blueprint import TaskRunner
+ from mephisto.abstractions.crowd_provider import CrowdProvider
+ from mephisto.abstractions.architect import Architect
+
+from mephisto.operations.logger_core import get_logger
+
+# Module-level logger for the supervisor, requested at the "info" level
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+# This class manages communications between the server
+# and workers, ensures that their status is properly tracked,
+# and also provides some helping utility functions for
+# groups of workers or worker/agent compatibility.
+
+# Mostly, the supervisor oversees the communications
+# between jobs and workers over the channels
+
+# Explanation text for each of these final agent statuses
+# NOTE(review): presumably sent to the worker when their agent reaches the
+# status; the use site is not in this part of the file, confirm there
+STATUS_TO_TEXT_MAP = {
+ AgentState.STATUS_EXPIRED: "This task is no longer available to be completed. "
+ "Please return it and try a different task",
+ AgentState.STATUS_TIMEOUT: "You took to long to respond to this task, and have timed out. "
+ "The task is no longer available, please return it.",
+ AgentState.STATUS_DISCONNECT: "You have disconnected from our server during the duration of the task. "
+ "If you have done substantial work, please reach out to see if we can recover it. ",
+ AgentState.STATUS_PARTNER_DISCONNECT: "One of your partners has disconnected while working on this task. We won't penalize "
+ "you for them leaving, so please submit this task as is.",
+}
+
+# Sender id used on packets that originate from the supervisor itself
+SYSTEM_CHANNEL_ID = "mephisto" # TODO pull from somewhere
+# Seconds register_channel waits for a new channel to report alive before giving up
+START_DEATH_TIME = 10
+
+# State storage
+@dataclass
+class Job:
+ # State the Supervisor keeps for one registered job (see register_job)
+ architect: "Architect"
+ task_runner: "TaskRunner"
+ provider: "CrowdProvider"
+ # Qualifications a worker is checked against when registering on this job
+ qualifications: List[Dict[str, Any]]
+ # Ids of the channels registered for this job; shutdown_job closes them all
+ registered_channel_ids: List[str]
+
+
+@dataclass
+class ChannelInfo:
+ # Ties a registered channel to its id and to the job it was registered for
+ channel_id: str
+ job: "Job"
+ channel: Channel
+
+
+@dataclass
+class AgentInfo:
+ # Tracking entry for one agent (regular or onboarding) known to the Supervisor
+ agent: Union["Agent", "OnboardingAgent"]
+ # NOTE(review): presumably the id of the channel the agent communicates over; confirm at use sites
+ used_channel_id: str
+ # NOTE(review): presumably the thread running the agent's assignment, None until one is launched; confirm
+ assignment_thread: Optional[threading.Thread] = None
+
+
+class Supervisor:
+ def __init__(self, db: "MephistoDB"):
+     """Start with empty agent, channel and message tracking state for `db`"""
+     self.db = db
+
+     # Tracked state
+     self.agents: Dict[str, AgentInfo] = {}
+     self.agents_by_registration_id: Dict[str, AgentInfo] = {}
+     self.channels: Dict[str, ChannelInfo] = {}
+     # Map from onboarding id to agent request packet
+     self.onboarding_packets: Dict[str, Packet] = {}
+
+     # Agent status handling
+     self.last_status_check = time.time()
+
+     # Message handling
+     self.sending_thread: Optional[threading.Thread] = None
+     self.message_queue: List[Packet] = []
+
+ def _on_channel_open(self, channel_id: str):
+     """Channel-open handler: send an alive packet over the channel that just opened"""
+     self._send_alive(self.channels[channel_id])
+
+ def _on_catastrophic_disconnect(self, channel_id):
+     """Catastrophic-disconnect handler: currently only logs the event as an error"""
+     # TODO(#102) Catastrophic disconnect needs to trigger cleanup
+     message = f"Channel {channel_id} called on_catastrophic_disconnect"
+     logger.error(message)
+
+ def _on_channel_message(self, channel_id: str, packet: Packet):
+     """
+     Incoming message handler: passes the packet and its channel info to
+     the internal handler, logging (and re-raising) any failure
+     """
+     try:
+         self._on_message(packet, self.channels[channel_id])
+     except Exception:
+         # TODO(#93) better error handling about failed messages
+         logger.exception(
+             f"Channel {channel_id} encountered error on packet {packet}",
+             exc_info=True,
+         )
+         raise
+
+ def register_job(
+     self,
+     architect: "Architect",
+     task_runner: "TaskRunner",
+     provider: "CrowdProvider",
+     qualifications: Optional[List[Dict[str, Any]]] = None,
+ ) -> "Job":
+     """
+     Create a Job for the given components and register every channel
+     the architect provides for it.
+
+     Args:
+         architect: supplies the channels, wired to this supervisor's
+             open / catastrophic-disconnect / message handlers.
+         task_runner: runner stored on the job.
+         provider: crowd provider stored on the job.
+         qualifications: qualifications stored on the job; defaults to
+             an empty list.
+
+     Returns:
+         The new Job, with the ids of its registered channels filled in.
+     """
+     if qualifications is None:
+         qualifications = []
+     channels = architect.get_channels(
+         self._on_channel_open,
+         self._on_catastrophic_disconnect,
+         self._on_channel_message,
+     )
+     job = Job(
+         architect=architect,
+         task_runner=task_runner,
+         provider=provider,
+         qualifications=qualifications,
+         registered_channel_ids=[],
+     )
+     for channel in channels:
+         channel_id = self.register_channel(channel, job)
+         job.registered_channel_ids.append(channel_id)
+     return job
+
+ def register_channel(self, channel: Channel, job: "Job") -> str:
+     """
+     Register the channel to the specific job, open it, and wait for it
+     to report alive.
+
+     Returns:
+         The id of the registered channel.
+
+     Raises:
+         ConnectionRefusedError: the channel did not become alive within
+             START_DEATH_TIME seconds. The channel is closed and
+             unregistered before raising.
+     """
+     channel_id = channel.channel_id
+
+     channel_info = ChannelInfo(channel_id=channel_id, channel=channel, job=job)
+     self.channels[channel_id] = channel_info
+
+     channel.open()
+     self._send_alive(channel_info)
+     start_time = time.time()
+     while not channel.is_alive():
+         if time.time() - start_time > START_DEATH_TIME:
+             # TODO(OWN) Ask channel why it might have failed to connect?
+             # Close AND unregister, so the dead channel doesn't linger in
+             # self.channels and get closed a second time on shutdown
+             self.close_channel(channel_id)
+             raise ConnectionRefusedError(  # noqa F821 we only support py3
+                 "Was not able to establish a connection with the server, "
+                 "please try to run again. If that fails,"
+                 "please ensure that your local device has the correct SSL "
+                 "certs installed."
+             )
+         try:
+             self._send_alive(channel_info)
+         except Exception:
+             # Best effort: sending may fail while the channel is still
+             # coming up, so note it and retry on the next pass
+             logger.debug(
+                 f"Alive packet to channel {channel_id} failed, retrying",
+                 exc_info=True,
+             )
+         time.sleep(0.3)
+     return channel_id
+
+ def close_channel(self, channel_id: str):
+     """Close the channel registered under the given id, then stop tracking it"""
+     channel_info = self.channels[channel_id]
+     channel_info.channel.close()
+     del self.channels[channel_id]
+
+ def shutdown_job(self, job: Job):
+     """Close every channel that was registered for the given job"""
+     for registered_id in job.registered_channel_ids:
+         self.close_channel(registered_id)
+
+ def shutdown(self):
+     """Close every registered channel, then join the sending thread if there is one"""
+     # Iterate a copy of the ids: close_channel removes entries as it goes
+     for channel_id in list(self.channels):
+         self.close_channel(channel_id)
+     sender = self.sending_thread
+     if sender is not None:
+         sender.join()
+
+ def _send_alive(self, channel_info: ChannelInfo) -> bool:
+     """Send an alive packet from the system over the given channel, returning the send result"""
+     logger.info("Sending alive")
+     alive_packet = Packet(
+         packet_type=PACKET_TYPE_ALIVE,
+         sender_id=SYSTEM_CHANNEL_ID,
+         receiver_id=channel_info.channel_id,
+     )
+     return channel_info.channel.send(alive_packet)
+
+ def _on_act(self, packet: Packet, channel_info: ChannelInfo):
+     """
+     Handle an action packet sent by an agent: download any files that
+     come with a submit, then queue the packet on the agent and signal
+     that an action is available
+     """
+     agent = self.agents[packet.sender_id].agent
+
+     # Only a submit packet can carry files, and those need to be
+     # downloaded before the action is handed to the agent
+     is_submit = packet.data.get("MEPHISTO_is_submit") is True
+     data_files = packet.data.get("files") if is_submit else None
+     if data_files is not None:
+         save_dir = agent.get_data_dir()
+         architect = channel_info.job.architect
+         for f_obj in data_files:
+             architect.download_file(f_obj["filename"], save_dir)
+
+     # TODO(OWN) Packets stored as info from workers can also be
+     # saved somewhere locally just in case the world dies, and
+     # then cleaned up once the world completes successfully
+     agent.pending_actions.append(packet)
+     agent.has_action.set()
+
+ def _on_submit_onboarding(self, packet: Packet, channel_info: ChannelInfo):
+     """
+     Handle an onboarding submission: queue it on the onboarding agent,
+     register the agent out of onboarding, and drop the onboarding
+     tracking entries
+     """
+     onboarding_id = packet.sender_id
+     if onboarding_id not in self.agents:
+         logger.warning(
+             f"Onboarding agent {onboarding_id} already submitted or disconnected, "
+             f"but is calling _on_submit_onboarding again"
+         )
+         return
+     agent_info = self.agents[onboarding_id]
+     agent = agent_info.agent
+     # Move the request id from the submission packet onto the original
+     # packet (which has the required registration data), so that the
+     # answer goes back under the new request
+     submitted_request_id = packet.data["request_id"]
+     self.onboarding_packets[onboarding_id].data["request_id"] = submitted_request_id
+     del packet.data["request_id"]
+     assert isinstance(
+         agent, OnboardingAgent
+     ), "Only onboarding agents should submit onboarding"
+     agent.pending_actions.append(packet)
+     agent.has_action.set()
+     self._register_agent_from_onboarding(agent_info)
+     logger.info(f"Onboarding agent {onboarding_id} registered out from onboarding")
+     del self.agents[onboarding_id]
+     del self.onboarding_packets[onboarding_id]
+
+ def _register_worker(self, packet: Packet, channel_info: ChannelInfo):
+     """
+     Process a worker registration packet: find or create the worker,
+     then queue a provider-details reply holding the worker's id, or
+     None if the worker is not qualified for the job
+     """
+     crowd_data = packet.data["provider_data"]
+     crowd_provider = channel_info.job.provider
+     worker_name = crowd_data["worker_name"]
+
+     workers = self.db.find_workers(worker_name=worker_name)
+     if len(workers) == 0:
+         # TODO(WISH) get rid of sandbox designation
+         workers = self.db.find_workers(worker_name=worker_name + "_sandbox")
+     if len(workers) > 0:
+         worker = workers[0]
+     else:
+         worker = crowd_provider.WorkerClass.new_from_provider_data(
+             self.db, crowd_data
+         )
+
+     # Unqualified workers get a reply without a worker id
+     qualified = worker_is_qualified(worker, channel_info.job.qualifications)
+     reply_worker_id = worker.db_id if qualified else None
+     reply = Packet(
+         packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+         sender_id=SYSTEM_CHANNEL_ID,
+         receiver_id=channel_info.channel_id,
+         data={
+             "request_id": packet.data["request_id"],
+             "worker_id": reply_worker_id,
+         },
+     )
+     self.message_queue.append(reply)
+
+ def _launch_and_run_onboarding(
+ self, agent_info: "AgentInfo", task_runner: "TaskRunner"
+ ):
+ """Launch a thread to supervise the completion of onboarding for a task"""
+ tracked_agent = agent_info.agent
+ assert isinstance(tracked_agent, OnboardingAgent), (
+ "Can launch onboarding for OnboardingAgents, not Agents"
+ f", got {tracked_agent}"
+ )
+ try:
+ task_runner.launch_onboarding(tracked_agent)
+ except Exception as e:
+ import traceback
+
+ traceback.print_exc()
+ task_runner.cleanup_onboarding(tracked_agent)
+ finally:
+ if tracked_agent.get_status() not in [
+ AgentState.STATUS_WAITING,
+ AgentState.STATUS_APPROVED,
+ AgentState.STATUS_REJECTED,
+ ]:
+ onboarding_id = tracked_agent.get_agent_id()
+ logger.info(
+ f"Onboarding agent {onboarding_id} disconnected or errored, "
+ f"final status {tracked_agent.get_status()}."
+ )
+ del self.agents[onboarding_id]
+ del self.onboarding_packets[onboarding_id]
+
+ def _launch_and_run_assignment(
+ self,
+ assignment: "Assignment",
+ agent_infos: List["AgentInfo"],
+ task_runner: "TaskRunner",
+ ):
+ """Launch a thread to supervise the completion of an assignment"""
+ try:
+ tracked_agents: List["Agent"] = []
+ for a in agent_infos:
+ assert isinstance(
+ a.agent, Agent
+ ), f"Can launch assignments for Agents, not OnboardingAgents, got {a.agent}"
+ tracked_agents.append(a.agent)
+ task_runner.launch_assignment(assignment, tracked_agents)
+ for agent_info in agent_infos:
+ self._mark_agent_done(agent_info)
+ # Wait for agents to be complete
+ for agent_info in agent_infos:
+ agent = agent_info.agent
+ if agent.get_status() not in AgentState.complete():
+ if not agent.did_submit.is_set():
+ # Wait for a submit to occur
+ # TODO(#94) make submit timeout configurable
+ agent.has_action.wait(timeout=300)
+ agent.act()
+ agent.mark_done()
+ except Exception as e:
+ logger.exception(f"Cleaning up assignment: {e}", exc_info=True)
+ task_runner.cleanup_assignment(assignment)
+ finally:
+ task_run = task_runner.task_run
+ for unit in assignment.get_units():
+ task_run.clear_reservation(unit)
+
+ def _launch_and_run_unit(
+ self, unit: "Unit", agent_info: "AgentInfo", task_runner: "TaskRunner"
+ ):
+ """Launch a thread to supervise the completion of a single unit"""
+ try:
+ agent = agent_info.agent
+ assert isinstance(
+ agent, Agent
+ ), f"Can launch units for Agents, not OnboardingAgents, got {agent}"
+ task_runner.launch_unit(unit, agent)
+ if agent.get_status() not in AgentState.complete():
+ self._mark_agent_done(agent_info)
+ if not agent.did_submit.is_set():
+ # Wait for a submit to occur
+ # TODO(#94) make submit timeout configurable
+ agent.has_action.wait(timeout=300)
+ agent.act()
+ agent.mark_done()
+ except Exception as e:
+ logger.exception(f"Cleaning up unit: {e}", exc_info=True)
+ task_runner.cleanup_unit(unit)
+ finally:
+ task_runner.task_run.clear_reservation(unit)
+
+ def _assign_unit_to_agent(
+ self, packet: Packet, channel_info: ChannelInfo, units: List["Unit"]
+ ):
+ """Handle creating an agent for the specific worker to register an agent"""
+
+ crowd_data = packet.data["provider_data"]
+ task_run = channel_info.job.task_runner.task_run
+ crowd_provider = channel_info.job.provider
+ worker_id = crowd_data["worker_id"]
+ worker = Worker(self.db, worker_id)
+
+ logger.debug(
+ f"Worker {worker_id} is being assigned one of " f"{len(units)} units."
+ )
+
+ reserved_unit = None
+ while len(units) > 0 and reserved_unit is None:
+ unit = units.pop(0)
+ reserved_unit = task_run.reserve_unit(unit)
+ if reserved_unit is None:
+ self.message_queue.append(
+ Packet(
+ packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={"request_id": packet.data["request_id"], "agent_id": None},
+ )
+ )
+ else:
+ agent = crowd_provider.AgentClass.new_from_provider_data(
+ self.db, worker, unit, crowd_data
+ )
+ logger.debug(f"Created agent {agent}, {agent.db_id}.")
+ self.message_queue.append(
+ Packet(
+ packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={
+ "request_id": packet.data["request_id"],
+ "agent_id": agent.get_agent_id(),
+ },
+ )
+ )
+ agent_info = AgentInfo(agent=agent, used_channel_id=channel_info.channel_id)
+ self.agents[agent.get_agent_id()] = agent_info
+ self.agents_by_registration_id[
+ crowd_data["agent_registration_id"]
+ ] = agent_info
+
+ # Launch individual tasks
+ if not channel_info.job.task_runner.is_concurrent:
+ unit_thread = threading.Thread(
+ target=self._launch_and_run_unit,
+ args=(unit, agent_info, channel_info.job.task_runner),
+ name=f"Unit-thread-{unit.db_id}",
+ )
+ agent_info.assignment_thread = unit_thread
+ unit_thread.start()
+ else:
+ # See if the concurrent unit is ready to launch
+ assignment = unit.get_assignment()
+ agents = assignment.get_agents()
+ if None in agents:
+ agent.update_status(AgentState.STATUS_WAITING)
+ return # need to wait for all agents to be here to launch
+
+ # Launch the backend for this assignment
+ agent_infos = [self.agents[a.db_id] for a in agents if a is not None]
+
+ assign_thread = threading.Thread(
+ target=self._launch_and_run_assignment,
+ args=(assignment, agent_infos, channel_info.job.task_runner),
+ name=f"Assignment-thread-{assignment.db_id}",
+ )
+
+ for agent_info in agent_infos:
+ agent_info.agent.update_status(AgentState.STATUS_IN_TASK)
+ agent_info.assignment_thread = assign_thread
+
+ assign_thread.start()
+
+ def _register_agent_from_onboarding(self, onboarding_agent_info: AgentInfo):
+ """
+ Convert the onboarding agent to a full agent
+ """
+ onboarding_agent = onboarding_agent_info.agent
+ current_status = onboarding_agent.get_status()
+ channel_id = onboarding_agent_info.used_channel_id
+ channel_info = self.channels[channel_id]
+ task_runner = channel_info.job.task_runner
+ task_run = task_runner.task_run
+ blueprint = task_run.get_blueprint(args=task_runner.args)
+ worker = onboarding_agent.get_worker()
+
+ assert (
+ isinstance(blueprint, OnboardingRequired) and blueprint.use_onboarding
+ ), "Should only be registering from onboarding if onboarding is required and set"
+ worker_passed = blueprint.validate_onboarding(worker, onboarding_agent)
+ worker.grant_qualification(
+ blueprint.onboarding_qualification_name, int(worker_passed), skip_crowd=True
+ )
+ if not worker_passed:
+ worker.grant_qualification(
+ blueprint.onboarding_failed_name, int(worker_passed)
+ )
+ onboarding_agent.update_status(AgentState.STATUS_REJECTED)
+ else:
+ onboarding_agent.update_status(AgentState.STATUS_APPROVED)
+
+ # get the list of tentatively valid units
+ units = task_run.get_valid_units_for_worker(worker)
+ usable_units = channel_info.job.task_runner.filter_units_for_worker(
+ units, worker
+ )
+
+ if not worker_passed:
+ # TODO(WISH) it may be worth investigating launching a dummy task for these
+ # instances where a worker has failed onboarding, but the onboarding
+ # task still allowed submission of the failed data (no front-end validation)
+ # units = [self.dummy_launcher.launch_dummy()]
+ # self._assign_unit_to_agent(packet, channel_info, units)
+ usable_units = []
+
+ packet = self.onboarding_packets[onboarding_agent.get_agent_id()]
+ self._try_send_agent_messages(onboarding_agent_info)
+ self._send_status_update(onboarding_agent_info)
+ self._assign_unit_to_agent(packet, channel_info, usable_units)
+
+ def _register_agent(self, packet: Packet, channel_info: ChannelInfo):
+ """Process an agent registration packet to register an agent"""
+ # First see if this is a reconnection
+ crowd_data = packet.data["provider_data"]
+ agent_registration_id = crowd_data["agent_registration_id"]
+ logger.debug(f"Incoming request to register agent {agent_registration_id}.")
+ if agent_registration_id in self.agents_by_registration_id:
+ agent = self.agents_by_registration_id[agent_registration_id].agent
+ # Update the source channel, in case it has changed
+ self.agents[agent.get_agent_id()].used_channel_id = channel_info.channel_id
+ self.message_queue.append(
+ Packet(
+ packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={
+ "request_id": packet.data["request_id"],
+ "agent_id": agent.get_agent_id(),
+ },
+ )
+ )
+ logger.debug(
+ f"Found existing agent_registration_id {agent_registration_id}, "
+ f"reconnecting to agent {agent.get_agent_id()}."
+ )
+ return
+
+ # Process a new agent
+ task_runner = channel_info.job.task_runner
+ task_run = task_runner.task_run
+ worker_id = crowd_data["worker_id"]
+ worker = Worker(self.db, worker_id)
+
+ # get the list of tentatively valid units
+ units = task_run.get_valid_units_for_worker(worker)
+ if len(units) == 0:
+ self.message_queue.append(
+ Packet(
+ packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={"request_id": packet.data["request_id"], "agent_id": None},
+ )
+ )
+ logger.debug(
+ f"Found existing agent_registration_id {agent_registration_id}, "
+ f"had no valid units."
+ )
+ return
+
+ # If there's onboarding, see if this worker has already been disqualified
+ worker_id = crowd_data["worker_id"]
+ worker = Worker(self.db, worker_id)
+ blueprint = task_run.get_blueprint(args=task_runner.args)
+ if isinstance(blueprint, OnboardingRequired) and blueprint.use_onboarding:
+ if worker.is_disqualified(blueprint.onboarding_qualification_name):
+ self.message_queue.append(
+ Packet(
+ packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={
+ "request_id": packet.data["request_id"],
+ "agent_id": None,
+ },
+ )
+ )
+ logger.debug(
+ f"Worker {worker_id} is already disqualified by onboarding "
+ f"qual {blueprint.onboarding_qualification_name}."
+ )
+ return
+ elif not worker.is_qualified(blueprint.onboarding_qualification_name):
+ # Send a packet with onboarding information
+ onboard_data = blueprint.get_onboarding_data(worker.db_id)
+ onboard_agent = OnboardingAgent.new(self.db, worker, task_run)
+ onboard_agent.state.set_init_state(onboard_data)
+ agent_info = AgentInfo(
+ agent=onboard_agent, used_channel_id=channel_info.channel_id
+ )
+ onboard_id = onboard_agent.get_agent_id()
+ # register onboarding agent
+ self.agents[onboard_id] = agent_info
+ self.onboarding_packets[onboard_id] = packet
+ self.message_queue.append(
+ Packet(
+ packet_type=PACKET_TYPE_PROVIDER_DETAILS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={
+ "request_id": packet.data["request_id"],
+ "agent_id": onboard_id,
+ "onboard_data": onboard_data,
+ },
+ )
+ )
+
+ logger.debug(
+ f"Worker {worker_id} is starting onboarding thread with "
+ f"onboarding agent id {onboard_id}."
+ )
+
+ # Create an onboarding thread
+ onboard_thread = threading.Thread(
+ target=self._launch_and_run_onboarding,
+ args=(agent_info, channel_info.job.task_runner),
+ name=f"Onboard-thread-{onboard_id}",
+ )
+
+ onboard_agent.update_status(AgentState.STATUS_ONBOARDING)
+ agent_info.assignment_thread = onboard_thread
+ onboard_thread.start()
+ return
+
+ # Not onboarding, so just register directly
+ self._assign_unit_to_agent(packet, channel_info, units)
+
+ def _get_init_data(self, packet, channel_info: ChannelInfo):
+ """Get the initialization data for the assigned agent's task"""
+ task_runner = channel_info.job.task_runner
+ agent_id = packet.data["provider_data"]["agent_id"]
+ agent_info = self.agents[agent_id]
+ assert isinstance(
+ agent_info.agent, Agent
+ ), f"Can only get init unit data for Agents, not OnboardingAgents, got {agent_info}"
+ unit_data = task_runner.get_init_data_for_agent(agent_info.agent)
+
+ agent_data_packet = Packet(
+ packet_type=PACKET_TYPE_INIT_DATA,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_info.channel_id,
+ data={"request_id": packet.data["request_id"], "init_data": unit_data},
+ )
+
+ self.message_queue.append(agent_data_packet)
+
+ if isinstance(unit_data, dict) and unit_data.get("raw_messages") is not None:
+ # TODO bring these into constants somehow
+ for message in unit_data["raw_messages"]:
+ packet = Packet.from_dict(message)
+ packet.receiver_id = agent_id
+ agent_info.agent.pending_observations.append(packet)
+
+ def _on_message(self, packet: Packet, channel_info: ChannelInfo):
+ """Handle incoming messages from the channel"""
+ # TODO(#102) this method currently assumes that the packet's sender_id will
+ # always be a valid agent in our list of agent_infos. At the moment this
+ # is a valid assumption, but will not be on recovery from catastrophic failure.
+ if packet.type == PACKET_TYPE_AGENT_ACTION:
+ self._on_act(packet, channel_info)
+ elif packet.type == PACKET_TYPE_NEW_AGENT:
+ self._register_agent(packet, channel_info)
+ elif packet.type == PACKET_TYPE_SUBMIT_ONBOARDING:
+ self._on_submit_onboarding(packet, channel_info)
+ elif packet.type == PACKET_TYPE_NEW_WORKER:
+ self._register_worker(packet, channel_info)
+ elif packet.type == PACKET_TYPE_GET_INIT_DATA:
+ self._get_init_data(packet, channel_info)
+ elif packet.type == PACKET_TYPE_RETURN_AGENT_STATUS:
+ # Record this status response
+ self._handle_updated_agent_status(packet.data)
+ else:
+ # PACKET_TYPE_REQUEST_AGENT_STATUS, PACKET_TYPE_ALIVE,
+ # PACKET_TYPE_INIT_DATA
+ raise Exception(f"Unexpected packet type {packet.type}")
+
+ # TODO(#103) maybe batching these is better?
+ def _try_send_agent_messages(self, agent_info: AgentInfo):
+ """Handle sending any possible messages for a specific agent"""
+ channel_info = self.channels[agent_info.used_channel_id]
+ agent = agent_info.agent
+ while len(agent.pending_observations) > 0:
+ curr_obs = agent.pending_observations.pop(0)
+ did_send = channel_info.channel.send(curr_obs)
+ if not did_send:
+ logger.error(f"Failed to send packet {curr_obs} to {channel_info}")
+ agent.pending_observations.insert(0, curr_obs)
+ return # something up with the channel, try later
+
+ def _send_message_queue(self) -> None:
+ """Send all of the messages in the system queue"""
+ while len(self.message_queue) > 0:
+ curr_obs = self.message_queue.pop(0)
+ channel = self.channels[curr_obs.receiver_id].channel
+ did_send = channel.send(curr_obs)
+ if not did_send:
+ logger.error(
+ f"Failed to send packet {curr_obs} to server {curr_obs.receiver_id}"
+ )
+ self.message_queue.insert(0, curr_obs)
+ return # something up with the channel, try later
+
+ def _send_status_update(self, agent_info: AgentInfo) -> None:
+ """
+ Handle telling the frontend agent about a change in their
+ active status. (Pushing a change in AgentState)
+ """
+ send_packet = Packet(
+ packet_type=PACKET_TYPE_UPDATE_AGENT_STATUS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=agent_info.agent.get_agent_id(),
+ data={
+ "status": agent_info.agent.db_status,
+ "state": {
+ "done_text": STATUS_TO_TEXT_MAP.get(agent_info.agent.db_status),
+ "task_done": agent_info.agent.db_status
+ == AgentState.STATUS_PARTNER_DISCONNECT,
+ },
+ },
+ )
+ channel_info = self.channels[agent_info.used_channel_id]
+ channel_info.channel.send(send_packet)
+
+ def _mark_agent_done(self, agent_info: AgentInfo) -> None:
+ """
+ Handle marking an agent as done, and telling the frontend agent
+ that they have successfully completed their task.
+
+ If the agent is in a final non-successful status, or already
+ told of partner disconnect, skip
+ """
+ if agent_info.agent.db_status in AgentState.complete() + [
+ AgentState.STATUS_PARTNER_DISCONNECT
+ ]:
+ return
+ send_packet = Packet(
+ packet_type=PACKET_TYPE_UPDATE_AGENT_STATUS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=agent_info.agent.get_agent_id(),
+ data={
+ "status": "completed",
+ "state": {
+ "done_text": "You have completed this task. Please submit.",
+ "task_done": True,
+ },
+ },
+ )
+ channel_info = self.channels[agent_info.used_channel_id]
+ channel_info.channel.send(send_packet)
+
+ def _handle_updated_agent_status(self, status_map: Dict[str, str]):
+ """
+ Handle updating the local statuses for agents based on
+ the previously reported agent statuses.
+
+ Takes as input a mapping from agent_id to server-side status
+ """
+ for agent_id, status in status_map.items():
+ if status not in AgentState.valid():
+ logger.warning(f"Invalid status for agent {agent_id}: {status}")
+ continue
+ if agent_id not in self.agents:
+ # no longer tracking agent
+ continue
+ agent = self.agents[agent_id].agent
+ db_status = agent.get_status()
+ if agent.has_updated_status.is_set():
+ continue # Incoming info may be stale if we have new info to send
+ if status == AgentState.STATUS_NONE:
+ # Stale or reconnect, send a status update
+ self._send_status_update(self.agents[agent_id])
+ continue
+ if status != db_status:
+ if db_status in AgentState.complete():
+ logger.info(
+ f"Got updated status {status} when already final: {agent.db_status}"
+ )
+ continue
+ elif status == AgentState.STATUS_COMPLETED:
+ continue # COMPLETED can only be marked locally
+ agent.update_status(status)
+ pass
+
+ def _request_action(self, agent_info: AgentInfo) -> None:
+ """
+ Request an act from the agent targeted here. If the
+ agent is found by the server, this request will be
+ forwarded.
+ """
+ send_packet = Packet(
+ packet_type=PACKET_TYPE_REQUEST_ACTION,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=agent_info.agent.get_agent_id(),
+ data={},
+ )
+ channel_info = self.channels[agent_info.used_channel_id]
+ channel_info.channel.send(send_packet)
+
+ def _request_status_update(self) -> None:
+ """
+ Check last round of statuses, then request
+ an update from the server on all agents' current statuses
+ """
+ if time.time() - self.last_status_check < STATUS_CHECK_TIME:
+ return
+
+ self.last_status_check = time.time()
+
+ for channel_id, channel_info in self.channels.items():
+ send_packet = Packet(
+ packet_type=PACKET_TYPE_REQUEST_AGENT_STATUS,
+ sender_id=SYSTEM_CHANNEL_ID,
+ receiver_id=channel_id,
+ data={},
+ )
+ channel_info.channel.send(send_packet)
+
+ def _channel_handling_thread(self) -> None:
+ """Thread for handling outgoing messages through the channels"""
+ while len(self.channels) > 0:
+ current_agents = list(self.agents.values())
+ for agent_info in current_agents:
+ # Send requests for action
+ if agent_info.agent.wants_action.is_set():
+ self._request_action(agent_info)
+ agent_info.agent.wants_action.clear()
+ # Pass updated statuses
+ if agent_info.agent.has_updated_status.is_set():
+ self._send_status_update(agent_info)
+ agent_info.agent.has_updated_status.clear()
+ # clear the message queue for this agent
+ self._try_send_agent_messages(agent_info)
+ # Send all messages from the system
+ self._send_message_queue()
+ self._request_status_update()
+ # TODO(#103) is there a way we can trigger this when
+ # agents observe instead?
+ time.sleep(0.1)
+
+ def launch_sending_thread(self) -> None:
+ """Launch the sending thread for this supervisor"""
+ self.sending_thread = threading.Thread(
+ target=self._channel_handling_thread, name=f"channel-sending-thread"
+ )
+ self.sending_thread.start()
diff --git a/mephisto/operations/task_launcher.py b/mephisto/operations/task_launcher.py
new file mode 100644
index 000000000..de5b4c666
--- /dev/null
+++ b/mephisto/operations/task_launcher.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# TODO(#99) do we standardize some kinds of data loader formats? perhaps
+# one that loads from files, and then an arbitrary kind? Simple
+# interface could be like an iterator. This class will launch tasks
+# as if the loader is an iterator.
+
+from mephisto.data_model.assignment import (
+ Assignment,
+ Unit,
+ InitializationData,
+ AssignmentState,
+)
+
+from typing import Dict, Optional, List, Any, TYPE_CHECKING, Iterator
+from tqdm import tqdm
+import os
+import time
+import enum
+
+if TYPE_CHECKING:
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.abstractions.database import MephistoDB
+
+import threading
+from mephisto.operations.logger_core import get_logger
+import types
+
+logger = get_logger(name=__name__, verbose=True, level="info")
+
+UNIT_GENERATOR_WAIT_SECONDS = 10
+ASSIGNMENT_GENERATOR_WAIT_SECONDS = 0.5
+
+
+class GeneratorType(enum.Enum):
+ NONE = 0
+ UNIT = 1
+ ASSIGNMENT = 2
+
+
+class TaskLauncher:
+ """
+ This class is responsible for managing the process of registering
+ and launching units, including the steps for pre-processing
+ data and storing them locally for assignments when appropriate.
+ """
+
+ def __init__(
+ self,
+ db: "MephistoDB",
+ task_run: "TaskRun",
+ assignment_data_iterator: Iterator[InitializationData],
+ max_num_concurrent_units: int = 0,
+ ):
+ """Prepare the task launcher to get it ready to launch the assignments"""
+ self.db = db
+ self.task_run = task_run
+ self.assignment_data_iterable = assignment_data_iterator
+ self.assignments: List[Assignment] = []
+ self.units: List[Unit] = []
+ self.provider_type = task_run.get_provider().PROVIDER_TYPE
+ self.max_num_concurrent_units = max_num_concurrent_units
+ self.launched_units: Dict[str, Unit] = {}
+ self.unlaunched_units: Dict[str, Unit] = {}
+ self.keep_launching_units: bool = False
+ self.finished_generators: bool = False
+ self.assignment_thread_done: bool = True
+
+ self.unlaunched_units_access_condition = threading.Condition()
+ if isinstance(self.assignment_data_iterable, types.GeneratorType):
+ self.generator_type = GeneratorType.ASSIGNMENT
+ self.assignment_thread_done = False
+ else:
+ self.generator_type = GeneratorType.NONE
+ run_dir = task_run.get_run_dir()
+ os.makedirs(run_dir, exist_ok=True)
+
+ logger.debug(f"type of assignment data: {type(self.assignment_data_iterable)}")
+ self.units_thread = None
+ self.assignments_thread = None
+
+ def _create_single_assignment(self, assignment_data) -> None:
+ """ Create a single assignment in the database using its read assignment_data """
+ task_run = self.task_run
+ task_config = task_run.get_task_config()
+ assignment_id = self.db.new_assignment(
+ task_run.task_id,
+ task_run.db_id,
+ task_run.requester_id,
+ task_run.task_type,
+ task_run.provider_type,
+ task_run.sandbox,
+ )
+ assignment = Assignment(self.db, assignment_id)
+ assignment.write_assignment_data(assignment_data)
+ self.assignments.append(assignment)
+ unit_count = len(assignment_data.unit_data)
+ for unit_idx in range(unit_count):
+ unit_id = self.db.new_unit(
+ task_run.task_id,
+ task_run.db_id,
+ task_run.requester_id,
+ assignment_id,
+ unit_idx,
+ task_config.task_reward,
+ task_run.provider_type,
+ task_run.task_type,
+ task_run.sandbox,
+ )
+ self.units.append(Unit(self.db, unit_id))
+ with self.unlaunched_units_access_condition:
+ self.unlaunched_units[unit_id] = Unit(self.db, unit_id)
+
+ def _try_generating_assignments(self) -> None:
+ """ Try to generate more assignments from the assignments_data_iterator"""
+ while not self.finished_generators:
+ try:
+ data = next(self.assignment_data_iterable)
+ self._create_single_assignment(data)
+ except StopIteration:
+ self.finished_generators = True
+ self.assignment_thread_done = True
+ time.sleep(ASSIGNMENT_GENERATOR_WAIT_SECONDS)
+
+ def create_assignments(self) -> None:
+ """ Create an assignment and associated units for the generated assignment data """
+ self.keep_launching_units = True
+ if self.generator_type == GeneratorType.NONE:
+ for data in self.assignment_data_iterable:
+ self._create_single_assignment(data)
+ else:
+ self.assignments_thread = threading.Thread(
+ target=self._try_generating_assignments, args=()
+ )
+ self.assignments_thread.start()
+
+ def generate_units(self):
+ """Units generator which ensures that at most 'max_num_concurrent_units' units are running at the same time,
+ i.e. in the LAUNCHED or ASSIGNED states (a limit of 0 means no cap is applied)"""
+ while self.keep_launching_units:
+ units_id_to_remove = []
+ for db_id, unit in self.launched_units.items():
+ status = unit.get_status()
+ if (
+ status != AssignmentState.LAUNCHED
+ and status != AssignmentState.ASSIGNED
+ ):
+ units_id_to_remove.append(db_id)
+ for db_id in units_id_to_remove:
+ self.launched_units.pop(db_id)
+
+ num_avail_units = self.max_num_concurrent_units - len(self.launched_units)
+ num_avail_units = (
+ len(self.unlaunched_units)
+ if self.max_num_concurrent_units == 0
+ else num_avail_units
+ )
+
+ units_id_to_remove = []
+ for i, item in enumerate(self.unlaunched_units.items()):
+ db_id, unit = item
+ if i < num_avail_units:
+ self.launched_units[unit.db_id] = unit
+ units_id_to_remove.append(db_id)
+ yield unit
+ else:
+ break
+ with self.unlaunched_units_access_condition:
+ for db_id in units_id_to_remove:
+ self.unlaunched_units.pop(db_id)
+
+ time.sleep(UNIT_GENERATOR_WAIT_SECONDS)
+ if not self.unlaunched_units:
+ break
+
+ def _launch_limited_units(self, url: str) -> None:
+ """ use units' generator to launch limited number of units according to (max_num_concurrent_units)"""
+ while not self.finished_generators:
+ for unit in self.generate_units():
+ unit.launch(url)
+ if self.generator_type == GeneratorType.NONE:
+ break
+
+ def launch_units(self, url: str) -> None:
+ """launch any units registered by this TaskLauncher"""
+ self.units_thread = threading.Thread(
+ target=self._launch_limited_units, args=(url,)
+ )
+ self.units_thread.start()
+
+ def get_assignments_are_all_created(self) -> bool:
+ return self.assignment_thread_done
+
+ def expire_units(self) -> None:
+ """Clean up all units on this TaskLauncher"""
+ self.keep_launching_units = False
+ self.finished_generators = True
+ for unit in tqdm(self.units):
+ try:
+ unit.expire()
+ except Exception as e:
+ logger.exception(
+ f"Warning: failed to expire unit {unit.db_id}. Stated error: {e}",
+ exc_info=True,
+ )
+
+ def shutdown(self) -> None:
+ """Clean up running threads for generating assignments and units"""
+ if self.assignments_thread is not None:
+ self.assignments_thread.join()
+ self.units_thread.join()
diff --git a/mephisto/operations/utils.py b/mephisto/operations/utils.py
new file mode 100644
index 000000000..4e0e40fba
--- /dev/null
+++ b/mephisto/operations/utils.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import sys, glob, importlib
+
+import shlex
+from distutils.dir_util import copy_tree
+import functools
+from mephisto.data_model.constants import NO_PROJECT_NAME
+from mephisto.operations.config_handler import (
+ add_config_arg,
+ get_config_arg,
+ CORE_SECTION,
+ DATA_STORAGE_KEY,
+ DEFAULT_CONFIG_FILE,
+)
+
+from typing import Optional, Dict, Any, List, Type, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.crowd_provider import CrowdProvider
+ from mephisto.data_model.task_runner import TaskRunner
+ from mephisto.abstractions.architect import Architect
+ from mephisto.data_model.task_run import TaskRun
+ from mephisto.data_model.requester import Requester
+
+
+loaded_data_dir = None
+
+
+def ensure_user_confirm(display_text, skip_input=False) -> None:
+ """
+ Helper to provide the flow for having a user confirm a specific occurrence
+ before it happens. skip_input will make this method return without
+ checking, which is useful for automated scripts
+ """
+ if skip_input:
+ return
+ res = input(f'{display_text}\nEnter "n" to exit and anything else to continue:')
+ if res == "n":
+ raise SystemExit(0)
+ return
+
+
+def get_root_dir() -> str:
+ """Return the currently configured root mephisto directory"""
+ # This file is at ROOT/mephisto/operations/utils.py
+ return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+def get_mock_requester(db) -> "Requester":
+ """Get or create a mock requester to use for test tasks"""
+ # TODO(#98) Need to split utils into those operating for the data model
+ # and those operating on the data model, and those operating beyond
+ mock_requesters = db.find_requesters(provider_type="mock")
+ if len(mock_requesters) == 0:
+ db.new_requester("MOCK_REQUESTER", "mock")
+ mock_requesters = db.find_requesters(provider_type="mock")
+ return mock_requesters[0]
+
+
+def get_provider_dir() -> str:
+ """
+ Return the path to the mephisto providers directory
+ """
+ return os.path.join(get_root_dir(), "mephisto/abstractions/providers")
+
+
+def get_gallery_dir() -> str:
+ """
+ Return the path to the mephisto task gallery
+ """
+ return os.path.join(get_root_dir(), "gallery")
+
+
+def get_dir_for_task(task_name: str, not_exists_ok: bool = False) -> Optional[str]:
+ """
+ Return the directory for the given task, if it exists. Check the user's task
+ dir first and then the gallery second.
+ """
+ dir_path = os.path.join(get_tasks_dir(), task_name)
+ if os.path.exists(dir_path) or not_exists_ok:
+ return dir_path
+ dir_path = os.path.join(get_gallery_dir(), task_name)
+ if os.path.exists(dir_path) or not_exists_ok:
+ return dir_path
+ return None
+
+
+def get_tasks_dir() -> str:
+ """
+ Return the directory where the mephisto user has configured their personal tasks
+ to exist in
+ """
+ return os.path.join(get_root_dir(), "mephisto/tasks")
+
+
+def get_root_data_dir() -> str:
+ """
+ Return the directory where the mephisto data is expected to go
+ """
+ global loaded_data_dir
+ if loaded_data_dir is None:
+ default_data_dir = os.path.join(get_root_dir(), "data")
+ actual_data_dir = get_config_arg(CORE_SECTION, DATA_STORAGE_KEY)
+ if actual_data_dir is None:
+ data_dir_location = input(
+ "Please enter the full path to a location to store Mephisto run data. By default this "
+ f"would be at '{default_data_dir}'. This dir should NOT be on a distributed file "
+ "store. Press enter to use the default: "
+ ).strip()
+ if len(data_dir_location) == 0:
+ data_dir_location = default_data_dir
+ data_dir_location = os.path.expanduser(data_dir_location)
+ os.makedirs(data_dir_location, exist_ok=True)
+ # Check to see if there is existing data to possibly move to the data dir:
+ database_loc = os.path.join(default_data_dir, "database.db")
+ if os.path.exists(database_loc) and data_dir_location != default_data_dir:
+ should_migrate = (
+ input(
+ "We have found an existing database in the default data directory, do you want to "
+ f"copy any existing data from the default location to {data_dir_location}? (y)es/no: "
+ )
+ .lower()
+ .strip()
+ )
+ if len(should_migrate) == 0 or should_migrate[0] == "y":
+ copy_tree(default_data_dir, data_dir_location)
+ print(
+ "Mephisto data successfully copied, once you've confirmed the migration worked, "
+ "feel free to remove all of the contents in "
+ f"{default_data_dir} EXCEPT for `README.md`."
+ )
+ add_config_arg(CORE_SECTION, DATA_STORAGE_KEY, data_dir_location)
+
+ loaded_data_dir = get_config_arg(CORE_SECTION, DATA_STORAGE_KEY)
+
+ if not os.path.isdir(loaded_data_dir):
+ raise NotADirectoryError(
+ f"The provided Mephisto data directory {loaded_data_dir} as set in "
+ f"{DEFAULT_CONFIG_FILE} is not a directory! Please locate your Mephisto "
+ f"data directory and update {DEFAULT_CONFIG_FILE} to point to it."
+ )
+
+ return loaded_data_dir
+
+
+def get_data_dir(root_dir: Optional[str] = None) -> str:
+ """
+ Return the directory where the mephisto data is expected to go
+ """
+ if root_dir is None:
+ return get_root_data_dir()
+ return os.path.join(root_dir, "data")
+
+
+def get_mephisto_tmp_dir() -> str:
+ """
+ Return the directory where the mephisto temporary build files go
+ """
+ return os.path.join(get_root_dir(), "tmp")
+
+
+def get_dir_for_run(task_run: "TaskRun", project_name: str = NO_PROJECT_NAME) -> str:
+ """
+ Return the directory where the mephisto run data is expected to go
+ """
+ run_id = task_run.db_id
+ root_dir = task_run.db.db_root
+ return os.path.join(get_data_dir(root_dir), "runs", project_name, run_id)
+
+
+def build_arg_list_from_dict(in_dict: Dict[str, Any]) -> List[str]:
+ arg_list = []
+ for key, val in in_dict.items():
+ arg_list.append(f"--{key.replace('_', '-')}")
+ arg_list.append(str(val))
+ return arg_list
+
+
+def find_or_create_qualification(db, qualification_name) -> None:
+ """
+ Ensure the given qualification exists in the db,
+ creating it if it doesn't already
+ """
+ from mephisto.abstractions.database import EntryAlreadyExistsException
+
+ try:
+ db.make_qualification(qualification_name)
+ except EntryAlreadyExistsException:
+ pass # qualification already exists
diff --git a/mephisto/providers/__init__.py b/mephisto/providers/__init__.py
index 240697e32..6aa7771bc 100644
--- a/mephisto/providers/__init__.py
+++ b/mephisto/providers/__init__.py
@@ -3,3 +3,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers import mock, mturk, mturk_sandbox
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy package path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/__init__.py b/mephisto/providers/mock/__init__.py
index 240697e32..5f0fbb91e 100644
--- a/mephisto/providers/mock/__init__.py
+++ b/mephisto/providers/mock/__init__.py
@@ -3,3 +3,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mock import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy package path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/mock_agent.py b/mephisto/providers/mock/mock_agent.py
index 0b9b71e9e..9697e253d 100644
--- a/mephisto/providers/mock/mock_agent.py
+++ b/mephisto/providers/mock/mock_agent.py
@@ -4,89 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.agent import Agent
-from mephisto.data_model.blueprint import AgentState
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE
-
-from typing import List, Optional, Tuple, Dict, Mapping, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.packet import Packet
- from mephisto.providers.mock.mock_datastore import MockDatastore
-
-
-class MockAgent(Agent):
- """
- This class encompasses a worker as they are working on an individual assignment.
- It maintains details for the current task at hand such as start and end time,
- connection status, etc.
- """
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
- if db_id not in self.datastore.agent_data:
- self.datastore.agent_data[db_id] = {
- "observed": [],
- "pending_acts": [],
- "acts": [],
- }
-
- def observe(self, packet: "Packet") -> None:
- """Put observations into this mock agent's observation list"""
- self.datastore.agent_data[self.db_id]["observed"].append(packet)
- super().observe(packet)
-
- def act(self, timeout=None) -> Optional["Packet"]:
- """
- Either take an act from this mock agent's act queue (for use
- by tests and other mock purposes) or request a regular act
- (for use in manual testing).
- """
- if len(self.datastore.agent_data[self.db_id]["pending_acts"]) > 0:
- act = self.datastore.agent_data[self.db_id]["pending_acts"].pop(0)
- else:
- act = super().act(timeout=timeout)
-
- if act is not None:
- self.datastore.agent_data[self.db_id]["acts"].append(act)
- return act
-
- def approve_work(self) -> None:
- """
- Approve the work done on this specific Unit
-
- Mock Units
- """
- self.update_status(AgentState.STATUS_APPROVED)
-
- def reject_work(self, reason) -> None:
- """
- Reject the work done on this specific Unit
- """
- self.update_status(AgentState.STATUS_REJECTED)
-
- def mark_done(self) -> None:
- """
- Take any required step with the crowd_provider to ensure that
- the worker can submit their work and be marked as complete via
- a call to get_status
- """
- if self.get_status() not in AgentState.complete():
- self.db.update_agent(
- agent_id=self.db_id, status=AgentState.STATUS_COMPLETED
- )
-
- def mark_disconnected(self) -> None:
- """Mark this mock agent as having disconnected"""
- self.db.update_agent(agent_id=self.db_id, status=AgentState.STATUS_DISCONNECT)
-
- @staticmethod
- def new(db: "MephistoDB", worker: "Worker", unit: "Unit") -> "Agent":
- """Create an agent for this worker to be used for work on the given Unit."""
- return MockAgent._register_agent(db, worker, unit, PROVIDER_TYPE)
+from mephisto.abstractions.providers.mock.mock_agent import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/mock_datastore.py b/mephisto/providers/mock/mock_datastore.py
index ae4f910cf..9a987f724 100644
--- a/mephisto/providers/mock/mock_datastore.py
+++ b/mephisto/providers/mock/mock_datastore.py
@@ -4,216 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import boto3
-import sqlite3
-import os
-import threading
-
-from datetime import datetime
-
-
-from botocore.exceptions import ClientError
-from botocore.exceptions import ProfileNotFound
-
-from typing import Dict, Any, Optional
-
-MTURK_REGION_NAME = "us-east-1"
-
-CREATE_REQUESTERS_TABLE = """CREATE TABLE IF NOT EXISTS requesters (
- requester_id TEXT PRIMARY KEY UNIQUE,
- is_registered BOOLEAN
-);
-"""
-
-CREATE_UNITS_TABLE = """CREATE TABLE IF NOT EXISTS units (
- unit_id TEXT PRIMARY KEY UNIQUE,
- is_expired BOOLEAN
-);
-"""
-
-CREATE_WORKERS_TABLE = """CREATE TABLE IF NOT EXISTS workers (
- worker_id TEXT PRIMARY KEY UNIQUE,
- is_blocked BOOLEAN
-);
-"""
-
-
-class MockDatastore:
- """
- Handles storing mock results and statuses across processes for use
- in unit testing and manual experimentation.
- """
-
- def __init__(self, datastore_root: str):
- """Initialize local storage of active agents, connect to the database"""
- self.agent_data: Dict[str, Dict[str, Any]] = {}
- self.table_access_condition = threading.Condition()
- self.conn: Dict[int, sqlite3.Connection] = {}
- self.db_path = os.path.join(datastore_root, "mock.db")
- self.init_tables()
- self.datastore_root = datastore_root
-
- def _get_connection(self) -> sqlite3.Connection:
- """Returns a singular database connection to be shared amongst all
- calls for a given thread.
- """
- curr_thread = threading.get_ident()
- if curr_thread not in self.conn or self.conn[curr_thread] is None:
- conn = sqlite3.connect(self.db_path)
- conn.row_factory = sqlite3.Row
- self.conn[curr_thread] = conn
- return self.conn[curr_thread]
-
- def init_tables(self) -> None:
- """
- Run all the table creation SQL queries to ensure the expected tables exist
- """
- with self.table_access_condition:
- conn = self._get_connection()
- conn.execute("PRAGMA foreign_keys = 1")
- c = conn.cursor()
- c.execute(CREATE_REQUESTERS_TABLE)
- c.execute(CREATE_UNITS_TABLE)
- c.execute(CREATE_WORKERS_TABLE)
- conn.commit()
-
- def ensure_requester_exists(self, requester_id: str) -> None:
- """Create a record of this requester if it doesn't exist"""
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """INSERT OR IGNORE INTO requesters(
- requester_id,
- is_registered
- ) VALUES (?, ?);""",
- (requester_id, False),
- )
- conn.commit()
- return None
-
- def set_requester_registered(self, requester_id: str, val: bool) -> None:
- """Set the requester registration status for the given id"""
- self.ensure_requester_exists(requester_id)
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """UPDATE requesters
- SET is_registered = ?
- WHERE requester_id = ?
- """,
- (val, requester_id),
- )
- conn.commit()
- return None
-
- def get_requester_registered(self, requester_id: str) -> bool:
- """Get the registration status of a requester"""
- self.ensure_requester_exists(requester_id)
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT is_registered from requesters
- WHERE requester_id = ?
- """,
- (requester_id,),
- )
- results = c.fetchall()
- return bool(results[0]["is_registered"])
-
- def ensure_worker_exists(self, worker_id: str) -> None:
- """Create a record of this worker if it doesn't exist"""
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """INSERT OR IGNORE INTO workers(
- worker_id,
- is_blocked
- ) VALUES (?, ?);""",
- (worker_id, False),
- )
- conn.commit()
- return None
-
- def set_worker_blocked(self, worker_id: str, val: bool) -> None:
- """Set the worker registration status for the given id"""
- self.ensure_worker_exists(worker_id)
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """UPDATE workers
- SET is_blocked = ?
- WHERE worker_id = ?
- """,
- (val, worker_id),
- )
- conn.commit()
- return None
-
- def get_worker_blocked(self, worker_id: str) -> bool:
- """Get the registration status of a worker"""
- self.ensure_worker_exists(worker_id)
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT is_blocked from workers
- WHERE worker_id = ?
- """,
- (worker_id,),
- )
- results = c.fetchall()
- return bool(results[0]["is_blocked"])
-
- def ensure_unit_exists(self, unit_id: str) -> None:
- """Create a record of this unit if it doesn't exist"""
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """INSERT OR IGNORE INTO units(
- unit_id,
- is_expired
- ) VALUES (?, ?);""",
- (unit_id, False),
- )
- conn.commit()
- return None
-
- def set_unit_expired(self, unit_id: str, val: bool) -> None:
- """Set the unit registration status for the given id"""
- self.ensure_unit_exists(unit_id)
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """UPDATE units
- SET is_expired = ?
- WHERE unit_id = ?
- """,
- (val, unit_id),
- )
- conn.commit()
- return None
-
- def get_unit_expired(self, unit_id: str) -> bool:
- """Get the registration status of a unit"""
- self.ensure_unit_exists(unit_id)
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT is_expired from units
- WHERE unit_id = ?
- """,
- (unit_id,),
- )
- results = c.fetchall()
- return bool(results[0]["is_expired"])
+from mephisto.abstractions.providers.mock.mock_datastore import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/mock_provider.py b/mephisto/providers/mock/mock_provider.py
index 5774ff305..ef0914df3 100644
--- a/mephisto/providers/mock/mock_provider.py
+++ b/mephisto/providers/mock/mock_provider.py
@@ -4,85 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.crowd_provider import CrowdProvider, ProviderArgs
-from mephisto.providers.mock.mock_agent import MockAgent
-from mephisto.providers.mock.mock_requester import MockRequester
-from mephisto.providers.mock.mock_unit import MockUnit
-from mephisto.providers.mock.mock_worker import MockWorker
-from mephisto.providers.mock.mock_datastore import MockDatastore
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE
-from mephisto.data_model.requester import RequesterArgs
-from mephisto.core.registry import register_mephisto_abstraction
-from dataclasses import dataclass, field
-
-from typing import ClassVar, Dict, Any, Optional, Type, List, TYPE_CHECKING
-
-import os
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.requester import Requester
- from mephisto.data_model.agent import Agent
- from mephisto.data_model.blueprint import SharedTaskState
- from omegaconf import DictConfig
-
-
-@dataclass
-class MockProviderArgs(ProviderArgs):
- """Base class for arguments to configure Crowd Providers"""
-
- _provider_type: str = PROVIDER_TYPE
-
-
-@register_mephisto_abstraction()
-class MockProvider(CrowdProvider):
- """
- Mock implementation of a CrowdProvider that stores everything
- in a local state in the class for use in tests.
- """
-
- UnitClass: ClassVar[Type["Unit"]] = MockUnit
-
- RequesterClass: ClassVar[Type["Requester"]] = MockRequester
-
- WorkerClass: ClassVar[Type["Worker"]] = MockWorker
-
- AgentClass: ClassVar[Type["Agent"]] = MockAgent
-
- ArgsClass = MockProviderArgs
-
- SUPPORTED_TASK_TYPES: ClassVar[List[str]] = ["mock"]
-
- PROVIDER_TYPE = PROVIDER_TYPE
-
- curr_db_location: ClassVar[str]
-
- def initialize_provider_datastore(self, storage_path: str) -> Any:
- """Mocks don't need any initialization"""
- return MockDatastore(datastore_root=storage_path)
-
- def setup_resources_for_task_run(
- self,
- task_run: "TaskRun",
- args: "DictConfig",
- shared_state: "SharedTaskState",
- server_url: str,
- ) -> None:
- """Mocks don't do any initialization"""
- return None
-
- def cleanup_resources_from_task_run(
- self, task_run: "TaskRun", server_url: str
- ) -> None:
- """Mocks don't do any initialization"""
- return None
-
- @classmethod
- def get_wrapper_js_path(cls):
- """
- Return the path to the `wrap_crowd_source.js` file for this
- provider to be deployed to the server
- """
- return os.path.join(os.path.dirname(__file__), "wrap_crowd_source.js")
+from mephisto.abstractions.providers.mock.mock_provider import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/mock_requester.py b/mephisto/providers/mock/mock_requester.py
index 858a31747..424f112c4 100644
--- a/mephisto/providers/mock/mock_requester.py
+++ b/mephisto/providers/mock/mock_requester.py
@@ -4,71 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from dataclasses import dataclass, field
-from mephisto.data_model.requester import Requester, RequesterArgs
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE
-
-from typing import Optional, Dict, List, Mapping, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
- from mephisto.providers.mock.mock_datastore import MockDatastore
- from argparse import _ArgumentGroup as ArgumentGroup
- from omegaconf import DictConfig
-
-MOCK_BUDGET = 100000.0
-
-
-@dataclass
-class MockRequesterArgs(RequesterArgs):
- name: str = field(
- default="MOCK_REQUESTER",
- metadata={
- "help": "Name for the requester in the Mephisto DB.",
- "required": True,
- },
- )
- force_fail: bool = field(
- default=False, metadata={"help": "Trigger a failed registration"}
- )
-
-
-class MockRequester(Requester):
- """
- High level class representing a requester on some kind of crowd provider. Sets some default
- initializations, but mostly should be extended by the specific requesters for crowd providers
- with whatever implementation details are required to get those to work.
- """
-
- ArgsClass = MockRequesterArgs
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
-
- def register(self, args: Optional["DictConfig"] = None) -> None:
- """Mock requesters don't actually register credentials"""
- if args is not None:
- if args.get("force_fail") is True:
- raise Exception("Forced failure test exception was set")
- else:
- self.datastore.set_requester_registered(self.db_id, True)
-
- def is_registered(self) -> bool:
- """Return the registration status"""
- return self.datastore.get_requester_registered(self.db_id)
-
- def get_available_budget(self) -> float:
- """MockRequesters have $100000 to spend"""
- return MOCK_BUDGET
-
- def is_sandbox(self) -> bool:
- """MockRequesters are for testing only, and are thus treated as sandbox"""
- return True
-
- @staticmethod
- def new(db: "MephistoDB", requester_name: str) -> "Requester":
- return MockRequester._register_requester(db, requester_name, PROVIDER_TYPE)
+from mephisto.abstractions.providers.mock.mock_requester import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/mock_unit.py b/mephisto/providers/mock/mock_unit.py
index 0758a4e10..127ef2e3c 100644
--- a/mephisto/providers/mock/mock_unit.py
+++ b/mephisto/providers/mock/mock_unit.py
@@ -4,70 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.assignment import Unit
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.data_model.blueprint import AgentState
-
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE
-from typing import List, Optional, Tuple, Dict, Mapping, Any, Type, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.assignment import Assignment
- from mephisto.providers.mock.mock_datastore import MockDatastore
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-
-class MockUnit(Unit):
- """
- This class tracks the status of an individual worker's contribution to a
- higher level assignment. It is the smallest 'unit' of work to complete
- the assignment, and this class is only responsible for checking
- the status of that work itself being done.
-
- It should be extended for usage with a specific crowd provider
- """
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
-
- def launch(self, task_url: str) -> None:
- """Mock launches do nothing right now beyond updating state"""
- self.db.update_unit(self.db_id, status=AssignmentState.LAUNCHED)
-
- # TODO(OWN) get this link to the frontend
- port = task_url.split(":")[1].split("/")[0]
- print(task_url)
- print(
- f"Mock task launched: localhost:{port} for preview, "
- f"localhost:{port}/?worker_id=x&assignment_id={self.db_id}"
- )
- logger.info(
- f"Mock task launched: localhost:{port} for preview, "
- f"localhost:{port}/?worker_id=x&assignment_id={self.db_id} for assignment {self.assignment_id}"
- )
-
- return None
-
- def expire(self) -> float:
- """Expiration is immediate on Mocks"""
- self.db.update_unit(self.db_id, status=AssignmentState.EXPIRED)
- self.datastore.set_unit_expired(self.db_id, True)
- return 0.0
-
- def is_expired(self) -> bool:
- """Determine if this unit is expired as according to the vendor."""
- return self.datastore.get_unit_expired(self.db_id)
-
- @staticmethod
- def new(
- db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
- ) -> "Unit":
- """Create a Unit for the given assignment"""
- return MockUnit._register_unit(db, assignment, index, pay_amount, PROVIDER_TYPE)
+from mephisto.abstractions.providers.mock.mock_unit import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/mock_worker.py b/mephisto/providers/mock/mock_worker.py
index 3ec78a280..4682b72e6 100644
--- a/mephisto/providers/mock/mock_worker.py
+++ b/mephisto/providers/mock/mock_worker.py
@@ -4,58 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.worker import Worker
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE
-from typing import List, Optional, Tuple, Dict, Mapping, Type, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Unit, Agent
- from mephisto.data_model.requester import Requester
- from mephisto.providers.mock.mock_datastore import MockDatastore
-
-
-class MockWorker(Worker):
- """
- This class represents an individual - namely a person. It maintains components of ongoing identity for a user.
- """
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MockDatastore" = db.get_datastore_for_provider(PROVIDER_TYPE)
-
- def bonus_worker(
- self, amount: float, reason: str, unit: Optional["Unit"] = None
- ) -> Tuple[bool, str]:
- """Bonus this worker for work any reason. Return success of bonus"""
- return True, ""
-
- def block_worker(
- self,
- reason: str,
- unit: Optional["Unit"] = None,
- requester: Optional["Requester"] = None,
- ) -> Tuple[bool, str]:
- """Block this worker for a specified reason. Return success of block"""
- self.datastore.set_worker_blocked(self.db_id, True)
- return True, ""
-
- def unblock_worker(self, reason: str, requester: "Requester") -> bool:
- """unblock a blocked worker for the specified reason. Return success of unblock"""
- self.datastore.set_worker_blocked(self.db_id, False)
- return True
-
- def is_blocked(self, requester: "Requester") -> bool:
- """Determine if a worker is blocked"""
- return self.datastore.get_worker_blocked(self.db_id)
-
- def is_eligible(self, task_run: "TaskRun") -> bool:
- """Determine if this worker is eligible for the given task run"""
- return True
-
- @staticmethod
- def new(db: "MephistoDB", worker_id: str) -> "Worker":
- return MockWorker._register_worker(db, worker_id, PROVIDER_TYPE)
+from mephisto.abstractions.providers.mock.mock_worker import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mock/provider_type.py b/mephisto/providers/mock/provider_type.py
index d044e6891..fb0a77804 100644
--- a/mephisto/providers/mock/provider_type.py
+++ b/mephisto/providers/mock/provider_type.py
@@ -4,4 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-PROVIDER_TYPE = "mock"
+from mephisto.abstractions.providers.mock.provider_type import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mturk/__init__.py b/mephisto/providers/mturk/__init__.py
index 240697e32..176886c03 100644
--- a/mephisto/providers/mturk/__init__.py
+++ b/mephisto/providers/mturk/__init__.py
@@ -3,3 +3,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mturk import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy package path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
+)
diff --git a/mephisto/providers/mturk/mturk_agent.py b/mephisto/providers/mturk/mturk_agent.py
index ac7643026..e9ceabd5d 100644
--- a/mephisto/providers/mturk/mturk_agent.py
+++ b/mephisto/providers/mturk/mturk_agent.py
@@ -4,106 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.agent import Agent
-from mephisto.data_model.blueprint import AgentState
-from mephisto.providers.mturk.provider_type import PROVIDER_TYPE
-from mephisto.providers.mturk.mturk_utils import (
- approve_work,
- reject_work,
- get_assignment,
+from mephisto.abstractions.providers.mturk.mturk_agent import *
+import warnings
+
+warnings.warn(  # module-level: runs when this legacy module path is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # NOTE(review): no stacklevel passed, so the warning points at this shim
)
-
-from typing import List, Optional, Tuple, Dict, Mapping, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.packet import Packet
- from mephisto.providers.mturk.requester import MTurkRequester
- from mephisto.providers.mturk.unit import MTurkUnit
- from mephisto.providers.mturk.datastore import MTurkDatastore
-
-
-class MTurkAgent(Agent):
- """
- This class encompasses a worker as they are working on an individual assignment.
- It maintains details for the current task at hand such as start and end time,
- connection status, etc.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
- self.PROVIDER_TYPE
- )
- unit: "MTurkUnit" = self.get_unit()
- self.mturk_assignment_id = unit.get_mturk_assignment_id()
- # TODO(#97) any additional init as is necessary once
- # a mock DB exists
-
- def _get_mturk_assignment_id(self):
- if self.mturk_assignment_id is None:
- self.mturk_assignment_id = self.get_unit().get_mturk_assignment_id()
- return self.mturk_assignment_id
-
- def _get_client(self) -> Any:
- """
- Get an mturk client for usage with mturk_utils for this agent
- """
- unit = self.get_unit()
- requester: "MTurkRequester" = unit.get_requester()
- return self.datastore.get_client_for_requester(requester._requester_name)
-
- @classmethod
- def new_from_provider_data(
- cls,
- db: "MephistoDB",
- worker: "Worker",
- unit: "Unit",
- provider_data: Dict[str, Any],
- ) -> "Agent":
- """
- Wrapper around the new method that allows registering additional
- bookkeeping information from a crowd provider for this agent
- """
- datastore: "MTurkDatastore" = db.get_datastore_for_provider(cls.PROVIDER_TYPE)
- datastore.register_assignment_to_hit(
- provider_data["hit_id"], unit.db_id, provider_data["assignment_id"]
- )
- return super().new_from_provider_data(db, worker, unit, provider_data)
-
- # Required functions for Agent Interface
-
- def approve_work(self) -> None:
- """Approve the work done on this specific Unit"""
- client = self._get_client()
- approve_work(client, self._get_mturk_assignment_id(), override_rejection=True)
- self.update_status(AgentState.STATUS_APPROVED)
-
- def reject_work(self, reason) -> None:
- """Reject the work done on this specific Unit"""
- client = self._get_client()
- reject_work(client, self._get_mturk_assignment_id(), reason)
- self.update_status(AgentState.STATUS_REJECTED)
-
- def mark_done(self) -> None:
- """
- MTurk agents are marked as done on the side of MTurk, so if this agent
- is marked as done there's nothing else we need to do as the task has been
- submitted.
- """
- if self.get_status() != AgentState.STATUS_DISCONNECT:
- self.db.update_agent(
- agent_id=self.db_id, status=AgentState.STATUS_COMPLETED
- )
-
- @staticmethod
- def new(db: "MephistoDB", worker: "Worker", unit: "Unit") -> "Agent":
- """Create an agent for this worker to be used for work on the given Unit."""
- return MTurkAgent._register_agent(db, worker, unit, PROVIDER_TYPE)
diff --git a/mephisto/providers/mturk/mturk_datastore.py b/mephisto/providers/mturk/mturk_datastore.py
index 971b06918..15a22aa2a 100644
--- a/mephisto/providers/mturk/mturk_datastore.py
+++ b/mephisto/providers/mturk/mturk_datastore.py
@@ -4,286 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import boto3
-import sqlite3
-import os
-import threading
-
-from datetime import datetime
-
-
-from botocore.exceptions import ClientError
-from botocore.exceptions import ProfileNotFound
-
-from typing import Dict, Any, Optional
-
-MTURK_REGION_NAME = "us-east-1"
-
-CREATE_HITS_TABLE = """CREATE TABLE IF NOT EXISTS hits (
- hit_id TEXT PRIMARY KEY UNIQUE,
- unit_id TEXT,
- assignment_id TEXT,
- link TEXT,
- assignment_time_in_seconds INTEGER NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-CREATE_RUN_MAP_TABLE = """CREATE TABLE IF NOT EXISTS run_mappings (
- hit_id TEXT,
- run_id TEXT
-);
-"""
-
-CREATE_RUNS_TABLE = """CREATE TABLE IF NOT EXISTS runs (
- run_id TEXT PRIMARY KEY UNIQUE,
- arn_id TEXT,
- hit_type_id TEXT NOT NULL,
- hit_config_path TEXT NOT NULL,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-CREATE_QUALIFICATIONS_TABLE = """CREATE TABLE IF NOT EXISTS qualifications (
- qualification_name TEXT PRIMARY KEY UNIQUE,
- requester_id TEXT,
- mturk_qualification_name TEXT,
- mturk_qualification_id TEXT,
- creation_date DATETIME DEFAULT CURRENT_TIMESTAMP
-);
-"""
-
-
-class MTurkDatastore:
- """
- Handles storing multiple sessions for different requesters
- across a single mephisto thread (locked to a MephistoDB).
- Also creates a relevant tables for mapping between MTurk
- and mephisto.
- """
-
- def __init__(self, datastore_root: str):
- """Initialize the session storage to empty, initialize tables if needed"""
- self.session_storage: Dict[str, boto3.Session] = {}
- self.table_access_condition = threading.Condition()
- self.conn: Dict[int, sqlite3.Connection] = {}
- self.db_path = os.path.join(datastore_root, "mturk.db")
- self.init_tables()
- self.datastore_root = datastore_root
-
- def _get_connection(self) -> sqlite3.Connection:
- """Returns a singular database connection to be shared amongst all
- calls for a given thread.
- """
- # TODO(#101) is there a problem with having just one db connection?
- # Will this cause bugs with failed commits?
- curr_thread = threading.get_ident()
- if curr_thread not in self.conn or self.conn[curr_thread] is None:
- conn = sqlite3.connect(self.db_path)
- conn.row_factory = sqlite3.Row
- self.conn[curr_thread] = conn
- return self.conn[curr_thread]
-
- def init_tables(self) -> None:
- """
- Run all the table creation SQL queries to ensure the expected tables exist
- """
- with self.table_access_condition:
- conn = self._get_connection()
- conn.execute("PRAGMA foreign_keys = 1")
- with conn:
- c = conn.cursor()
- c.execute(CREATE_HITS_TABLE)
- c.execute(CREATE_RUNS_TABLE)
- c.execute(CREATE_RUN_MAP_TABLE)
- c.execute(CREATE_QUALIFICATIONS_TABLE)
-
- def new_hit(self, hit_id: str, hit_link: str, duration: int, run_id: str) -> None:
- """Register a new HIT mapping in the table"""
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """INSERT INTO hits(
- hit_id,
- link,
- assignment_time_in_seconds
- ) VALUES (?, ?, ?);""",
- (hit_id, hit_link, duration),
- )
- c.execute(
- """INSERT INTO run_mappings(
- hit_id,
- run_id
- ) VALUES (?, ?);""",
- (hit_id, run_id),
- )
-
- def get_unassigned_hit_ids(self, run_id: str):
- """
- Return a list of all HIT ids that haven't been assigned
- """
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT
- hit_id,
- unit_id,
- run_id
- FROM
- hits
- INNER JOIN run_mappings
- USING (hit_id)
- WHERE unit_id IS NULL
- AND run_id = ?;
- """,
- (run_id,),
- )
- results = c.fetchall()
- return [r["hit_id"] for r in results]
-
- def register_assignment_to_hit(
- self,
- hit_id: str,
- unit_id: Optional[str] = None,
- assignment_id: Optional[str] = None,
- ) -> None:
- """
- Register a specific assignment and hit to the given unit,
- or clear the assignment after a return
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """UPDATE hits
- SET assignment_id = ?, unit_id = ?
- WHERE hit_id = ?
- """,
- (assignment_id, unit_id, hit_id),
- )
-
- def get_hit_mapping(self, unit_id: str) -> sqlite3.Row:
- """Get the mapping between Mephisto IDs and MTurk ids"""
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from hits
- WHERE unit_id = ?
- """,
- (unit_id,),
- )
- results = c.fetchall()
- return results[0]
-
- def register_run(
- self, run_id: str, arn_id: str, hit_type_id: str, hit_config_path: str
- ) -> None:
- """Register a new task run in the mturk table"""
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """INSERT INTO runs(
- run_id,
- arn_id,
- hit_type_id,
- hit_config_path
- ) VALUES (?, ?, ?, ?);""",
- (run_id, arn_id, hit_type_id, hit_config_path),
- )
-
- def get_run(self, run_id: str) -> sqlite3.Row:
- """Get the details for a run by task_run_id"""
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from runs
- WHERE run_id = ?
- """,
- (run_id,),
- )
- results = c.fetchall()
- return results[0]
-
- def create_qualification_mapping(
- self,
- qualification_name: str,
- requester_id: str,
- mturk_qualification_name: str,
- mturk_qualification_id: str,
- ) -> None:
- """
- Create a mapping between mephisto qualification name and mturk
- qualification details in the local datastore
- """
- with self.table_access_condition, self._get_connection() as conn:
- c = conn.cursor()
- c.execute(
- """INSERT INTO qualifications(
- qualification_name,
- requester_id,
- mturk_qualification_name,
- mturk_qualification_id
- ) VALUES (?, ?, ?, ?);""",
- (
- qualification_name,
- requester_id,
- mturk_qualification_name,
- mturk_qualification_id,
- ),
- )
- return None
-
- def get_qualification_mapping(
- self, qualification_name: str
- ) -> Optional[sqlite3.Row]:
- """Get the mapping between Mephisto qualifications and MTurk qualifications"""
- with self.table_access_condition:
- conn = self._get_connection()
- c = conn.cursor()
- c.execute(
- """
- SELECT * from qualifications
- WHERE qualification_name = ?
- """,
- (qualification_name,),
- )
- results = c.fetchall()
- if len(results) == 0:
- return None
- return results[0]
-
- def get_session_for_requester(self, requester_name: str) -> boto3.Session:
- """
- Either create a new session for the given requester or return
- the existing one if it has already been created
- """
- if requester_name not in self.session_storage:
- session = boto3.Session(
- profile_name=requester_name, region_name=MTURK_REGION_NAME
- )
- self.session_storage[requester_name] = session
-
- return self.session_storage[requester_name]
-
- def get_client_for_requester(self, requester_name: str) -> Any:
- """
- Return the client for the given requester, which should allow
- direct calls to the mturk surface
- """
- return self.get_session_for_requester(requester_name).client("mturk")
-
- def get_sandbox_client_for_requester(self, requester_name: str) -> Any:
- """
- Return the client for the given requester, which should allow
- direct calls to the mturk surface
- """
- return self.get_session_for_requester(requester_name).client(
- service_name="mturk",
- region_name="us-east-1",
- endpoint_url="https://mturk-requester-sandbox.us-east-1.amazonaws.com",
- )
+from mephisto.abstractions.providers.mturk.mturk_datastore import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/providers/mturk/mturk_provider.py b/mephisto/providers/mturk/mturk_provider.py
index 437c494d1..c2c1cbb79 100644
--- a/mephisto/providers/mturk/mturk_provider.py
+++ b/mephisto/providers/mturk/mturk_provider.py
@@ -4,160 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import os
-from mephisto.data_model.task_config import TaskConfig
-from mephisto.providers.mturk.provider_type import PROVIDER_TYPE
-from mephisto.providers.mturk.mturk_datastore import MTurkDatastore
-from mephisto.data_model.crowd_provider import CrowdProvider, ProviderArgs
-from mephisto.data_model.requester import RequesterArgs
-from mephisto.providers.mturk.mturk_agent import MTurkAgent
-from mephisto.providers.mturk.mturk_requester import MTurkRequester
-from mephisto.providers.mturk.mturk_unit import MTurkUnit
-from mephisto.providers.mturk.mturk_worker import MTurkWorker
-from mephisto.providers.mturk.mturk_utils import (
- create_hit_type,
- create_hit_config,
- setup_sns_topic,
- delete_sns_topic,
- delete_qualification,
+from mephisto.abstractions.providers.mturk.mturk_provider import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
)
-from mephisto.core.registry import register_mephisto_abstraction
-from dataclasses import dataclass, field
-
-from typing import ClassVar, Dict, Any, Optional, Type, List, cast, TYPE_CHECKING
-
-from mephisto.data_model.requester import Requester
-
-if TYPE_CHECKING:
- from mephisto.data_model.blueprint import SharedTaskState
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.agent import Agent
- from omegaconf import DictConfig
-
-
-@dataclass
-class MTurkProviderArgs(ProviderArgs):
- """Provider args for an MTurk provider"""
-
- _provider_type: str = PROVIDER_TYPE
-
-
-@register_mephisto_abstraction()
-class MTurkProvider(CrowdProvider):
- """
- Implementation of a crowdprovider that interfaces with MTurk
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- UnitClass: ClassVar[Type["Unit"]] = MTurkUnit
-
- RequesterClass: ClassVar[Type["Requester"]] = MTurkRequester
-
- WorkerClass: ClassVar[Type["Worker"]] = MTurkWorker
-
- AgentClass: ClassVar[Type["Agent"]] = MTurkAgent
-
- ArgsClass = MTurkProviderArgs
-
- SUPPORTED_TASK_TYPES: ClassVar[List[str]] = [
- # TODO
- ]
-
- def initialize_provider_datastore(self, storage_path: str) -> Any:
- """
- MTurk itself is the source of truth for most data required to run
- tasks on MTurk. The datastore holds sessions to connect with
- MTurk as well as mappings between MTurk ids and Mephisto ids
- """
- return MTurkDatastore(datastore_root=storage_path)
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_client_for_requester(requester_name)
-
- def setup_resources_for_task_run(
- self,
- task_run: "TaskRun",
- args: "DictConfig",
- shared_state: "SharedTaskState",
- server_url: str,
- ) -> None:
- """
- Set up SNS queue to recieve agent events from MTurk, and produce the
- HIT type for this task run.
- """
- requester = cast("MTurkRequester", task_run.get_requester())
- session = self.datastore.get_session_for_requester(requester._requester_name)
- task_config = task_run.get_task_config()
-
- # Set up SNS queue
- # TODO(OWN) implement arn?
- task_run_id = task_run.db_id
- # task_name = task_run.get_task().task_name
- # arn_id = setup_sns_topic(session, task_name, server_url, task_run_id)
- arn_id = "TEST"
-
- # Set up HIT config
- config_dir = os.path.join(self.datastore.datastore_root, task_run_id)
- task_config = TaskConfig(task_run)
-
- # Find or create relevant qualifications
- qualifications = []
- for qualification in shared_state.qualifications:
- applicable_providers = qualification["applicable_providers"]
- if (
- applicable_providers is None
- or self.PROVIDER_TYPE in applicable_providers
- ):
- qualifications.append(qualification)
- for qualification in qualifications:
- qualification_name = qualification["qualification_name"]
- if requester.PROVIDER_TYPE == "mturk_sandbox":
- qualification_name += "_sandbox"
- if self.datastore.get_qualification_mapping(qualification_name) is None:
- qualification[
- "QualificationTypeId"
- ] = requester._create_new_mturk_qualification(qualification_name)
-
- if hasattr(shared_state, "mturk_specific_qualifications"):
- qualifications += shared_state.mturk_specific_qualifications
-
- # Set up HIT type
- client = self._get_client(requester._requester_name)
- hit_type_id = create_hit_type(client, task_config, qualifications)
- self.datastore.register_run(task_run_id, arn_id, hit_type_id, config_dir)
-
- def cleanup_resources_from_task_run(
- self, task_run: "TaskRun", server_url: str
- ) -> None:
- """Shut down the SNS queue for this task."""
- requester = cast("MTurkRequester", task_run.get_requester())
- session = self.datastore.get_session_for_requester(requester._requester_name)
- run_row = self.datastore.get_run(task_run.db_id)
- delete_sns_topic(session, run_row["arn_id"])
-
- @classmethod
- def get_wrapper_js_path(cls):
- """
- Return the path to the `wrap_crowd_source.js` file for this
- provider to be deployed to the server
- """
- return os.path.join(os.path.dirname(__file__), "wrap_crowd_source.js")
-
- def cleanup_qualification(self, qualification_name: str) -> None:
- """Remove the qualification from the sandbox server, if it exists"""
- mapping = self.datastore.get_qualification_mapping(qualification_name)
- if mapping is None:
- return None
-
- requester_id = mapping["requester_id"]
- requester = Requester(self.db, requester_id)
- assert isinstance(requester, MTurkRequester), "Must be an mturk requester"
- client = requester._get_client(requester._requester_name)
- delete_qualification(client, mapping["mturk_qualification_id"])
diff --git a/mephisto/providers/mturk/mturk_requester.py b/mephisto/providers/mturk/mturk_requester.py
index 0054d8e09..3e116c418 100644
--- a/mephisto/providers/mturk/mturk_requester.py
+++ b/mephisto/providers/mturk/mturk_requester.py
@@ -4,147 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from uuid import uuid4
-import time
-import random
-
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from mephisto.data_model.requester import Requester, RequesterArgs
-from mephisto.providers.mturk.mturk_utils import (
- setup_aws_credentials,
- get_requester_balance,
- check_aws_credentials,
- find_or_create_qualification,
+from mephisto.abstractions.providers.mturk.mturk_requester import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
)
-from mephisto.providers.mturk.provider_type import PROVIDER_TYPE
-
-from typing import List, Optional, Mapping, Dict, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
- from mephisto.providers.mturk.mturk_datastore import MTurkDatastore
- from argparse import _ArgumentGroup as ArgumentGroup
-
-
-MAX_QUALIFICATION_ATTEMPTS = 300
-
-
-@dataclass
-class MTurkRequesterArgs(RequesterArgs):
- _group: str = field(
- default="MTurkRequester",
- metadata={
- "help": (
- "AWS is required to create a new Requester. "
- "Please create an IAM user with programmatic access and "
- "AmazonMechanicalTurkFullAccess policy at "
- 'https://console.aws.amazon.com/iam/ (On the "Set permissions" '
- 'page, choose "Attach existing policies directly" and then select '
- '"AmazonMechanicalTurkFullAccess" policy). After creating '
- "the IAM user, you should get an Access Key ID "
- "and Secret Access Key. "
- )
- },
- )
- access_key_id: str = field(
- default=MISSING, metadata={"required": True, "help": "IAM Access Key ID"}
- )
- secret_access_key: str = field(
- default=MISSING, metadata={"required": True, "help": "IAM Secret Access Key"}
- )
-
-
-class MTurkRequester(Requester):
- """
- Wrapper for requester behavior as provided by MTurk. Makes
- all requests directly to MTurk through boto3.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
- ArgsClass = MTurkRequesterArgs
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
- self.PROVIDER_TYPE
- )
- # Use _requester_name to preserve sandbox behavior which
- # utilizes a different requester_name
- self._requester_name = self.requester_name
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_client_for_requester(requester_name)
-
- # Required functions for a Requester implementation
-
- def register(self, args: Optional[DictConfig] = None) -> None:
- """
- Register this requester with the crowd provider by providing any required credentials
- or such. If no args are provided, assume the registration is already made and try
- to assert it as such.
- """
- for req_field in ["access_key_id", "secret_access_key"]:
- if args is not None and req_field not in args:
- raise Exception(
- f'Missing IAM "{req_field}" in requester registration args'
- )
- setup_aws_credentials(self._requester_name, args)
-
- def is_registered(self) -> bool:
- """Return whether or not this requester has registered yet"""
- return check_aws_credentials(self._requester_name)
-
- def get_available_budget(self) -> float:
- """Get the available budget from MTurk"""
- client = self._get_client(self._requester_name)
- return get_requester_balance(client)
-
- def _create_new_mturk_qualification(self, qualification_name: str) -> str:
- """
- Create a new qualification on MTurk owned by the requester provided
- """
- client = self._get_client(self._requester_name)
- qualification_desc = f"Equivalent qualification for {qualification_name}."
- use_qualification_name = qualification_name
- qualification_id = find_or_create_qualification(
- client, qualification_name, qualification_desc, must_be_owned=True
- )
- if qualification_id is None:
- # Try to append time to make the qualification unique
- use_qualification_name = f"{qualification_name}_{time.time()}"
- qualification_id = find_or_create_qualification(
- client, use_qualification_name, qualification_desc, must_be_owned=True
- )
- attempts = 0
- while qualification_id is None:
- # Append something somewhat random
- use_qualification_name = f"{qualification_name}_{str(uuid4())}"
- qualification_id = find_or_create_qualification(
- client,
- use_qualification_name,
- qualification_desc,
- must_be_owned=True,
- )
- attempts += 1
- if attempts > MAX_QUALIFICATION_ATTEMPTS:
- raise Exception(
- "Something has gone extremely wrong with creating qualification "
- f"{qualification_name} for requester {self.requester_name}"
- )
- # Store the new qualification in the datastore
- self.datastore.create_qualification_mapping(
- qualification_name, self.db_id, use_qualification_name, qualification_id
- )
- return qualification_id
-
- @staticmethod
- def new(db: "MephistoDB", requester_name: str) -> "Requester":
- return MTurkRequester._register_requester(db, requester_name, PROVIDER_TYPE)
diff --git a/mephisto/providers/mturk/mturk_unit.py b/mephisto/providers/mturk/mturk_unit.py
index 10339eb16..a6bbe52d5 100644
--- a/mephisto/providers/mturk/mturk_unit.py
+++ b/mephisto/providers/mturk/mturk_unit.py
@@ -4,246 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from datetime import datetime
-
-from mephisto.data_model.assignment import Unit
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.data_model.blueprint import AgentState
-from mephisto.providers.mturk.mturk_utils import (
- expire_hit,
- get_hit,
- create_hit_with_hit_type,
+from mephisto.abstractions.providers.mturk.mturk_unit import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
)
-from mephisto.providers.mturk.provider_type import PROVIDER_TYPE
-from typing import List, Optional, Tuple, Mapping, Dict, Any, Type, cast, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.assignment import Assignment
- from mephisto.providers.mturk.mturk_requester import MTurkRequester
- from mephisto.providers.mturk.mturk_datastore import MTurkDatastore
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-
-class MTurkUnit(Unit):
- """
- This class tracks the status of an individual worker's contribution to a
- higher level assignment. It is the smallest 'unit' of work to complete
- the assignment, and this class is only responsible for checking
- the status of that work itself being done.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
- self.PROVIDER_TYPE
- )
- self.hit_id: Optional[str] = None
- self._sync_hit_mapping()
- self.__requester: Optional["MTurkRequester"] = None
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_client_for_requester(requester_name)
-
- def _sync_hit_mapping(self) -> None:
- """Sync with the datastore to see if any mappings have updated"""
- try:
- mapping = dict(self.datastore.get_hit_mapping(self.db_id))
- self.hit_id = mapping["hit_id"]
- self.mturk_assignment_id = mapping.get("assignment_id")
- self.assignment_time_in_seconds = mapping.get("assignment_time_in_seconds")
- except IndexError:
- # HIT does not appear to exist
- self.hit_id = None
- self.mturk_assignment_id = None
- self.assignment_time_in_seconds = -1
-
- def get_mturk_assignment_id(self) -> Optional[str]:
- """
- Return the MTurk assignment id associated with this unit
- """
- if self.mturk_assignment_id is None:
- self._sync_hit_mapping()
- return self.mturk_assignment_id
-
- def get_mturk_hit_id(self) -> Optional[str]:
- """
- Return the MTurk hit id associated with this unit
- """
- if self.hit_id is None:
- self._sync_hit_mapping()
- return self.hit_id
-
- def get_requester(self) -> "MTurkRequester":
- """Wrapper around regular Requester as this will be MTurkRequesters"""
- if self.__requester is None:
- self.__requester = cast("MTurkRequester", super().get_requester())
- return self.__requester
-
- def clear_assigned_agent(self) -> None:
- """
- Additionally to clearing the agent, we also need to dissociate the
- hit_id from this unit in the MTurkDatastore
- """
- super().clear_assigned_agent()
- mturk_hit_id = self.get_mturk_hit_id()
- if mturk_hit_id is not None:
- self.datastore.register_assignment_to_hit(mturk_hit_id)
- self._sync_hit_mapping()
-
- # Required Unit functions
-
- def get_status(self) -> str:
- """Get status for this unit directly from MTurk, fall back on local info"""
- if self.db_status in [
- AssignmentState.CREATED,
- AssignmentState.ACCEPTED,
- AssignmentState.EXPIRED,
- AssignmentState.SOFT_REJECTED,
- ]:
- # These statuses don't change with a get_status call
- return self.db_status
-
- if self.db_status in [AssignmentState.COMPLETED, AssignmentState.REJECTED]:
- # These statuses only change on agent dependent changes
- agent = self.get_assigned_agent()
- found_status = self.db_status
- if agent is not None:
- agent_status = agent.get_status()
- if agent_status == AgentState.STATUS_APPROVED:
- found_status = AssignmentState.ACCEPTED
- elif agent_status == AgentState.STATUS_REJECTED:
- found_status = AssignmentState.REJECTED
- elif agent_status == AgentState.STATUS_SOFT_REJECTED:
- found_status = AssignmentState.SOFT_REJECTED
- else:
- logger.warning("Agent is None")
- if found_status != self.db_status:
- self.set_db_status(found_status)
- return self.db_status
-
- # Remaining statuses are tracking a live HIT
-
- mturk_hit_id = self.get_mturk_hit_id()
- if mturk_hit_id is None:
- # Can't determine anything if there is no HIT on this assignment
- return self.db_status
-
- requester = self.get_requester()
- client = self._get_client(requester._requester_name)
- hit = get_hit(client, mturk_hit_id)
- hit_data = hit["HIT"]
-
- local_status = self.db_status
- external_status = self.db_status
-
- if hit_data["HITStatus"] == "Assignable":
- external_status = AssignmentState.LAUNCHED
- elif hit_data["HITStatus"] == "Unassignable":
- external_status = AssignmentState.ASSIGNED
- elif hit_data["HITStatus"] in ["Reviewable", "Reviewing"]:
- external_status = AssignmentState.COMPLETED
- if hit_data["NumberOfAssignmentsAvailable"] != 0:
- external_status = AssignmentState.EXPIRED
- elif hit_data["HITStatus"] == "Disposed":
- # The HIT was deleted, must rely on what we have
- external_status = local_status
- else:
- raise Exception(f"Unexpected HIT status {hit_data['HITStatus']}")
-
- if external_status != local_status:
- if (
- local_status == AssignmentState.ASSIGNED
- and external_status == AssignmentState.LAUNCHED
- ):
- # Treat this as a return event, this hit is now doable by someone else
- agent = self.get_assigned_agent()
- if agent is not None:
- # mark the agent as having returned the HIT, to
- # free any running tasks and have Blueprint decide on cleanup.
- agent.update_status(AgentState.STATUS_RETURNED)
- self.set_db_status(external_status)
-
- return self.db_status
-
- def launch(self, task_url: str) -> None:
- """Create this HIT on MTurk (making it availalbe) and register the ids in the local db"""
- task_run = self.get_assignment().get_task_run()
- duration = task_run.get_task_config().assignment_duration_in_seconds
- run_id = task_run.db_id
- hit_type_id = self.datastore.get_run(run_id)["hit_type_id"]
- requester = self.get_requester()
- client = self._get_client(requester._requester_name)
- frame_height = 650
- hit_link, hit_id, response = create_hit_with_hit_type(
- client, frame_height, task_url, hit_type_id
- )
- # TODO(OWN) get this link to the frontend
- print(hit_link)
-
- # We create a hit for this unit, but note that this unit may not
- # necessarily match with the same HIT that was launched for it.
- self.datastore.new_hit(hit_id, hit_link, duration, run_id)
- self.set_db_status(AssignmentState.LAUNCHED)
- return None
-
- def expire(self) -> float:
- """
- Send a request to expire the HIT, and if it's not assigned return,
- otherwise just return the maximum assignment duration
- """
- delay = 0
- if self.get_status() == AssignmentState.ASSIGNED:
- # The assignment is currently being worked on,
- # so we will set the wait time to be the
- # amount of time we granted for working on this assignment
- if self.assignment_time_in_seconds is not None:
- delay = self.assignment_time_in_seconds
- mturk_hit_id = self.get_mturk_hit_id()
- requester = self.get_requester()
- client = self._get_client(requester._requester_name)
- if mturk_hit_id is not None:
- expire_hit(client, mturk_hit_id)
- return delay
- else:
- unassigned_hit_ids = self.datastore.get_unassigned_hit_ids(self.task_run_id)
-
- if len(unassigned_hit_ids) == 0:
- logger.warning(
- f"Number of unassigned hit IDs more than 1; Potential RACE CONDITION"
- )
- return delay
- hit_id = unassigned_hit_ids[0]
- expire_hit(client, hit_id)
- self.datastore.register_assignment_to_hit(hit_id, self.db_id)
- self.set_db_status(AssignmentState.EXPIRED)
- return delay
-
- def is_expired(self) -> bool:
- """
- Determine if this unit is expired as according to the vendor.
-
- In this case, we keep track of the expiration locally by refreshing
- the hit's status and seeing if we've expired.
- """
- return self.get_status() == AssignmentState.EXPIRED
-
- @staticmethod
- def new(
- db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
- ) -> "Unit":
- """Create a Unit for the given assignment"""
- return MTurkUnit._register_unit(
- db, assignment, index, pay_amount, PROVIDER_TYPE
- )
diff --git a/mephisto/providers/mturk/mturk_utils.py b/mephisto/providers/mturk/mturk_utils.py
index a2a1c6fa7..2248e7f86 100644
--- a/mephisto/providers/mturk/mturk_utils.py
+++ b/mephisto/providers/mturk/mturk_utils.py
@@ -4,701 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import boto3
-import os
-import json
-import re
-from typing import Dict, Optional, Tuple, List, Any, TYPE_CHECKING
-from datetime import datetime
-
-from botocore import client
-from botocore.exceptions import ClientError
-from botocore.exceptions import ProfileNotFound
-from botocore.config import Config
-from omegaconf import DictConfig
-
-from mephisto.core.logger_core import get_logger
-from mephisto.core.config_handler import get_config_arg
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-if TYPE_CHECKING:
- from mephisto.data_model.task_config import TaskConfig
-
-MTURK_TASK_FEE = 0.2
-MTURK_BONUS_FEE = 0.2
-SANDBOX_ENDPOINT = "https://mturk-requester-sandbox.us-east-1.amazonaws.com"
-
-MTurkClient = Any
-
-MTURK_LOCALE_REQUIREMENT = "00000000000000000071"
-
-botoconfig = Config(retries=dict(max_attempts=10))
-
-
-def client_is_sandbox(client: MTurkClient) -> bool:
- """
- Determine if the given client is communicating with
- the live server or a sandbox
- """
- return client.meta.endpoint_url == SANDBOX_ENDPOINT
-
-
-def check_aws_credentials(profile_name: str) -> bool:
- try:
- # Check existing credentials
- boto3.Session(profile_name=profile_name)
- return True
- except ProfileNotFound:
- return False
-
-
-def setup_aws_credentials(
- profile_name: str, register_args: Optional[DictConfig] = None
-) -> bool:
- try:
- # Check existing credentials
- boto3.Session(profile_name=profile_name)
- return True
- except ProfileNotFound:
- # Setup new credentials
- if register_args is not None:
- aws_access_key_id = register_args.access_key_id
- aws_secret_access_key = register_args.secret_access_key
- else:
- print(
- f"AWS credentials for {profile_name} not found. Please create "
- "an IAM user with "
- "programmatic access and AdministratorAccess policy at "
- 'https://console.aws.amazon.com/iam/ (On the "Set permissions" '
- 'page, choose "Attach existing policies directly" and then select '
- '"AdministratorAccess" policy). After creating the IAM user, '
- "please enter the user's Access Key ID and Secret Access "
- "Key below:"
- )
- aws_access_key_id = input("Access Key ID: ")
- aws_secret_access_key = input("Secret Access Key: ")
- if not os.path.exists(os.path.expanduser("~/.aws/")):
- os.makedirs(os.path.expanduser("~/.aws/"))
- aws_credentials_file_path = "~/.aws/credentials"
- aws_credentials_file_string = None
- expanded_aws_file_path = os.path.expanduser(aws_credentials_file_path)
- if os.path.exists(expanded_aws_file_path):
- with open(expanded_aws_file_path, "r") as aws_credentials_file:
- aws_credentials_file_string = aws_credentials_file.read()
- with open(expanded_aws_file_path, "a+") as aws_credentials_file:
- # Clean up file
- if aws_credentials_file_string:
- if aws_credentials_file_string.endswith("\n\n"):
- pass
- elif aws_credentials_file_string.endswith("\n"):
- aws_credentials_file.write("\n")
- else:
- aws_credentials_file.write("\n\n")
- # Write login details
- aws_credentials_file.write("[{}]\n".format(profile_name))
- aws_credentials_file.write(
- "aws_access_key_id={}\n".format(aws_access_key_id)
- )
- aws_credentials_file.write(
- "aws_secret_access_key={}\n".format(aws_secret_access_key)
- )
- print(
- "AWS credentials successfully saved in {} file.\n".format(
- aws_credentials_file_path
- )
- )
- return True
-
-
-def calculate_mturk_task_fee(task_amount: float) -> float:
- """
- MTurk Pricing: https://requester.mturk.com/pricing
- 20% fee on the reward and bonus amount (if any) you pay Workers.
- """
- return MTURK_TASK_FEE * task_amount
-
-
-def calculate_mturk_bonus_fee(bonus_amount: float) -> float:
- """
- MTurk Pricing: https://requester.mturk.com/pricing
- 20% fee on the reward and bonus amount (if any) you pay Workers.
- """
- return MTURK_TASK_FEE * bonus_amount
-
-
-def get_requester_balance(client: MTurkClient) -> float:
- """Get the balance for the requester associated with this client"""
- return float(client.get_account_balance()["AvailableBalance"])
-
-
-def check_mturk_balance(client: MTurkClient, balance_needed: float):
- """Checks to see if there is at least balance_needed amount in the
- requester account, returns True if the balance is greater than
- balance_needed
- """
- # Test that you can connect to the API by checking your account balance
- # In Sandbox this always returns $10,000
- try:
- user_balance = float(client.get_account_balance()["AvailableBalance"])
- except ClientError as e:
- if e.response["Error"]["Code"] == "RequestError":
- print(
- "ERROR: To use the MTurk API, you will need an Amazon Web "
- "Services (AWS) Account. Your AWS account must be linked to "
- "your Amazon Mechanical Turk Account. Visit "
- "https://requestersandbox.mturk.com/developer to get started. "
- "(Note: if you have recently linked your account, please wait "
- "for a couple minutes before trying again.)\n"
- )
- quit()
- else:
- raise
-
- if user_balance < balance_needed:
- print(
- "You might not have enough money in your MTurk account. Please go "
- "to https://requester.mturk.com/account and increase your balance "
- "to at least ${}, and then try again.".format(balance_needed)
- )
- return False
- else:
- return True
-
-
-def create_hit_config(
- opt: Dict[str, Any], task_description: str, unique_worker: bool, is_sandbox: bool
-) -> None:
- """Writes a HIT config to file"""
- mturk_submit_url = "https://workersandbox.mturk.com/mturk/externalSubmit"
- if not is_sandbox:
- mturk_submit_url = "https://www.mturk.com/mturk/externalSubmit"
- hit_config = {
- "task_description": task_description,
- "is_sandbox": is_sandbox,
- "mturk_submit_url": mturk_submit_url,
- "unique_worker": unique_worker,
- "frame_height": opt.get("frame_height", 650),
- "allow_reviews": opt.get("allow_reviews", False),
- "block_mobile": opt.get("block_mobile", True),
- # Populate the chat pane title from chat_title, defaulting to the
- # hit_title if the task provides no chat_title
- "chat_title": opt.get("chat_title", opt.get("hit_title", "Live Chat")),
- "template_type": opt.get("frontend_template_type", "default"),
- }
- hit_config_file_path = os.path.join(opt["tmp_dir"], "hit_config.json")
- if os.path.exists(hit_config_file_path):
- os.remove(hit_config_file_path)
- with open(hit_config_file_path, "w") as hit_config_file:
- hit_config_file.write(json.dumps(hit_config))
-
-
-def delete_qualification(client: MTurkClient, qualification_id: str) -> None:
- """Deletes a qualification by id"""
- client.delete_qualification_type(QualificationTypeId=qualification_id)
-
-
-def find_qualification(
- client: MTurkClient, qualification_name: str, must_be_owned: bool = True
-) -> Tuple[bool, Optional[str]]:
- """Query amazon to find the existing qualification name, return the Id,
- otherwise return none.
- If must_be_owned is true, it only returns qualifications owned by the user.
- Will return False if it finds another's qualification
-
- The return format is (meets_owner_constraint, qual_id)
- """
- # Search for the qualification owned by the current user
- response = client.list_qualification_types(
- Query=qualification_name, MustBeRequestable=True, MustBeOwnedByCaller=True
- )
- for qualification in response["QualificationTypes"]:
- if qualification["Name"] == qualification_name:
- return (True, qualification["QualificationTypeId"])
-
- # Qualification was not found to exist, check to see if someone else has it
- response = client.list_qualification_types(
- Query=qualification_name, MustBeRequestable=True, MustBeOwnedByCaller=False
- )
-
- for qualification in response["QualificationTypes"]:
- if qualification["Name"] == qualification_name:
- if must_be_owned:
- return (False, qualification["QualificationTypeId"])
- return (True, qualification["QualificationTypeId"])
- return (True, None)
-
-
-def find_or_create_qualification(
- client: MTurkClient,
- qualification_name: str,
- description: str,
- must_be_owned: bool = True,
-) -> Optional[str]:
- """Query amazon to find the existing qualification name, return the Id. If
- it exists and must_be_owned is true but we don't own it, this returns none.
- If it doesn't exist, the qualification is created
- """
- qual_usable, qual_id = find_qualification(
- client, qualification_name, must_be_owned=must_be_owned
- )
-
- if qual_usable is False:
- return None
-
- if qual_id is not None:
- return qual_id
-
- # Create the qualification, as it doesn't exist yet
- response = client.create_qualification_type(
- Name=qualification_name,
- Description=description,
- QualificationTypeStatus="Active",
- )
- return response["QualificationType"]["QualificationTypeId"]
-
-
-def give_worker_qualification(
- client: MTurkClient,
- worker_id: str,
- qualification_id: str,
- value: Optional[int] = None,
-) -> None:
- """Give a qualification to the given worker"""
- if value is not None:
- client.associate_qualification_with_worker(
- QualificationTypeId=qualification_id,
- WorkerId=worker_id,
- IntegerValue=value,
- SendNotification=False,
- )
- else:
- client.associate_qualification_with_worker(
- QualificationTypeId=qualification_id,
- WorkerId=worker_id,
- IntegerValue=1,
- SendNotification=False,
- )
-
-
-def remove_worker_qualification(
- client: MTurkClient, worker_id: str, qualification_id: str, reason: str = ""
-) -> None:
- """Give a qualification to the given worker"""
- client.disassociate_qualification_from_worker(
- QualificationTypeId=qualification_id, WorkerId=worker_id, Reason=reason
- )
-
-
-def convert_mephisto_qualifications(
- client: MTurkClient, qualifications: List[Dict[str, Any]]
-):
- """Convert qualifications from mephisto's format to MTurk's"""
- converted_qualifications = []
- for qualification in qualifications:
- converted = {}
- mturk_keys = [
- "QualificationTypeId",
- "Comparator",
- "IntegerValue",
- "IntegerValues",
- "LocaleValues",
- "ActionsGuarded",
- ]
- for key in mturk_keys:
- converted[key] = qualification.get(key)
-
- if converted["QualificationTypeId"] is None:
- qualification_name = qualification["qualification_name"]
- if client_is_sandbox(client):
- qualification_name += "_sandbox"
- qual_id = find_or_create_qualification(
- client,
- qualification_name,
- "Qualification required for Mephisto-launched tasks",
- False,
- )
- if qual_id is None:
- # TODO log more loudly that this qualification is being skipped?
- print(
- f"Qualification name {qualification_name} can not be found or created on MTurk"
- )
- converted["QualificationTypeId"] = qual_id
-
- if converted["Comparator"] is None:
- converted["Comparator"] = qualification["comparator"]
-
- # if no Mturk Values are set, pull from the qualification's value
- if (
- converted["IntegerValue"] is None
- and converted["IntegerValues"] is None
- and converted["LocaleValues"] is None
- ):
- value = qualification["value"]
- if isinstance(value, list):
- converted["IntegerValues"] = value
- elif isinstance(value, int):
- converted["IntegerValue"] = value
-
- # IntegerValue is deprecated, and needs conversion to IntegerValues
- if converted["IntegerValue"] is not None:
- converted["IntegerValues"] = [converted["IntegerValue"]]
- del converted["IntegerValue"]
-
- if converted["IntegerValues"] is None:
- del converted["IntegerValues"]
-
- if converted["LocaleValues"] is None:
- del converted["LocaleValues"]
-
- if converted["ActionsGuarded"] is None:
- converted["ActionsGuarded"] = "DiscoverPreviewAndAccept"
-
- converted_qualifications.append(converted)
- return converted_qualifications
-
-
-def create_hit_type(
- client: MTurkClient,
- task_config: "TaskConfig",
- qualifications: List[Dict[str, Any]],
- auto_approve_delay: Optional[int] = 7 * 24 * 3600, # default 1 week
-) -> str:
- """Create a HIT type to be used to generate HITs of the requested params"""
- hit_title = task_config.task_title
- hit_description = task_config.task_description
- hit_keywords = ",".join(task_config.task_tags)
- hit_reward = task_config.task_reward
- assignment_duration_in_seconds = task_config.assignment_duration_in_seconds
- existing_qualifications = convert_mephisto_qualifications(client, qualifications)
-
- # If the user hasn't specified a location qualification, we assume to
- # restrict the HIT to some english-speaking countries.
- locale_requirements: List[Any] = []
- has_locale_qual = False
- if existing_qualifications is not None:
- for q in existing_qualifications:
- if q["QualificationTypeId"] == MTURK_LOCALE_REQUIREMENT:
- has_locale_qual = True
- locale_requirements += existing_qualifications
-
- if not has_locale_qual and not client_is_sandbox(client):
- allowed_locales = get_config_arg("mturk", "allowed_locales")
- if allowed_locales is None:
- allowed_locales = [
- {"Country": "US"},
- {"Country": "CA"},
- {"Country": "GB"},
- {"Country": "AU"},
- {"Country": "NZ"},
- ]
- locale_requirements.append(
- {
- "QualificationTypeId": MTURK_LOCALE_REQUIREMENT,
- "Comparator": "In",
- "LocaleValues": allowed_locales,
- "ActionsGuarded": "DiscoverPreviewAndAccept",
- }
- )
-
- # Create the HIT type
- response = client.create_hit_type(
- AutoApprovalDelayInSeconds=auto_approve_delay,
- AssignmentDurationInSeconds=assignment_duration_in_seconds,
- Reward=str(hit_reward),
- Title=hit_title,
- Keywords=hit_keywords,
- Description=hit_description,
- QualificationRequirements=locale_requirements,
- )
- hit_type_id = response["HITTypeId"]
- return hit_type_id
-
-
-def create_hit_with_hit_type(
- client: MTurkClient,
- frame_height: int,
- page_url: str,
- hit_type_id: str,
- num_assignments: int = 1,
-) -> Tuple[str, str, Dict[str, Any]]:
- """Creates the actual HIT given the type and page to direct clients to"""
- page_url = page_url.replace("&", "&")
- amazon_ext_url = (
- "http://mechanicalturk.amazonaws.com/"
- "AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd"
- )
- question_data_struture = (
- ''
- "{}" # noqa: E131
- "{}"
- ""
- "".format(amazon_ext_url, page_url, 650)
- )
-
- is_sandbox = client_is_sandbox(client)
-
- # Create the HIT
- response = client.create_hit_with_hit_type(
- HITTypeId=hit_type_id,
- MaxAssignments=num_assignments,
- LifetimeInSeconds=60 * 60 * 24 * 31,
- Question=question_data_struture,
- )
-
- # The response included several fields that will be helpful later
- hit_type_id = response["HIT"]["HITTypeId"]
- hit_id = response["HIT"]["HITId"]
-
- # Construct the hit URL
- url_target = "workersandbox"
- if not is_sandbox:
- url_target = "www"
- hit_link = "https://{}.mturk.com/mturk/preview?groupId={}".format(
- url_target, hit_type_id
- )
- return hit_link, hit_id, response
-
-
-def expire_hit(client: MTurkClient, hit_id: str):
- # Update expiration to a time in the past, the HIT expires instantly
- past_time = datetime(2015, 1, 1)
- client.update_expiration_for_hit(HITId=hit_id, ExpireAt=past_time)
-
-
-def setup_sns_topic(
- session: boto3.Session, task_name: str, server_url: str, task_run_id: str
-) -> str:
- """Create an sns topic and return the arn identifier"""
- # Create the topic and subscribe to it so that our server receives notifs
- client = session.client("sns", region_name="us-east-1", config=botoconfig)
- pattern = re.compile("[^a-zA-Z0-9_-]+")
- filtered_task_name = pattern.sub("", task_name)
- response = client.create_topic(Name=filtered_task_name)
- arn = response["TopicArn"]
- topic_sub_url = "{}/sns_posts?task_run_id={}".format(server_url, task_run_id)
- client.subscribe(TopicArn=arn, Protocol="https", Endpoint=topic_sub_url)
- response = client.get_topic_attributes(TopicArn=arn)
- policy_json = """{{
- "Version": "2008-10-17",
- "Id": "{}/MTurkOnlyPolicy",
- "Statement": [
- {{
- "Sid": "MTurkOnlyPolicy",
- "Effect": "Allow",
- "Principal": {{
- "Service": "mturk-requester.amazonaws.com"
- }},
- "Action": "SNS:Publish",
- "Resource": "{}"
- }}
- ]}}""".format(
- arn, arn
- )
- client.set_topic_attributes(
- TopicArn=arn, AttributeName="Policy", AttributeValue=policy_json
- )
- return arn
-
-
-def subscribe_to_hits(client: MTurkClient, hit_type_id: str, sns_arn: str) -> None:
- """Subscribe an sns channel to the specific hit type"""
- # Get the mturk client and create notifications for our hits
- client.update_notification_settings(
- HITTypeId=hit_type_id,
- Notification={
- "Destination": sns_arn,
- "Transport": "SNS",
- "Version": "2006-05-05",
- "EventTypes": [
- "AssignmentAbandoned",
- "AssignmentReturned",
- "AssignmentSubmitted",
- ],
- },
- Active=True,
- )
-
-
-def send_test_notif(client: MTurkClient, topic_arn: str, event_type: str) -> None:
- """
- Send a test notification of the given event type to the sns
- queue associated with the given arn
- """
- client.send_test_event_notification(
- Notification={
- "Destination": topic_arn,
- "Transport": "SNS",
- "Version": "2006-05-05",
- "EventTypes": [
- "AssignmentAbandoned",
- "AssignmentReturned",
- "AssignmentSubmitted",
- ],
- },
- TestEventType=event_type,
- )
-
-
-def delete_sns_topic(session: boto3.Session, topic_arn: str) -> None:
- """Remove the sns queue of the given identifier"""
- client = session.client("sns", region_name="us-east-1", config=botoconfig)
- client.delete_topic(TopicArn=topic_arn)
-
-
-def get_hit(client: MTurkClient, hit_id: str) -> Dict[str, Any]:
- """Get hit from mturk by hit_id"""
- return client.get_hit(HITId=hit_id)
-
-
-def get_assignment(client: MTurkClient, assignment_id: str) -> Dict[str, Any]:
- """Gets assignment from mturk by assignment_id. Only works if the
- assignment is in a completed state
- """
- return client.get_assignment(AssignmentId=assignment_id)
-
-
-def get_assignments_for_hit(client: MTurkClient, hit_id: str) -> List[Dict[str, Any]]:
- """Get completed assignments for a hit"""
- assignments_info = client.list_assignments_for_hit(HITId=hit_id)
- return assignments_info.get("Assignments", [])
-
-
-def approve_work(
- client: MTurkClient, assignment_id: str, override_rejection: bool = False
-) -> None:
- """approve work for a given assignment through the mturk client"""
- try:
- client.approve_assignment(
- AssignmentId=assignment_id, OverrideRejection=override_rejection
- )
- except Exception as e:
- # TODO(#93) Break down this error to the many reasons why approve may fail,
- # only silently pass on approving an already approved assignment
- logger.exception(
- f"Approving MTurk assignment failed, likely because it has auto-approved. Details: {e}",
- exc_info=True,
- )
-
-
-def reject_work(client: MTurkClient, assignment_id: str, reason: str) -> None:
- """reject work for a given assignment through the mturk client"""
- try:
- client.reject_assignment(AssignmentId=assignment_id, RequesterFeedback=reason)
- except Exception as e:
- # TODO(#93) Break down this error to the many reasons why approve may fail,
- # only silently pass on approving an already approved assignment
- logger.exception(
- f"Rejecting MTurk assignment failed, likely because it has auto-approved. Details:{e}",
- exc_info=True,
- )
-
-
-def approve_assignments_for_hit(
- client: MTurkClient, hit_id: str, override_rejection: bool = False
-):
- """Approve work for assignments associated with a given hit, through
- mturk client
- """
- assignments = get_assignments_for_hit(client, hit_id)
- for assignment in assignments:
- assignment_id = assignment["AssignmentId"]
- client.approve_assignment(
- AssignmentId=assignment_id, OverrideRejection=override_rejection
- )
-
-
-def block_worker(client: MTurkClient, worker_id: str, reason: str) -> None:
- """Block a worker by id using the mturk client, passes reason along"""
- res = client.create_worker_block(WorkerId=worker_id, Reason=reason)
-
-
-def unblock_worker(client: MTurkClient, worker_id: str, reason: str) -> None:
- """Remove a block on the given worker"""
- client.delete_worker_block(WorkerId=worker_id, Reason=reason)
-
-
-def is_worker_blocked(client: MTurkClient, worker_id: str) -> bool:
- """Determine if the given worker is blocked by this client"""
- blocks = client.list_worker_blocks(MaxResults=100)["WorkerBlocks"]
- blocked_ids = [x["WorkerId"] for x in blocks]
- return worker_id in blocked_ids
-
-
-def pay_bonus(
- client: MTurkClient,
- worker_id: str,
- bonus_amount: float,
- assignment_id: str,
- reason: str,
- unique_request_token: str,
-) -> bool:
- """Handles paying bonus to a Turker, fails for insufficient funds.
- Returns True on success and False on failure
- """
- total_cost = bonus_amount + calculate_mturk_bonus_fee(bonus_amount)
- if not check_mturk_balance(client, balance_needed=total_cost):
- print("Cannot pay bonus. Reason: Insufficient " "funds in your MTurk account.")
- return False
-
- client.send_bonus(
- WorkerId=worker_id,
- BonusAmount=str(bonus_amount),
- AssignmentId=assignment_id,
- Reason=reason,
- UniqueRequestToken=unique_request_token,
- )
-
- return True
-
-
-def email_worker(
- client: MTurkClient, worker_id: str, subject: str, message_text: str
-) -> Tuple[bool, str]:
- """Send an email to a worker through the mturk client"""
- response = client.notify_workers(
- Subject=subject, MessageText=message_text, WorkerIds=[worker_id]
- )
- if len(response["NotifyWorkersFailureStatuses"]) > 0:
- failure_message = response["NotifyWorkersFailureStatuses"][0]
- return (False, failure_message["NotifyWorkersFailureMessage"])
- else:
- return (True, "")
-
-
-def get_outstanding_hits(client: MTurkClient) -> Dict[str, List[Dict[str, Any]]]:
- """Return the HITs sorted by HITTypeId that are still on the MTurk Server"""
- new_hits = client.list_hits(MaxResults=100)
- all_hits = new_hits["HITs"]
- while len(new_hits["HITs"]) > 0:
- new_hits = client.list_hits(MaxResults=100, NextToken=new_hits["NextToken"])
- all_hits += new_hits["HITs"]
-
- hit_by_type: Dict[str, List[Dict[str, Any]]] = {}
- for h in all_hits:
- hit_type = h["HITTypeId"]
- if hit_type not in hit_by_type:
- hit_by_type[hit_type] = []
- hit_by_type[hit_type].append(h)
-
- return hit_by_type
-
-
-def expire_and_dispose_hits(
- client: MTurkClient, hits: List[Dict[str, Any]]
-) -> List[Dict[str, Any]]:
- """
- Loops over attempting to expire and dispose any hits in the hits list that can be disposed
-
- Returns any HITs that could not be disposed of
- """
- non_disposed_hits = []
- for h in hits:
- try:
- client.delete_hit(HITId=h["HITId"])
- except:
- client.update_expiration_for_hit(
- HITId=h["HITId"], ExpireAt=datetime(2015, 1, 1)
- )
- non_disposed_hits.append(h)
- return non_disposed_hits
+from mephisto.abstractions.providers.mturk.mturk_utils import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/providers/mturk/mturk_worker.py b/mephisto/providers/mturk/mturk_worker.py
index 4ce2ae8f2..dccb4da9b 100644
--- a/mephisto/providers/mturk/mturk_worker.py
+++ b/mephisto/providers/mturk/mturk_worker.py
@@ -4,200 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.worker import Worker
-from mephisto.data_model.requester import Requester
-from mephisto.providers.mturk.provider_type import PROVIDER_TYPE
-from mephisto.providers.mturk.mturk_utils import (
- pay_bonus,
- block_worker,
- unblock_worker,
- is_worker_blocked,
- give_worker_qualification,
- remove_worker_qualification,
+from mephisto.abstractions.providers.mturk.mturk_worker import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
)
-from mephisto.providers.mturk.mturk_requester import MTurkRequester
-
-from uuid import uuid4
-
-from typing import List, Optional, Tuple, Dict, Mapping, Any, cast, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.providers.mturk.mturk_datastore import MTurkDatastore
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Unit
- from mephisto.providers.mturk.mturk_unit import MTurkUnit
- from mephisto.providers.mturk.mturk_requester import MTurkRequester
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-
-class MTurkWorker(Worker):
- """
- This class represents an individual - namely a person. It maintains components of ongoing identity for a user.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
- self.PROVIDER_TYPE
- )
- self._worker_name = self.worker_name # sandbox workers use a different name
-
- @classmethod
- def get_from_mturk_worker_id(
- cls, db: "MephistoDB", mturk_worker_id: str
- ) -> Optional["MTurkWorker"]:
- """Get the MTurkWorker from the given worker_id"""
- if cls.PROVIDER_TYPE != PROVIDER_TYPE:
- mturk_worker_id += "_sandbox"
- workers = db.find_workers(
- worker_name=mturk_worker_id, provider_type=cls.PROVIDER_TYPE
- )
- if len(workers) == 0:
- # TODO warn?
- return None
- return workers[0]
-
- def get_mturk_worker_id(self):
- return self._worker_name
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_client_for_requester(requester_name)
-
- def grant_crowd_qualification(
- self, qualification_name: str, value: int = 1
- ) -> None:
- """
- Grant a qualification by the given name to this worker. Check the local
- MTurk db to find the matching MTurk qualification to grant, and pass
- that. If no qualification exists, try to create one.
-
- In creating a new qualification, Mephisto resolves the ambiguity over which
- requester to associate that qualification with by using the FIRST requester
- of the given account type (either `mturk` or `mturk_sandbox`)
- """
- mturk_qual_details = self.datastore.get_qualification_mapping(
- qualification_name
- )
- if mturk_qual_details is not None:
- requester = Requester(self.db, mturk_qual_details["requester_id"])
- qualification_id = mturk_qual_details["mturk_qualification_id"]
- else:
- target_type = (
- "mturk_sandbox" if qualification_name.endswith("sandbox") else "mturk"
- )
- requester = self.db.find_requesters(provider_type=target_type)[0]
- assert isinstance(
- requester, MTurkRequester
- ), "find_requesters must return mturk requester for given provider types"
- qualification_id = requester._create_new_mturk_qualification(
- qualification_name
- )
- assert isinstance(
- requester, MTurkRequester
- ), "Must be an MTurk requester for MTurk quals"
- client = self._get_client(requester._requester_name)
- give_worker_qualification(
- client, self.get_mturk_worker_id(), qualification_id, value
- )
- return None
-
- def revoke_crowd_qualification(self, qualification_name: str) -> None:
- """
- Revoke the qualification by the given name from this worker. Check the local
- MTurk db to find the matching MTurk qualification to revoke, pass if
- no such qualification exists.
- """
- mturk_qual_details = self.datastore.get_qualification_mapping(
- qualification_name
- )
- if mturk_qual_details is None:
- logger.error(
- f"No locally stored MTurk qualification to revoke for name {qualification_name}"
- )
- return None
-
- requester = Requester(self.db, mturk_qual_details["requester_id"])
- assert isinstance(
- requester, MTurkRequester
- ), "Must be an MTurk requester from MTurk quals"
- client = self._get_client(requester._requester_name)
- qualification_id = mturk_qual_details["mturk_qualification_id"]
- remove_worker_qualification(
- client, self.get_mturk_worker_id(), qualification_id
- )
- return None
-
- def bonus_worker(
- self, amount: float, reason: str, unit: Optional["Unit"] = None
- ) -> Tuple[bool, str]:
- """Bonus this worker for work any reason. Return tuple of success and failure message"""
- if unit is None:
- # TODO(WISH) implement
- return False, "bonusing via compensation tasks not yet available"
-
- unit = cast("MTurkUnit", unit)
- requester = unit.get_assignment().get_task_run().get_requester()
- client = self._get_client(requester._requester_name)
- mturk_assignment_id = unit.get_mturk_assignment_id()
- assert mturk_assignment_id is not None, "Cannot bonus for a unit with no agent"
- pay_bonus(
- client, self._worker_name, amount, mturk_assignment_id, reason, str(uuid4())
- )
- return True, ""
-
- def block_worker(
- self,
- reason: str,
- unit: Optional["Unit"] = None,
- requester: Optional["Requester"] = None,
- ) -> Tuple[bool, str]:
- """Block this worker for a specified reason. Return success of block"""
- if unit is None and requester is None:
- # TODO(WISH) soft block from all requesters? Maybe have the master
- # requester soft block?
- return (
- False,
- "Blocking without a unit or requester not yet supported for MTurkWorkers",
- )
- elif unit is not None and requester is None:
- requester = unit.get_assignment().get_task_run().get_requester()
- requester = cast("MTurkRequester", requester)
- client = self._get_client(requester._requester_name)
- block_worker(client, self._worker_name, reason)
- return True, ""
-
- def unblock_worker(self, reason: str, requester: "Requester") -> bool:
- """unblock a blocked worker for the specified reason. Return success of unblock"""
- requester = cast("MTurkRequester", requester)
- client = self._get_client(requester._requester_name)
- unblock_worker(client, self._worker_name, reason)
- return True
-
- def is_blocked(self, requester: "Requester") -> bool:
- """Determine if a worker is blocked"""
- requester = cast("MTurkRequester", requester)
- client = self._get_client(requester._requester_name)
- return is_worker_blocked(client, self._worker_name)
-
- def is_eligible(self, task_run: "TaskRun") -> bool:
- """
- Qualifications are handled primarily by MTurk, so if a worker is able to get
- through to be able to access the task, they should be eligible
- """
- return True
-
- @staticmethod
- def new(db: "MephistoDB", worker_id: str) -> "Worker":
- return MTurkWorker._register_worker(db, worker_id, PROVIDER_TYPE)
diff --git a/mephisto/providers/mturk/provider_type.py b/mephisto/providers/mturk/provider_type.py
index 240b3ffa5..555ca4c5b 100644
--- a/mephisto/providers/mturk/provider_type.py
+++ b/mephisto/providers/mturk/provider_type.py
@@ -4,4 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-PROVIDER_TYPE = "mturk"
+from mephisto.abstractions.providers.mturk.provider_type import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/providers/mturk_sandbox/__init__.py b/mephisto/providers/mturk_sandbox/__init__.py
index 240697e32..cbafdf3e4 100644
--- a/mephisto/providers/mturk_sandbox/__init__.py
+++ b/mephisto/providers/mturk_sandbox/__init__.py
@@ -3,3 +3,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.providers.mturk_sandbox import *
+import warnings
+
+warnings.warn(
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/providers/mturk_sandbox/provider_type.py b/mephisto/providers/mturk_sandbox/provider_type.py
index 2c944bfe9..46254c7ad 100644
--- a/mephisto/providers/mturk_sandbox/provider_type.py
+++ b/mephisto/providers/mturk_sandbox/provider_type.py
@@ -4,4 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-PROVIDER_TYPE = "mturk_sandbox"
+from mephisto.abstractions.providers.mturk_sandbox.provider_type import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
+)
diff --git a/mephisto/providers/mturk_sandbox/sandbox_mturk_agent.py b/mephisto/providers/mturk_sandbox/sandbox_mturk_agent.py
index f15a92b87..027925ec6 100644
--- a/mephisto/providers/mturk_sandbox/sandbox_mturk_agent.py
+++ b/mephisto/providers/mturk_sandbox/sandbox_mturk_agent.py
@@ -4,38 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
-from mephisto.providers.mturk.mturk_agent import MTurkAgent
-
-from typing import Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.providers.mturk.requester import MTurkRequester
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.agent import Agent
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.worker import Worker
-
-
-class SandboxMTurkAgent(MTurkAgent):
- """
- Wrapper for a regular MTurk agent that will only communicate with sandbox
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def _get_client(self) -> Any:
- """
- Get an mturk client for usage with mturk_utils for this agent
- """
- unit = self.get_unit()
- requester: "MTurkRequester" = unit.get_requester()
- return self.datastore.get_sandbox_client_for_requester(
- requester._requester_name
- )
-
- @staticmethod
- def new(db: "MephistoDB", worker: "Worker", unit: "Unit") -> "Agent":
- """Create an agent for this worker to be used for work on the given Unit."""
- return SandboxMTurkAgent._register_agent(db, worker, unit, PROVIDER_TYPE)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_agent import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
+)
diff --git a/mephisto/providers/mturk_sandbox/sandbox_mturk_provider.py b/mephisto/providers/mturk_sandbox/sandbox_mturk_provider.py
index e2514e0e0..f58c674b0 100644
--- a/mephisto/providers/mturk_sandbox/sandbox_mturk_provider.py
+++ b/mephisto/providers/mturk_sandbox/sandbox_mturk_provider.py
@@ -4,73 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
-from mephisto.providers.mturk.mturk_provider import MTurkProvider, MTurkProviderArgs
-from mephisto.providers.mturk_sandbox.sandbox_mturk_agent import SandboxMTurkAgent
-from mephisto.providers.mturk_sandbox.sandbox_mturk_requester import (
- SandboxMTurkRequester,
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_provider import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
)
-from mephisto.providers.mturk_sandbox.sandbox_mturk_unit import SandboxMTurkUnit
-from mephisto.providers.mturk_sandbox.sandbox_mturk_worker import SandboxMTurkWorker
-from mephisto.core.registry import register_mephisto_abstraction
-
-import os
-from dataclasses import dataclass
-
-from typing import Any, ClassVar, Type, List, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.requester import Requester
- from mephisto.data_model.agent import Agent
-
-
-@dataclass
-class SandboxMTurkProviderArgs(MTurkProviderArgs):
- """Provider args for a sandbox MTurk provider"""
-
- _provider_type: str = PROVIDER_TYPE
-
-
-@register_mephisto_abstraction()
-class SandboxMTurkProvider(MTurkProvider):
- """
- Mock implementation of a CrowdProvider that stores everything
- in a local state in the class for use in tests.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- UnitClass: ClassVar[Type["Unit"]] = SandboxMTurkUnit
-
- RequesterClass: ClassVar[Type["Requester"]] = SandboxMTurkRequester
-
- WorkerClass: ClassVar[Type["Worker"]] = SandboxMTurkWorker
-
- AgentClass: ClassVar[Type["Agent"]] = SandboxMTurkAgent
-
- ArgsClass = SandboxMTurkProviderArgs
-
- SUPPORTED_TASK_TYPES: ClassVar[List[str]] = [
- # TODO
- ]
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_sandbox_client_for_requester(requester_name)
-
- @classmethod
- def get_wrapper_js_path(cls):
- """
- Return the path to the `wrap_crowd_source.js` file for this
- provider to be deployed to the server
- """
- return os.path.join(os.path.dirname(__file__), "wrap_crowd_source.js")
-
- def cleanup_qualification(self, qualification_name: str) -> None:
- """Remove the qualification from the sandbox server"""
- return super().cleanup_qualification(f"{qualification_name}_sandbox")
diff --git a/mephisto/providers/mturk_sandbox/sandbox_mturk_requester.py b/mephisto/providers/mturk_sandbox/sandbox_mturk_requester.py
index 03da0065c..353b63170 100644
--- a/mephisto/providers/mturk_sandbox/sandbox_mturk_requester.py
+++ b/mephisto/providers/mturk_sandbox/sandbox_mturk_requester.py
@@ -4,55 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.providers.mturk.mturk_requester import MTurkRequester
-from mephisto.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
-
-from typing import Any, Optional, Mapping, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.requester import Requester
- from mephisto.providers.mturk.mturk_datastore import MTurkDatastore
-
-
-class SandboxMTurkRequester(MTurkRequester):
- """Wrapper around regular requester that handles removing the appended "sandbox" name"""
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
- self.PROVIDER_TYPE
- )
- # Use _requester_name to preserve sandbox behavior which
- # utilizes a different requester_name
- assert self.requester_name.endswith(
- "_sandbox"
- ), f"{self.requester_name} is not a sandbox requester"
- self._requester_name = self.requester_name[:-8]
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_sandbox_client_for_requester(requester_name)
-
- def is_sandbox(self) -> bool:
- """
- Determine if this is a requester on sandbox
- """
- return True
-
- # Required functions for a Requester implementation
-
- @staticmethod
- def new(db: "MephistoDB", requester_name: str) -> "Requester":
- if not requester_name.endswith("_sandbox"):
- requester_name += "_sandbox"
- return SandboxMTurkRequester._register_requester(
- db, requester_name, PROVIDER_TYPE
- )
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_requester import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
+)
diff --git a/mephisto/providers/mturk_sandbox/sandbox_mturk_unit.py b/mephisto/providers/mturk_sandbox/sandbox_mturk_unit.py
index cb05f1854..65aaf9eef 100644
--- a/mephisto/providers/mturk_sandbox/sandbox_mturk_unit.py
+++ b/mephisto/providers/mturk_sandbox/sandbox_mturk_unit.py
@@ -4,40 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from datetime import datetime
-
-from mephisto.providers.mturk.mturk_unit import MTurkUnit
-from mephisto.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
-from typing import Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.assignment import Unit
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.assignment import Assignment
-
-
-class SandboxMTurkUnit(MTurkUnit):
- """
- This class tracks the status of an individual worker's contribution to a
- higher level assignment. It is the smallest 'unit' of work to complete
- the assignment, and this class is only responsible for checking
- the status of that work itself being done.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_sandbox_client_for_requester(requester_name)
-
- @staticmethod
- def new(
- db: "MephistoDB", assignment: "Assignment", index: int, pay_amount: float
- ) -> "Unit":
- """Create a Unit for the given assignment"""
- return SandboxMTurkUnit._register_unit(
- db, assignment, index, pay_amount, PROVIDER_TYPE
- )
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_unit import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
+)
diff --git a/mephisto/providers/mturk_sandbox/sandbox_mturk_worker.py b/mephisto/providers/mturk_sandbox/sandbox_mturk_worker.py
index 36d08c631..ff0203868 100644
--- a/mephisto/providers/mturk_sandbox/sandbox_mturk_worker.py
+++ b/mephisto/providers/mturk_sandbox/sandbox_mturk_worker.py
@@ -4,59 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.providers.mturk.mturk_worker import MTurkWorker
-from mephisto.providers.mturk_sandbox.provider_type import PROVIDER_TYPE
-
-from typing import Any, Mapping, Optional, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.providers.mturk.mturk_datastore import MTurkDatastore
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.database import MephistoDB
-
-
-class SandboxMTurkWorker(MTurkWorker):
- """
- This class represents an individual - namely a person. It maintains components of ongoing identity for a user.
- """
-
- # Ensure inherited methods use this level's provider type
- PROVIDER_TYPE = PROVIDER_TYPE
-
- def __init__(
- self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
- ):
- super().__init__(db, db_id, row=row)
- self.datastore: "MTurkDatastore" = self.db.get_datastore_for_provider(
- self.PROVIDER_TYPE
- )
- # sandbox workers use a different name
- self._worker_name = self.worker_name[:-8]
-
- def grant_crowd_qualification(
- self, qualification_name: str, value: int = 1
- ) -> None:
- """
- Grant a qualification by the given name to this worker. Check the local
- MTurk db to find the matching MTurk qualification to grant, and pass
- that. If no qualification exists, try to create one.
- """
- return super().grant_crowd_qualification(qualification_name + "_sandbox", value)
-
- def revoke_crowd_qualification(self, qualification_name: str) -> None:
- """
- Revoke the qualification by the given name from this worker. Check the local
- MTurk db to find the matching MTurk qualification to revoke, pass if
- no such qualification exists.
- """
- return super().revoke_crowd_qualification(qualification_name + "_sandbox")
-
- def _get_client(self, requester_name: str) -> Any:
- """
- Get an mturk client for usage with mturk_utils
- """
- return self.datastore.get_sandbox_client_for_requester(requester_name)
-
- @staticmethod
- def new(db: "MephistoDB", worker_id: str) -> "Worker":
- return MTurkWorker._register_worker(db, worker_id + "_sandbox", PROVIDER_TYPE)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_worker import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of provider content from `mephisto.providers` is going away soon. "
+ "Please replace all of your imports from mephisto.providers "
+ "to mephisto.abstractions.providers. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
+)
diff --git a/mephisto/scripts/mturk/soft_block_workers_by_mturk_id.py b/mephisto/scripts/mturk/soft_block_workers_by_mturk_id.py
index f4590fd69..a3a69aaaf 100644
--- a/mephisto/scripts/mturk/soft_block_workers_by_mturk_id.py
+++ b/mephisto/scripts/mturk/soft_block_workers_by_mturk_id.py
@@ -4,9 +4,11 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.providers.mturk.utils.script_utils import direct_soft_block_mturk_workers
+from mephisto.abstractions.providers.mturk.utils.script_utils import (
+ direct_soft_block_mturk_workers,
+)
-from mephisto.core.local_database import LocalMephistoDB
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
db = LocalMephistoDB()
reqs = db.find_requesters(provider_type="mturk")
diff --git a/mephisto/server/architects/heroku_architect.py b/mephisto/server/architects/heroku_architect.py
index 069f1bb56..98b5ad58b 100644
--- a/mephisto/server/architects/heroku_architect.py
+++ b/mephisto/server/architects/heroku_architect.py
@@ -4,457 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-import getpass
-import glob
-import hashlib
-import netrc
-import os
-import platform
-import sh
-import shlex
-import shutil
-import subprocess
-import sys
-import time
-import requests
-import re
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from mephisto.core.utils import get_mephisto_tmp_dir
-from mephisto.data_model.architect import Architect, ArchitectArgs
-from mephisto.server.architects.router.build_router import build_router
-from mephisto.server.channels.websocket_channel import WebsocketChannel
-from mephisto.core.registry import register_mephisto_abstraction
-from typing import Any, Tuple, List, Dict, Optional, TYPE_CHECKING, Callable
-
-if TYPE_CHECKING:
- from mephisto.server.channels.channel import Channel
- from mephsito.data_model.packet import Packet
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.database import MephistoDB
- from mephisto.data_model.blueprint import SharedTaskState
- from argparse import _ArgumentGroup as ArgumentGroup
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-ARCHITECT_TYPE = "heroku"
-
-USER_NAME = getpass.getuser()
-HEROKU_SERVER_BUILD_DIRECTORY = "heroku_server"
-HEROKU_CLIENT_URL = (
- "https://cli-assets.heroku.com/heroku-cli/channels/stable/heroku-cli"
+from mephisto.abstractions.architects.heroku_architect import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of architects from `mephisto.server.architects` is going away soon. "
+ "Please replace all of your imports from mephisto.server.architects "
+ "to mephisto.abstractions.architects. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
)
-
-HEROKU_WAIT_TIME = 3
-
-HEROKU_TMP_DIR = os.path.join(get_mephisto_tmp_dir(), "heroku")
-os.makedirs(HEROKU_TMP_DIR, exist_ok=True)
-
-
-@dataclass
-class HerokuArchitectArgs(ArchitectArgs):
- """Additional arguments for configuring a heroku architect"""
-
- _architect_type: str = ARCHITECT_TYPE
- use_hobby: bool = field(
- default=False, metadata={"help": "Launch on the Heroku Hobby tier"}
- )
- heroku_team: Optional[str] = field(
- default=MISSING, metadata={"help": "Heroku team to use for this launch"}
- )
-
-
-@register_mephisto_abstraction()
-class HerokuArchitect(Architect):
- """
- Sets up a server on heroku and deploys the task on that server
- """
-
- ArgsClass = HerokuArchitectArgs
- ARCHITECT_TYPE = ARCHITECT_TYPE
-
- def __init__(
- self,
- db: "MephistoDB",
- args: DictConfig,
- shared_state: "SharedTaskState",
- task_run: "TaskRun",
- build_dir_root: str,
- ):
- """
- Ensure heroku credentials are setup, then prepare the necessary files
- for launching for this task.
-
- All necessary paths should be built in the init or stored in the database
- such that a re-init on the same task run can pull the server information.
-
- This means that we can shutdown a server that is still running after a
- catastrophic failure.
- """
- # TODO(#102) put the expected info into the MephistoDB rather than storing here?
- # Servers will have a status which needs to be kept track of.
- self.args = args
- self.task_run = task_run
- self.deploy_name = f"{task_run.get_task().task_name}_{task_run.db_id}"
- self.build_dir = build_dir_root
-
- # Cache-able parameters
- self.__heroku_app_name: Optional[str] = None
- self.__heroku_executable_path: Optional[str] = None
- self.__heroku_user_identifier: Optional[str] = None
-
- def _get_socket_urls(self) -> List[str]:
- """Returns the path to the heroku app socket"""
- heroku_app_name = self.__get_app_name()
- return ["wss://{}.herokuapp.com/".format(heroku_app_name)]
-
- def get_channels(
- self,
- on_channel_open: Callable[[str], None],
- on_catastrophic_disconnect: Callable[[str], None],
- on_message: Callable[[str, "Packet"], None],
- ) -> List["Channel"]:
- """
- Return a list of all relevant channels that the Supervisor will
- need to register to in order to function
- """
- urls = self._get_socket_urls()
- return [
- WebsocketChannel(
- f"heroku_channel_{self.deploy_name}_{idx}",
- on_channel_open=on_channel_open,
- on_catastrophic_disconnect=on_catastrophic_disconnect,
- on_message=on_message,
- socket_url=url,
- )
- for idx, url in enumerate(urls)
- ]
-
- def download_file(self, target_filename: str, save_dir: str) -> None:
- """
- Heroku architects need to download the file
- """
- heroku_app_name = self.__get_app_name()
- target_url = (
- f"https://{heroku_app_name}.herokuapp.com/download_file/{target_filename}"
- )
- dest_path = os.path.join(save_dir, target_filename)
- r = requests.get(target_url, stream=True)
-
- with open(dest_path, "wb") as out_file:
- for chunk in r.iter_content(chunk_size=1024):
- if chunk:
- out_file.write(chunk)
-
- @classmethod
- def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
- """
- Assert that the provided arguments are valid. Should
- fail if a task launched with these arguments would
- not work.
-
- This should include throwing an exception if the architect
- needs login details or something similar given the
- arguments passed in.
- """
- heroku_executable_path = HerokuArchitect.get_heroku_client_path()
- try:
- output = subprocess.check_output(
- shlex.split(heroku_executable_path + " auth:whoami")
- )
- except subprocess.CalledProcessError:
- raise Exception(
- "A free Heroku account is required for launching tasks via "
- "the HerokuArchitect. Please register at "
- "https://signup.heroku.com/ and run `{} login` at the terminal "
- "to login to Heroku before trying to use HerokuArchitect."
- "".format(heroku_executable_path)
- )
- return
-
- @staticmethod
- def get_heroku_client_path() -> str:
- """
- Get the path to the heroku executable client, download a new one if it
- doesnt exist.
- """
- print("Locating heroku...")
- # Install Heroku CLI
- os_name = None
- bit_architecture = None
-
- # Get the platform we are working on
- if sys.platform == "darwin": # Mac OS X
- os_name = "darwin"
- elif sys.platform.startswith("linux"): # Linux
- os_name = "linux"
- else:
- os_name = "windows"
-
- # Find our architecture
- bit_architecture_info = platform.architecture()[0]
- if "64bit" in bit_architecture_info:
- bit_architecture = "x64"
- else:
- bit_architecture = "x86"
-
- # Find existing heroku files to use
- existing_heroku_directory_names = glob.glob(
- os.path.join(HEROKU_TMP_DIR, "heroku-cli-*")
- )
- if len(existing_heroku_directory_names) == 0:
- print("Getting heroku")
- if os.path.exists(os.path.join(HEROKU_TMP_DIR, "heroku.tar.gz")):
- os.remove(os.path.join(HEROKU_TMP_DIR, "heroku.tar.gz"))
-
- # Get the heroku client and unzip
- tar_path = os.path.join(HEROKU_TMP_DIR, "heroku.tar.gz")
- sh.wget(
- shlex.split(
- "{}-{}-{}.tar.gz -O {}".format(
- HEROKU_CLIENT_URL, os_name, bit_architecture, tar_path
- )
- )
- )
- sh.tar(shlex.split(f"-xvzf {tar_path} -C {HEROKU_TMP_DIR}"))
-
- # Clean up the tar
- if os.path.exists(tar_path):
- os.remove(tar_path)
-
- heroku_directory_name = os.path.basename(
- glob.glob(os.path.join(HEROKU_TMP_DIR, "heroku-cli-*"))[0]
- )
- heroku_directory_path = os.path.join(HEROKU_TMP_DIR, heroku_directory_name)
- return os.path.join(heroku_directory_path, "bin", "heroku")
-
- def __get_heroku_client(self) -> Tuple[str, str]:
- """
- Get an authorized heroku client path and authorization token
- """
- if (
- self.__heroku_executable_path is None
- or self.__heroku_user_identifier is None
- ):
- heroku_executable_path = HerokuArchitect.get_heroku_client_path()
-
- # get heroku credentials
- heroku_user_identifier = None
- while not heroku_user_identifier:
- try:
- output = subprocess.check_output(
- shlex.split(heroku_executable_path + " auth:whoami")
- )
- output = subprocess.check_output(
- shlex.split(heroku_executable_path + " auth:token")
- )
- heroku_user_identifier = netrc.netrc(
- os.path.join(os.path.expanduser("~"), ".netrc")
- ).hosts["api.heroku.com"][0]
- except subprocess.CalledProcessError:
- print(
- "A free Heroku account is required for launching MTurk tasks. "
- "Please register at https://signup.heroku.com/ and run `{} "
- "login` at the terminal to login to Heroku, and then run this "
- "program again.".format(heroku_executable_path)
- )
- raise Exception("Please login to heroku before trying again.")
- self.__heroku_executable_path = heroku_executable_path
- self.__heroku_user_identifier = heroku_user_identifier
- return self.__heroku_executable_path, self.__heroku_user_identifier
-
- def __get_build_directory(self) -> str:
- """
- Return the string where the server should be built in.
- """
- return os.path.join(
- self.build_dir,
- "{}_{}".format(HEROKU_SERVER_BUILD_DIRECTORY, self.deploy_name),
- )
-
- def __get_app_name(self) -> str:
- """
- Get the name of the heroku app associated with this task
- """
- if self.__heroku_app_name is None:
- _, heroku_user_identifier = self.__get_heroku_client()
- heroku_app_name = (
- "{}-{}-{}".format(
- USER_NAME,
- self.deploy_name,
- hashlib.md5(heroku_user_identifier.encode("utf-8")).hexdigest(),
- )
- )[:30]
- heroku_app_name = heroku_app_name.replace("_", "-")
- while heroku_app_name[-1] == "-":
- heroku_app_name = heroku_app_name[:-1]
- self.__heroku_app_name = re.sub(r"[^a-zA-Z0-9-]", "", heroku_app_name)
- return self.__heroku_app_name
-
- def __compile_server(self) -> str:
- """
- Move the required task files to a specific directory to be deployed to
- heroku directly. Return the location that the packaged files are
- now prepared in.
- """
- print("Building server files...")
- heroku_server_development_root = self.__get_build_directory()
- os.makedirs(heroku_server_development_root)
- heroku_server_development_path = build_router(
- heroku_server_development_root, self.task_run
- )
- return heroku_server_development_path
-
- def __setup_heroku_server(self) -> str:
- """
- Deploy the server using the setup server directory, return the URL
- """
-
- heroku_executable_path, heroku_user_identifier = self.__get_heroku_client()
- server_dir = self.__get_build_directory()
-
- print("Heroku: Starting server...")
-
- heroku_server_directory_path = os.path.join(server_dir, "router")
- sh.git(shlex.split(f"-C {heroku_server_directory_path} init"))
-
- heroku_app_name = self.__get_app_name()
-
- # Create or attach to the server
- return_dir = os.getcwd()
- os.chdir(heroku_server_directory_path)
- try:
- if self.args.architect.get("heroku_team", None) is not None:
- subprocess.check_output(
- shlex.split(
- "{} create {} --team {}".format(
- heroku_executable_path,
- heroku_app_name,
- self.args.architect.heroku_team,
- )
- )
- )
- else:
- subprocess.check_output(
- shlex.split(
- "{} create {}".format(heroku_executable_path, heroku_app_name)
- )
- )
- except subprocess.CalledProcessError as e: # User has too many apps?
- # TODO(#93) check response codes to determine what actually happened
- logger.exception(e, exc_info=True)
- sh.rm(shlex.split("-rf {}".format(heroku_server_directory_path)))
- raise Exception(
- "You have hit your limit on concurrent apps with heroku, which are"
- " required to run multiple concurrent tasks.\nPlease wait for some"
- " of your existing tasks to complete. If you have no tasks "
- "running, login to heroku and delete some of the running apps or "
- "verify your account to allow more concurrent apps"
- )
-
- # Enable WebSockets
- try:
- subprocess.check_output(
- shlex.split(
- "{} features:enable http-session-affinity".format(
- heroku_executable_path
- )
- )
- )
- except subprocess.CalledProcessError: # Already enabled WebSockets
- pass
- os.chdir(return_dir)
-
- # commit and push to the heroku server
- sh.git(shlex.split(f"-C {heroku_server_directory_path} add -A"))
- sh.git(shlex.split(f'-C {heroku_server_directory_path} commit -m "app"'))
- sh.git(shlex.split(f"-C {heroku_server_directory_path} push -f heroku master"))
-
- os.chdir(heroku_server_directory_path)
- subprocess.check_output(
- shlex.split("{} ps:scale web=1".format(heroku_executable_path))
- )
-
- if self.args.architect.use_hobby is True:
- try:
- subprocess.check_output(
- shlex.split("{} dyno:type Hobby".format(heroku_executable_path))
- )
- except subprocess.CalledProcessError: # User doesn't have hobby access
- self.__delete_heroku_server()
- sh.rm(shlex.split("-rf {}".format(heroku_server_directory_path)))
- raise Exception(
- "Server launched with hobby flag but account cannot create "
- "hobby servers."
- )
- os.chdir(return_dir)
-
- time.sleep(HEROKU_WAIT_TIME)
-
- return "https://{}.herokuapp.com".format(heroku_app_name)
-
- def __delete_heroku_server(self):
- """
- Remove the heroku server associated with this task run
- """
- heroku_executable_path, heroku_user_identifier = self.__get_heroku_client()
- heroku_app_name = self.__get_app_name()
- print("Heroku: Deleting server: {}".format(heroku_app_name))
- subprocess.check_output(
- shlex.split(
- "{} destroy {} --confirm {}".format(
- heroku_executable_path, heroku_app_name, heroku_app_name
- )
- )
- )
- time.sleep(HEROKU_WAIT_TIME)
-
- def server_is_running(self) -> bool:
- """
- Utility function to check if the given heroku app (by app-name) is
- still running
- """
- heroku_executable_path, _token = self.__get_heroku_client()
- app_name = self.__get_app_name()
- output = subprocess.check_output(shlex.split(heroku_executable_path + " apps"))
- all_apps = str(output, "utf-8")
- return app_name in all_apps
-
- def build_is_clean(self) -> bool:
- """
- Utility function to see if the build has been cleaned up
- """
- server_dir = self.__get_build_directory()
- return not os.path.exists(server_dir)
-
- def prepare(self) -> str:
- """
- Produce the server files that will be deployed to the server
- """
- return self.__compile_server()
-
- def deploy(self) -> str:
- """
- Launch the server, and push the task files to the server. Return
- the server URL
- """
- return self.__setup_heroku_server()
-
- def cleanup(self) -> None:
- """
- Remove any files that were used for the deployment process that
- no longer need to be kept track of now that the task has
- been launched.
- """
- server_dir = self.__get_build_directory()
- sh.rm(shlex.split("-rf {}".format(server_dir)))
-
- def shutdown(self) -> None:
- """
- Shut down the server launched by this Architect, as stored
- in the db.
- """
- self.__delete_heroku_server()
diff --git a/mephisto/server/architects/local_architect.py b/mephisto/server/architects/local_architect.py
index c48596e22..c671dfe5a 100644
--- a/mephisto/server/architects/local_architect.py
+++ b/mephisto/server/architects/local_architect.py
@@ -4,179 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import os
-import signal
-import subprocess
-import sh
-import shutil
-import shlex
-import time
-import requests
-
-from mephisto.data_model.architect import Architect, ArchitectArgs
-from dataclasses import dataclass, field
-from mephisto.core.registry import register_mephisto_abstraction
-from typing import Any, Optional, Dict, List, TYPE_CHECKING, Callable
-
-if TYPE_CHECKING:
- from mephisto.server.channels.channel import Channel
- from mephsito.data_model.packet import Packet
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.database import MephistoDB
- from argparse import _ArgumentGroup as ArgumentGroup
- from omegaconf import DictConfig
- from mephisto.data_model.blueprint import SharedTaskState
-
-from mephisto.server.architects.router.build_router import build_router
-from mephisto.server.channels.websocket_channel import WebsocketChannel
-from mephisto.core.utils import get_mephisto_tmp_dir
-
-ARCHITECT_TYPE = "local"
-
-
-@dataclass
-class LocalArchitectArgs(ArchitectArgs):
- """Additional arguments for configuring a local architect"""
-
- _architect_type: str = ARCHITECT_TYPE
- hostname: str = field(
- default="localhost", metadata={"help": "Addressible location of the server"}
- )
- port: str = field(default="3000", metadata={"help": "Port to launch the server on"})
-
-
-@register_mephisto_abstraction()
-class LocalArchitect(Architect):
- """
- Provides methods for setting up a server locally and deploying tasks
- onto that server.
- """
-
- ArgsClass = LocalArchitectArgs
- ARCHITECT_TYPE = ARCHITECT_TYPE
-
- def __init__(
- self,
- db: "MephistoDB",
- args: "DictConfig",
- shared_state: "SharedTaskState",
- task_run: "TaskRun",
- build_dir_root: str,
- ):
- """Create an architect for use in testing"""
- self.task_run = task_run
- self.build_dir = build_dir_root
- self.task_run_id = task_run.db_id
- # TODO(#102) move some of this into the db, server status
- # needs to be in order to restart
- self.server_process_pid: Optional[int] = None
- self.server_process: Optional[subprocess.Popen] = None
- self.server_dir: Optional[str] = None
- self.running_dir: Optional[str] = None
- self.hostname: Optional[str] = args.architect.hostname
- self.port: Optional[str] = args.architect.port
- self.cleanup_called = False
-
- def _get_socket_urls(self) -> List[str]:
- """Return the path to the local server socket"""
- assert self.hostname is not None, "No hostname for socket"
- assert self.port is not None, "No ports for socket"
- if "https://" in self.hostname:
- basename = self.hostname.split("https://")[1]
- protocol = "wss"
- elif "http://" in self.hostname:
- basename = self.hostname.split("http://")[1]
- protocol = "ws"
- else:
- basename = self.hostname
- protocol = "ws"
-
- if basename in ["localhost", "127.0.0.1"]:
- protocol = "ws"
-
- return [f"{protocol}://{basename}:{self.port}/"]
-
- def get_channels(
- self,
- on_channel_open: Callable[[str], None],
- on_catastrophic_disconnect: Callable[[str], None],
- on_message: Callable[[str, "Packet"], None],
- ) -> List["Channel"]:
- """
- Return a list of all relevant channels that the Supervisor will
- need to register to in order to function
- """
- urls = self._get_socket_urls()
- return [
- WebsocketChannel(
- f"local_channel_{self.task_run_id}_{idx}",
- on_channel_open=on_channel_open,
- on_catastrophic_disconnect=on_catastrophic_disconnect,
- on_message=on_message,
- socket_url=url,
- )
- for idx, url in enumerate(urls)
- ]
-
- def download_file(self, target_filename: str, save_dir: str) -> None:
- """
- Local architects can just move from the local directory
- """
- assert self.running_dir is not None, "cannot download a file if not running"
- source_file = os.path.join("/tmp/", target_filename)
- dest_path = os.path.join(save_dir, target_filename)
- shutil.copy2(source_file, dest_path)
-
- def prepare(self) -> str:
- """Mark the preparation call"""
- self.server_dir = build_router(self.build_dir, self.task_run)
- return self.server_dir
-
- def deploy(self) -> str:
- """Deploy the server from a local folder for this task"""
- assert self.server_dir is not None, "Deploy called before prepare"
- self.running_dir = os.path.join(
- get_mephisto_tmp_dir(), f"local_server_{self.task_run_id}", "server"
- )
-
- shutil.copytree(self.server_dir, self.running_dir)
-
- return_dir = os.getcwd()
- os.chdir(self.running_dir)
- self.server_process = subprocess.Popen(
- ["node", "server.js"],
- preexec_fn=os.setpgrp,
- env=dict(os.environ, PORT=f"{self.port}"),
- )
- self.server_process_pid = self.server_process.pid
- os.chdir(return_dir)
-
- time.sleep(1)
- print("Server running locally with pid {}.".format(self.server_process_pid))
- host = self.hostname
- port = self.port
- if host is None:
- host = input(
- "Please enter the public server address, like https://hostname.com: "
- )
- self.hostname = host
- if port is None:
- port = input("Please enter the port given above, likely 3000: ")
- self.port = port
- return "{}:{}".format(host, port)
-
- def cleanup(self) -> None:
- """Cleanup the built directory"""
- assert self.server_dir is not None, "Cleanup called before prepare"
- sh.rm(shlex.split("-rf " + self.server_dir))
-
- def shutdown(self) -> None:
- """Find the server process, shut it down, then remove the build directory"""
- assert self.running_dir is not None, "shutdown called before deploy"
- if self.server_process is None:
- assert self.server_process_pid is not None, "No server id to kill"
- os.kill(self.server_process_pid, signal.SIGTERM)
- else:
- self.server_process.terminate()
- self.server_process.wait()
- sh.rm(shlex.split("-rf " + self.running_dir))
+from mephisto.abstractions.architects.local_architect import *  # re-export the relocated module's public names under the old path
+import warnings
+
+warnings.warn(  # top-level call: runs when this legacy module is imported
+ "Import of architects from `mephisto.server.architects` is going away soon. "
+ "Please replace all of your imports from mephisto.server.architects "
+ "to mephisto.abstractions.architects. ",
+ PendingDeprecationWarning,  # warning category passed to warnings.warn
+)
diff --git a/mephisto/server/architects/mock_architect.py b/mephisto/server/architects/mock_architect.py
index 0e25fc1cf..8818f88b6 100644
--- a/mephisto/server/architects/mock_architect.py
+++ b/mephisto/server/architects/mock_architect.py
@@ -4,366 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-import tornado
-from tornado.websocket import WebSocketHandler
-import os
-import threading
-import uuid
-import json
-import time
-
-from mephisto.data_model.architect import Architect, ArchitectArgs
-from dataclasses import dataclass, field
-from mephisto.data_model.packet import (
- PACKET_TYPE_ALIVE,
- PACKET_TYPE_NEW_WORKER,
- PACKET_TYPE_NEW_AGENT,
- PACKET_TYPE_AGENT_ACTION,
- PACKET_TYPE_SUBMIT_ONBOARDING,
- PACKET_TYPE_REQUEST_AGENT_STATUS,
- PACKET_TYPE_GET_INIT_DATA,
+from mephisto.abstractions.architects.mock_architect import *
+import warnings
+
+warnings.warn(  # legacy shim: point importers at the new package location
+    "Import of architects from `mephisto.server.architects` is going away soon. "
+    "Please replace all of your imports from `mephisto.server.architects` "
+    "to `mephisto.abstractions.architects`.",
+    PendingDeprecationWarning,
 )
-from mephisto.core.registry import register_mephisto_abstraction
-from mephisto.server.channels.websocket_channel import WebsocketChannel
-from typing import List, Dict, Any, Optional, TYPE_CHECKING, Callable
-
-if TYPE_CHECKING:
- from mephisto.server.channels.channel import Channel
- from mephsito.data_model.packet import Packet
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.database import MephistoDB
- from argparse import _ArgumentGroup as ArgumentGroup
- from omegaconf import DictConfig
- from mephisto.data_model.blueprint import SharedTaskState
-
-MOCK_DEPLOY_URL = "MOCK_DEPLOY_URL"
-ARCHITECT_TYPE = "mock"
-
-
-def get_rand_id():
- return str(uuid.uuid4())
-
-
-@dataclass
-class MockArchitectArgs(ArchitectArgs):
- """Additional arguments for configuring a mock architect"""
-
- _architect_type: str = ARCHITECT_TYPE
- should_run_server: bool = field(
- default=False, metadata={"help": "Addressible location of the server"}
- )
- port: str = field(default="3000", metadata={"help": "Port to launch the server on"})
-
-
-class SocketHandler(WebSocketHandler):
- def __init__(self, *args, **kwargs):
- self.subs: Dict[int, "SocketHandler"] = kwargs.pop("subs")
- self.app: "MockServer" = kwargs.pop("app")
- self.sid = get_rand_id()
- super().__init__(*args, **kwargs)
-
- def open(self):
- """
- Opens a websocket and assigns a random UUID that is stored in the class-level
- `subs` variable.
- """
- if self.sid not in self.subs.values():
- self.subs[self.sid] = self
-
- def on_close(self):
- """
- Runs when a socket is closed.
- """
- del self.subs[self.sid]
-
- def on_message(self, message_text):
- """
- Callback that runs when a new message is received from a client See the
- chat_service README for the resultant message structure.
- Args:
- message_text: A stringified JSON object with a text or attachment key.
- `text` should contain a string message and `attachment` is a dict.
- See `WebsocketAgent.put_data` for more information about the
- attachment dict structure.
- """
- message = json.loads(message_text)
- if message["packet_type"] == PACKET_TYPE_ALIVE:
- self.app.last_alive_packet = message
- elif message["packet_type"] == PACKET_TYPE_AGENT_ACTION:
- self.app.actions_observed += 1
- elif message["packet_type"] != PACKET_TYPE_REQUEST_AGENT_STATUS:
- self.app.last_packet = message
-
- def check_origin(self, origin):
- return True
-
-
-class AliveHandler(tornado.web.RequestHandler):
- """Simple handler for is_alive"""
-
- def get(self, eids):
- pass # Default behavior returns 200
-
-
-class MockServer(tornado.web.Application):
- """
- Tornado-based server that with hooks for sending specific
- messages through socket connections and such
- """
-
- def __init__(self, port):
- self.subs = {}
- self.port = port
- self.running_instance = None
- self.last_alive_packet: Optional[Dict[str, Any]] = None
- self.actions_observed = 0
- self.last_packet: Optional[Dict[str, Any]] = None
- tornado_settings = {
- "autoescape": None,
- "debug": "/dbg/" in __file__,
- "compiled_template_cache": False,
- "static_url_prefix": "/static/",
- "debug": True,
- }
- handlers = [
- ("/socket", SocketHandler, {"subs": self.subs, "app": self}),
- ("/is_alive", AliveHandler, {}),
- ]
- super(MockServer, self).__init__(handlers, **tornado_settings)
-
- def __server_thread_fn(self):
- """
- Main loop for the application
- """
- self.running_instance = tornado.ioloop.IOLoop()
- http_server = tornado.httpserver.HTTPServer(self, max_buffer_size=1024 ** 3)
- http_server.listen(self.port)
- self.running_instance.start()
- http_server.stop()
-
- def _get_sub(self):
- """Return the subscriber socket to write to"""
- return list(self.subs.values())[0]
-
- def _send_message(self, message):
- """Send the given message back to the mephisto client"""
- failed_attempts = 0
- last_exception = None
- while failed_attempts < 5:
- try:
- socket = self._get_sub()
- message_json = json.dumps(message)
- socket.write_message(message_json)
- last_exception = None
- break
- except Exception as e:
- last_exception = e
- time.sleep(0.2)
- failed_attempts += 1
- finally:
- time.sleep(0.1)
- if last_exception is not None:
- raise last_exception
-
- def send_agent_act(self, agent_id, act_content):
- """
- Send a packet from the given agent with
- the given content
- """
- self._send_message(
- {
- "packet_type": PACKET_TYPE_AGENT_ACTION,
- "sender_id": agent_id,
- "receiver_id": "Mephisto",
- "data": act_content,
- }
- )
-
- def request_init_data(self, agent_id):
- """
- Send a packet from the given agent with
- the given content
- """
- self._send_message(
- {
- "packet_type": PACKET_TYPE_GET_INIT_DATA,
- "sender_id": agent_id,
- "receiver_id": "Mephisto",
- "data": {
- "request_id": agent_id + str(time.time()),
- "provider_data": {
- "agent_id": agent_id,
- },
- },
- }
- )
-
- def register_mock_agent(self, worker_id, agent_details):
- """
- Send a packet asking to register a mock agent.
- """
- self._send_message(
- {
- "packet_type": PACKET_TYPE_NEW_AGENT,
- "sender_id": "MockServer",
- "receiver_id": "Mephisto",
- "data": {
- "request_id": agent_details,
- "provider_data": {
- "worker_id": worker_id,
- "agent_registration_id": agent_details,
- },
- },
- }
- )
-
- def register_mock_agent_after_onboarding(self, worker_id, agent_id, onboard_data):
- """
- Send a packet asking to register a mock agent.
- """
- onboard_data["request_id"] = "1234"
- self._send_message(
- {
- "packet_type": PACKET_TYPE_SUBMIT_ONBOARDING,
- "sender_id": agent_id,
- "receiver_id": "Mephisto",
- "data": onboard_data,
- }
- )
-
- def register_mock_worker(self, worker_name):
- """
- send a packet asking to register a mock worker.
- """
- self._send_message(
- {
- "packet_type": PACKET_TYPE_NEW_WORKER,
- "sender_id": "MockServer",
- "receiver_id": "Mephisto",
- "data": {
- "request_id": worker_name,
- "provider_data": {"worker_name": worker_name},
- },
- }
- )
-
- def disconnect_mock_agent(self, agent_id):
- """
- Mark a mock agent as disconnected.
- """
- # TODO(#97) implement when handling disconnections
- pass
-
- def launch_mock(self):
- """
- Start the primary loop for this application
- """
- self.__server_thread = threading.Thread(target=self.__server_thread_fn)
- self.__server_thread.start()
-
- def shutdown_mock(self):
- """
- Defined to shutown the tornado application.
- """
-
- def stop_and_free():
- self.running_instance.stop()
-
- self.running_instance.add_callback(stop_and_free)
- self.__server_thread.join()
-
-
-@register_mephisto_abstraction()
-class MockArchitect(Architect):
- """
- The MockArchitect runs a mock server on the localhost so that
- we can send special packets and assert connections have been made
- """
-
- ArgsClass = MockArchitectArgs
- ARCHITECT_TYPE = ARCHITECT_TYPE
-
- def __init__(
- self,
- db: "MephistoDB",
- args: "DictConfig",
- shared_state: "SharedTaskState",
- task_run: "TaskRun",
- build_dir_root: str,
- ):
- """Create an architect for use in testing"""
- self.task_run = task_run
- self.build_dir = build_dir_root
- self.task_run_id = task_run.db_id
- self.should_run_server = args.architect.should_run_server
- self.port = args.architect.port
- self.server: Optional["MockServer"] = None
- # TODO(#97) track state in parent class?
- self.prepared = False
- self.deployed = False
- self.cleaned = False
- self.did_shutdown = False
-
- def _get_socket_urls(self) -> List[str]:
- """Return the path to the local server socket"""
- assert self.port is not None, "No ports for socket"
- return [f"ws://localhost:{self.port}/socket"]
-
- def get_channels(
- self,
- on_channel_open: Callable[[str], None],
- on_catastrophic_disconnect: Callable[[str], None],
- on_message: Callable[[str, "Packet"], None],
- ) -> List["Channel"]:
- """
- Return a list of all relevant channels that the Supervisor will
- need to register to in order to function
- """
- urls = self._get_socket_urls()
- return [
- WebsocketChannel(
- f"mock_channel_{self.task_run_id}_{idx}",
- on_channel_open=on_channel_open,
- on_catastrophic_disconnect=on_catastrophic_disconnect,
- on_message=on_message,
- socket_url=url,
- )
- for idx, url in enumerate(urls)
- ]
-
- def download_file(self, target_filename: str, save_dir: str) -> None:
- """
- Mock architects can just pretend to write a file
- """
- with open(os.path.join(save_dir, target_filename), "wb") as fp:
- fp.write(b"mock\n")
-
- def prepare(self) -> str:
- """Mark the preparation call"""
- self.prepared = True
- built_dir = os.path.join(
- self.build_dir, "mock_build_{}".format(self.task_run_id)
- )
- os.makedirs(built_dir)
- return built_dir
-
- def deploy(self) -> str:
- """Mock a deploy or deploy a mock server, depending on settings"""
- self.deployed = True
- if not self.should_run_server:
- return MOCK_DEPLOY_URL
- else:
- self.server = MockServer(self.port)
- self.server.launch_mock()
- return f"http://localhost:{self.port}/"
-
- def cleanup(self) -> None:
- """Mark the cleanup call"""
- self.cleaned = True
-
- def shutdown(self) -> None:
- """Mark the shutdown call"""
- self.did_shutdown = True
- if self.should_run_server and self.server is not None:
- self.server.shutdown_mock()
diff --git a/mephisto/server/architects/router/build_router.py b/mephisto/server/architects/router/build_router.py
index ea34aee21..84e4ddea7 100644
--- a/mephisto/server/architects/router/build_router.py
+++ b/mephisto/server/architects/router/build_router.py
@@ -4,88 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-import mephisto.server.architects.router as router_module
-import os
-import sh
-import shutil
-import shlex
-import subprocess
-import json
-
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
-
-ROUTER_ROOT_DIR = os.path.dirname(router_module.__file__)
-SERVER_SOURCE_ROOT = os.path.join(ROUTER_ROOT_DIR, "deploy")
-CROWD_SOURCE_PATH = "static/wrap_crowd_source.js"
-TASK_CONFIG_PATH = "static/task_config.json"
-
-
-def can_build(build_dir: str, task_run: "TaskRun") -> bool:
- """Determine if the build dir is properly formatted for
- being able to have the router built within. This is a
- validation step that should be run before build_router.
- """
- # TODO(#97) incorporate this step into the blueprint
- # task builder test, as once the task is built, it
- # should be able to have the server build as well.
- # TODO(#97) actually implement this when the full build
- # process for the router is decided
- return True
-
-
-def install_router_files() -> None:
- """
- Create a new build including the node_modules
- """
- return_dir = os.getcwd()
- os.chdir(SERVER_SOURCE_ROOT)
-
- packages_installed = subprocess.call(["npm", "install"])
- if packages_installed != 0:
- raise Exception(
- "please make sure npm is installed, otherwise view "
- "the above error for more info."
- )
- os.chdir(return_dir)
-
-
-def build_router(build_dir: str, task_run: "TaskRun") -> str:
- """
- Copy expected files from the router source into the build dir,
- using existing files in the build dir as replacements for the
- defaults if available
- """
- install_router_files()
-
- server_source_directory_path = SERVER_SOURCE_ROOT
- local_server_directory_path = os.path.join(build_dir, "router")
-
- # Delete old server files
- sh.rm(shlex.split("-rf " + local_server_directory_path))
-
- # Copy over a clean copy into the server directory
- shutil.copytree(server_source_directory_path, local_server_directory_path)
-
- # Copy the required wrap crowd source path
- local_crowd_source_path = os.path.join(
- local_server_directory_path, CROWD_SOURCE_PATH
- )
- crowd_provider = task_run.get_provider()
- shutil.copy2(crowd_provider.get_wrapper_js_path(), local_crowd_source_path)
-
- # Copy the task_run's json configuration
- local_task_config_path = os.path.join(local_server_directory_path, TASK_CONFIG_PATH)
- blueprint = task_run.get_blueprint()
- with open(local_task_config_path, "w+") as task_fp:
- json.dump(blueprint.get_frontend_args(), task_fp)
-
- # Consolidate task files as defined by the task
- TaskBuilderClass = blueprint.TaskBuilderClass
- task_builder = TaskBuilderClass(task_run, task_run.args)
-
- task_builder.build_in_dir(local_server_directory_path)
-
- return local_server_directory_path
+from mephisto.abstractions.architects.router.build_router import *
+import warnings
+
+warnings.warn(  # legacy shim: point importers at the new package location
+    "Import of router from `mephisto.server.architects` is going away soon. "
+    "Please replace all of your imports from `mephisto.server.architects` "
+    "to `mephisto.abstractions.architects`.",
+    PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/abstract/static_task/empty_task_builder.py b/mephisto/server/blueprints/abstract/static_task/empty_task_builder.py
index c9b0bd0e4..52bb5dcee 100644
--- a/mephisto/server/blueprints/abstract/static_task/empty_task_builder.py
+++ b/mephisto/server/blueprints/abstract/static_task/empty_task_builder.py
@@ -4,19 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskBuilder
-
-
-class EmptyStaticTaskBuilder(TaskBuilder):
- """
- Abstract class for a task builder for static tasks
- """
-
- def build_in_dir(self, build_dir: str):
- """Build the frontend if it doesn't exist, then copy into the server directory"""
- raise AssertionError(
- "Classes that extend the abstract StaticBlueprint must define a custom "
- "TaskBuilder class that pulls the correct frontend together. Examples "
- "can be seen in the static_react_task and static_html_task folders. "
- "Note that extra static content will be provided in `args.blueprint.extra_source_dir` "
- )
+from mephisto.abstractions.blueprints.abstract.static_task.empty_task_builder import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/abstract/static_task/static_agent_state.py b/mephisto/server/blueprints/abstract/static_task/static_agent_state.py
index 5b6fe5ed8..4bf4dfe69 100644
--- a/mephisto/server/blueprints/abstract/static_task/static_agent_state.py
+++ b/mephisto/server/blueprints/abstract/static_task/static_agent_state.py
@@ -4,112 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from typing import List, Dict, Optional, Any, TYPE_CHECKING
-from mephisto.data_model.blueprint import AgentState
-import os
-import json
-import time
-
-if TYPE_CHECKING:
- from mephisto.data_model.agent import Agent
- from mephisto.data_model.packet import Packet
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-DATA_FILE = "agent_data.json"
-
-
-class StaticAgentState(AgentState):
- """
- Agent state for static tasks.
- """
-
- def _get_empty_state(self) -> Dict[str, Optional[Dict[str, Any]]]:
- return {
- "inputs": None,
- "outputs": None,
- "times": {"task_start": 0, "task_end": 0},
- }
-
- def __init__(self, agent: "Agent"):
- """
- Static agent states should store
- input dict -> output dict pairs to disc
- """
- self.agent = agent
- self.state: Dict[str, Optional[Dict[str, Any]]] = self._get_empty_state()
- self.load_data()
-
- def set_init_state(self, data: Any) -> bool:
- """Set the initial state for this agent"""
- if self.get_init_state() is not None:
- # Initial state is already set
- return False
- else:
- self.state["inputs"] = data
- times_dict = self.state["times"]
- # TODO(#103) this typing may be better handled another way
- assert isinstance(times_dict, dict)
- times_dict["task_start"] = time.time()
- self.save_data()
- return True
-
- def get_init_state(self) -> Optional[Dict[str, Any]]:
- """
- Return the initial state for this agent,
- None if no such state exists
- """
- if self.state["inputs"] is None:
- return None
- return self.state["inputs"].copy()
-
- def load_data(self) -> None:
- """Load data for this agent from disk"""
- data_dir = self.agent.get_data_dir()
- data_path = os.path.join(data_dir, DATA_FILE)
- if os.path.exists(data_path):
- with open(data_path, "r") as data_file:
- self.state = json.load(data_file)
- else:
- self.state = self._get_empty_state()
-
- def get_data(self) -> Dict[str, Any]:
- """Return dict of this agent's state"""
- return self.state.copy()
-
- def save_data(self) -> None:
- """Save static agent data to disk"""
- data_dir = self.agent.get_data_dir()
- os.makedirs(data_dir, exist_ok=True)
- out_filename = os.path.join(data_dir, DATA_FILE)
- with open(out_filename, "w+") as data_file:
- json.dump(self.state, data_file)
- logger.info(f"SAVED_DATA_TO_DISC at {out_filename}")
-
- def update_data(self, packet: "Packet") -> None:
- """
- Process the incoming data packet, and handle
- updating the state
- """
- assert (
- packet.data.get("MEPHISTO_is_submit") is True
- or packet.data.get("onboarding_data") is not None
- ), "Static tasks should only have final act"
-
- outputs: Dict[str, Any]
-
- if packet.data.get("onboarding_data") is not None:
- outputs = packet.data["onboarding_data"]
- else:
- outputs = packet.data["task_data"]
- times_dict = self.state["times"]
- # TODO(#013) this typing may be better handled another way
- assert isinstance(times_dict, dict)
- times_dict["task_end"] = time.time()
- if packet.data.get("files") != None:
- logger.info(f"Got files: {str(packet.data['files'])[:500]}")
- outputs["files"] = [f["filename"] for f in packet.data["files"]]
- self.state["outputs"] = outputs
- self.save_data()
+from mephisto.abstractions.blueprints.abstract.static_task.static_agent_state import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/abstract/static_task/static_blueprint.py b/mephisto/server/blueprints/abstract/static_task/static_blueprint.py
index e8025750e..5ffd94770 100644
--- a/mephisto/server/blueprints/abstract/static_task/static_blueprint.py
+++ b/mephisto/server/blueprints/abstract/static_task/static_blueprint.py
@@ -4,185 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import (
- Blueprint,
- OnboardingRequired,
- BlueprintArgs,
- SharedTaskState,
-)
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from mephisto.data_model.assignment import InitializationData
-from mephisto.server.blueprints.abstract.static_task.static_agent_state import (
- StaticAgentState,
-)
-from mephisto.server.blueprints.abstract.static_task.static_task_runner import (
- StaticTaskRunner,
-)
-from mephisto.server.blueprints.abstract.static_task.empty_task_builder import (
- EmptyStaticTaskBuilder,
-)
-from mephisto.core.registry import register_mephisto_abstraction
-
-import os
-import time
-import csv
-import json
-
-from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import (
- AgentState,
- TaskRunner,
- TaskBuilder,
- OnboardingAgent,
- )
- from mephisto.data_model.assignment import Assignment
- from mephisto.data_model.worker import Worker
- from argparse import _ArgumentGroup as ArgumentGroup
-
-
-@dataclass
-class SharedStaticTaskState(SharedTaskState):
- static_task_data: List[Any] = field(default_factory=list)
-
+from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import *
+import warnings
-@dataclass
-class StaticBlueprintArgs(BlueprintArgs):
- units_per_assignment: int = field(
- default=1, metadata={"help": "How many workers you want to do each assignment"}
- )
- extra_source_dir: str = field(
- default=MISSING,
- metadata={
- "help": (
- "Optional path to sources that the HTML may "
- "refer to (such as images/video/css/scripts)"
- )
- },
- )
- data_json: str = field(
- default=MISSING, metadata={"help": "Path to JSON file containing task data"}
- )
- data_jsonl: str = field(
- default=MISSING, metadata={"help": "Path to JSON-L file containing task data"}
- )
- data_csv: str = field(
- default=MISSING, metadata={"help": "Path to csv file containing task data"}
- )
- extra_source_dir: str = field(
- default=MISSING,
- metadata={
- "help": (
- "Optional path to sources that the HTML may "
- "refer to (such as images/video/css/scripts)"
- )
- },
- )
-
-
-class StaticBlueprint(Blueprint, OnboardingRequired):
- """
- Abstract blueprint for a task that runs without any extensive backend.
- These are generally one-off tasks sending data to the frontend and then
- awaiting a response.
- """
-
- AgentStateClass: ClassVar[Type["AgentState"]] = StaticAgentState
- OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = StaticAgentState
- TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = EmptyStaticTaskBuilder
- TaskRunnerClass: ClassVar[Type["TaskRunner"]] = StaticTaskRunner
- ArgsClass: ClassVar[Type["BlueprintArgs"]] = StaticBlueprintArgs
- supported_architects: ClassVar[List[str]] = ["mock"] # TODO update
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self.init_onboarding_config(task_run, args, shared_state)
-
- self._initialization_data_dicts: List[Dict[str, Any]] = []
- blue_args = args.blueprint
- if blue_args.get("data_csv", None) is not None:
- csv_file = os.path.expanduser(blue_args.data_csv)
- with open(csv_file, "r", encoding="utf-8-sig") as csv_fp:
- csv_reader = csv.reader(csv_fp)
- headers = next(csv_reader)
- for row in csv_reader:
- row_data = {}
- for i, col in enumerate(row):
- row_data[headers[i]] = col
- self._initialization_data_dicts.append(row_data)
- elif blue_args.get("data_json", None) is not None:
- json_file = os.path.expanduser(blue_args.data_json)
- with open(json_file, "r", encoding="utf-8-sig") as json_fp:
- json_data = json.loads(json_fp)
- for jd in json_data:
- self._initialization_data_dicts.append(jd)
- elif blue_args.get("data_jsonl", None) is not None:
- jsonl_file = os.path.expanduser(blue_args.data_jsonl)
- with open(jsonl_file, "r", encoding="utf-8-sig") as jsonl_fp:
- line = jsonl_fp.readline()
- while line:
- j = json.loads(line)
- self._initialization_data_dicts.append(j)
- line = jsonl_fp.readline()
- elif shared_state.static_task_data is not None:
- self._initialization_data_dicts = shared_state.static_task_data
- else:
- # instantiating a version of the blueprint, but not necessarily needing the data
- pass
-
- @classmethod
- def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
- """Ensure that the data can be properly loaded"""
- blue_args = args.blueprint
- if blue_args.get("data_csv", None) is not None:
- csv_file = os.path.expanduser(blue_args.data_csv)
- assert os.path.exists(
- csv_file
- ), f"Provided csv file {csv_file} doesn't exist"
- elif blue_args.get("data_json", None) is not None:
- json_file = os.path.expanduser(blue_args.data_json)
- assert os.path.exists(
- json_file
- ), f"Provided JSON file {json_file} doesn't exist"
- elif blue_args.get("data_jsonl", None) is not None:
- jsonl_file = os.path.expanduser(blue_args.data_jsonl)
- assert os.path.exists(
- jsonl_file
- ), f"Provided JSON-L file {jsonl_file} doesn't exist"
- elif shared_state.static_task_data is not None:
- assert (
- len(shared_state.static_task_data) > 0
- ), "Length of data dict provided was 0"
- else:
- raise AssertionError(
- "Must provide one of a data csv, json, json-L, or a list of tasks"
- )
-
- def get_initialization_data(self) -> Iterable["InitializationData"]:
- """
- Return the InitializationData retrieved from the specified stream
- """
- return [
- InitializationData(
- shared=d, unit_data=[{}] * self.args.blueprint.units_per_assignment
- )
- for d in self._initialization_data_dicts
- ]
-
- def validate_onboarding(
- self, worker: "Worker", onboarding_agent: "OnboardingAgent"
- ) -> bool:
- """
- Check the incoming onboarding data and evaluate if the worker
- has passed the qualification or not. Return True if the worker
- has qualified.
- """
- data = onboarding_agent.state.get_data()
- return self.shared_state.validate_onboarding(
- data
- ) # data["outputs"].get("success", True)
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/abstract/static_task/static_task_runner.py b/mephisto/server/blueprints/abstract/static_task/static_task_runner.py
index ef1efd05b..3f1d48cf4 100644
--- a/mephisto/server/blueprints/abstract/static_task/static_task_runner.py
+++ b/mephisto/server/blueprints/abstract/static_task/static_task_runner.py
@@ -4,76 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskRunner
-
-import os
-import time
-import threading
-
-from typing import ClassVar, List, Type, Any, Dict, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Unit, InitializationData
- from mephisto.data_model.agent import Agent, OnboardingAgent
- from mephisto.data_model.blueprint import SharedTaskState
- from omegaconf import DictConfig
-
-
-SYSTEM_SENDER = "mephisto" # TODO(CLEAN) pull from somewhere
-
-
-class StaticTaskRunner(TaskRunner):
- """
- Task runner for a static task
-
- Static tasks always assume single unit assignments,
- as only one person can work on them at a time
- """
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self.is_concurrent = False
- self.assignment_duration_in_seconds = (
- task_run.get_task_config().assignment_duration_in_seconds
- )
-
- def get_init_data_for_agent(self, agent: "Agent") -> Dict[str, Any]:
- """
- Return the data for an agent already assigned to a particular unit
- """
- init_state = agent.state.get_init_state()
- if init_state is not None:
- # reconnecting agent, give what we've got
- return init_state
- else:
- assignment = agent.get_unit().get_assignment()
- assignment_data = self.get_data_for_assignment(assignment)
- agent.state.set_init_state(assignment_data.shared)
- return assignment_data.shared
-
- def run_onboarding(self, agent: "OnboardingAgent"):
- """
- Static onboarding flows eaxactly like a regular task, waiting for
- the submit to come through
- """
- agent_act = agent.act(timeout=self.assignment_duration_in_seconds)
-
- def cleanup_onboarding(self, agent: "OnboardingAgent"):
- """Nothing to clean up in a static onboarding"""
- return
-
- def run_unit(self, unit: "Unit", agent: "Agent") -> None:
- """
- Static runners will get the task data, send it to the user, then
- wait for the agent to act (the data to be completed)
- """
- # Frontend implicitly asks for the initialization data, so we just need
- # to wait for a response
- agent_act = agent.act(timeout=self.assignment_duration_in_seconds)
-
- def cleanup_unit(self, unit: "Unit") -> None:
- """There is currently no cleanup associated with killing an incomplete task"""
- return
+from mephisto.abstractions.blueprints.abstract.static_task.static_task_runner import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/mock/mock_agent_state.py b/mephisto/server/blueprints/mock/mock_agent_state.py
index f56488d40..93aff14e4 100644
--- a/mephisto/server/blueprints/mock/mock_agent_state.py
+++ b/mephisto/server/blueprints/mock/mock_agent_state.py
@@ -4,56 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from typing import List, Optional, Dict, Any, TYPE_CHECKING
-from mephisto.data_model.blueprint import AgentState
-import os
-import json
-
-if TYPE_CHECKING:
- from mephisto.data_model.agent import Agent
- from mephisto.data_model.packet import Packet
-
-
-class MockAgentState(AgentState):
- """
- Mock agent state that is to be used for testing
- """
-
- def __init__(self, agent: "Agent"):
- """Mock agent states keep everything in local memory"""
- self.agent = agent
- self.state: Dict[str, Any] = {}
- self.init_state: Any = None
-
- def set_init_state(self, data: Any) -> bool:
- """Set the initial state for this agent"""
- if self.init_state is not None:
- # Initial state is already set
- return False
- else:
- self.init_state = data
- self.save_data()
- return True
-
- def get_init_state(self) -> Optional[Dict[str, Any]]:
- """
- Return the initial state for this agent,
- None if no such state exists
- """
- return self.init_state
-
- def load_data(self) -> None:
- """Mock agent states have no data stored"""
- pass
-
- def get_data(self) -> Dict[str, Any]:
- """Return dict of this agent's state"""
- return self.state
-
- def save_data(self) -> None:
- """Mock agents don't save data (yet)"""
- pass
-
- def update_data(self, packet: "Packet") -> None:
- """Put new data into this mock state"""
- self.state = packet.data
+from mephisto.abstractions.blueprints.mock.mock_agent_state import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/mock/mock_blueprint.py b/mephisto/server/blueprints/mock/mock_blueprint.py
index 47541f911..2636dc722 100644
--- a/mephisto/server/blueprints/mock/mock_blueprint.py
+++ b/mephisto/server/blueprints/mock/mock_blueprint.py
@@ -4,90 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import (
- Blueprint,
- OnboardingRequired,
- BlueprintArgs,
- SharedTaskState,
+from mephisto.abstractions.blueprints.mock.mock_blueprint import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
)
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from mephisto.data_model.assignment import InitializationData
-from mephisto.server.blueprints.mock.mock_agent_state import MockAgentState
-from mephisto.server.blueprints.mock.mock_task_runner import MockTaskRunner
-from mephisto.server.blueprints.mock.mock_task_builder import MockTaskBuilder
-from mephisto.core.registry import register_mephisto_abstraction
-
-import os
-import time
-
-from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING, Optional
-
-if TYPE_CHECKING:
- from mephsito.data_model.agent import OnboardingAgent
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import AgentState, TaskRunner, TaskBuilder
- from mephisto.data_model.assignment import Assignment
- from mephisto.data_model.worker import Worker
- from argparse import _ArgumentGroup as ArgumentGroup
-
-BLUEPRINT_TYPE = "mock"
-
-
-@dataclass
-class MockBlueprintArgs(BlueprintArgs):
- _blueprint_type: str = BLUEPRINT_TYPE
- num_assignments: int = field(
- default=MISSING,
- metadata={
- "help": "How many workers you want to do each assignment",
- "required": True,
- },
- )
- use_onboarding: bool = field(
- default=False, metadata={"help": "Whether onboarding should be required"}
- )
- timeout_time: int = field(
- default=0,
- metadata={"help": "Whether acts in the run assignment should have a timeout"},
- )
- is_concurrent: bool = field(
- default=True,
- metadata={"help": "Whether to run this mock task as a concurrent task or not"},
- )
-
-
-@register_mephisto_abstraction()
-class MockBlueprint(Blueprint, OnboardingRequired):
- """Mock of a task type, for use in testing"""
-
- AgentStateClass: ClassVar[Type["AgentState"]] = MockAgentState
- OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = MockAgentState
- TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = MockTaskBuilder
- TaskRunnerClass: ClassVar[Type["TaskRunner"]] = MockTaskRunner
- ArgsClass: ClassVar[Type["BlueprintArgs"]] = MockBlueprintArgs
- supported_architects: ClassVar[List[str]] = ["mock"]
- BLUEPRINT_TYPE = BLUEPRINT_TYPE
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self.init_onboarding_config(task_run, args, shared_state)
-
- def get_initialization_data(self) -> Iterable[InitializationData]:
- """
- Return the number of empty assignments specified in --num-assignments
- """
- return [
- MockTaskRunner.get_mock_assignment_data()
- for i in range(self.args.blueprint.num_assignments)
- ]
-
- def validate_onboarding(
- self, worker: "Worker", onboarding_agent: "OnboardingAgent"
- ) -> bool:
- """
- Onboarding validation for MockBlueprints just returns the 'should_pass' field
- """
- return onboarding_agent.state.get_data()["should_pass"]
diff --git a/mephisto/server/blueprints/mock/mock_task_builder.py b/mephisto/server/blueprints/mock/mock_task_builder.py
index ab3d82da1..a0281b74e 100644
--- a/mephisto/server/blueprints/mock/mock_task_builder.py
+++ b/mephisto/server/blueprints/mock/mock_task_builder.py
@@ -4,26 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskBuilder
-
-import os
-import time
-
-from typing import ClassVar, List, Type, Any, Dict, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Assignment
- from argparse import _ArgumentGroup as ArgumentGroup
-
-
-class MockTaskBuilder(TaskBuilder):
- """Builder for a mock task, for use in testing"""
-
- BUILT_FILE = "done.built"
- BUILT_MESSAGE = "built!"
-
- def build_in_dir(self, build_dir: str):
- """Mock tasks don't really build anything (yet)"""
- with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
- built_file.write(self.BUILT_MESSAGE)
+from mephisto.abstractions.blueprints.mock.mock_task_builder import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/mock/mock_task_runner.py b/mephisto/server/blueprints/mock/mock_task_runner.py
index 03742e189..d275b08f4 100644
--- a/mephisto/server/blueprints/mock/mock_task_runner.py
+++ b/mephisto/server/blueprints/mock/mock_task_runner.py
@@ -4,108 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskRunner, SharedTaskState
-from mephisto.data_model.assignment import InitializationData
-
-import os
-import time
-
-from typing import ClassVar, List, Type, Any, Dict, Union, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Assignment, Unit
- from mephisto.data_model.agent import Agent, OnboardingAgent
- from argparse import _ArgumentGroup as ArgumentGroup
- from omegaconf import DictConfig
-
-
-class MockTaskRunner(TaskRunner):
- """Mock of a task runner, for use in testing"""
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self.timeout = args.blueprint.timeout_time
- self.tracked_tasks: Dict[str, Union["Assignment", "Unit"]] = {}
- self.is_concurrent = args.blueprint.get("is_concurrent", True)
- print(f"Blueprint is concurrent: {self.is_concurrent}, {args}")
-
- @staticmethod
- def get_mock_assignment_data() -> InitializationData:
- return InitializationData(shared={}, unit_data=[{}, {}])
-
- @staticmethod
- def get_data_for_assignment(assignment: "Assignment") -> InitializationData:
- """
- Mock tasks have no data unless given during testing
- """
- return MockTaskRunner.get_mock_assignment_data()
-
- def get_init_data_for_agent(self, agent: "Agent") -> Dict[str, Any]:
- """
- Return the data for an agent already assigned to a particular unit
- """
- # TODO(#97) implement
- pass
-
- def run_onboarding(self, onboarding_agent: "OnboardingAgent"):
- """
- Mock runners simply wait for an act to come in with whether
- or not onboarding is complete
- """
- packet = onboarding_agent.act(timeout=self.timeout)
- onboarding_agent.did_submit.set()
- onboarding_agent.mark_done()
-
- def run_unit(self, unit: "Unit", agent: "Agent"):
- """
- Mock runners will pass the agents for the given assignment
- all of the required messages to finish a task.
- """
- self.tracked_tasks[unit.db_id] = unit
- time.sleep(0.3)
- assigned_agent = unit.get_assigned_agent()
- assert assigned_agent is not None, "No agent was assigned"
- assert (
- assigned_agent.db_id == agent.db_id
- ), "Task was not given to assigned agent"
- packet = agent.act(timeout=self.timeout)
- if packet is not None:
- agent.observe(packet)
- agent.did_submit.set()
- agent.mark_done()
- del self.tracked_tasks[unit.db_id]
-
- def run_assignment(self, assignment: "Assignment", agents: List["Agent"]):
- """
- Mock runners will pass the agents for the given assignment
- all of the required messages to finish a task.
- """
- self.tracked_tasks[assignment.db_id] = assignment
- agent_dict = {a.db_id: a for a in agents}
- time.sleep(0.3)
- for unit in assignment.get_units():
- assigned_agent = unit.get_assigned_agent()
- assert assigned_agent is not None, "Task was not fully assigned"
- agent = agent_dict.get(assigned_agent.db_id)
- assert agent is not None, "Task was not launched with assigned agents"
- packet = agent.act(timeout=self.timeout)
- if packet is not None:
- agent.observe(packet)
- agent.did_submit.set()
- agent.mark_done()
- del self.tracked_tasks[assignment.db_id]
-
- def cleanup_assignment(self, assignment: "Assignment"):
- """No cleanup required yet for ending mock runs"""
- pass
-
- def cleanup_unit(self, unit: "Unit"):
- """No cleanup required yet for ending mock runs"""
- pass
-
- def cleanup_onboarding(self, onboarding_agent: "OnboardingAgent"):
- """No cleanup required yet for ending onboarding in mocks"""
- pass
+from mephisto.abstractions.blueprints.mock.mock_task_runner import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/parlai_chat/parlai_chat_agent_state.py b/mephisto/server/blueprints/parlai_chat/parlai_chat_agent_state.py
index 8b8a34d43..839045189 100644
--- a/mephisto/server/blueprints/parlai_chat/parlai_chat_agent_state.py
+++ b/mephisto/server/blueprints/parlai_chat/parlai_chat_agent_state.py
@@ -4,118 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from typing import List, Optional, Dict, Any, Tuple, TYPE_CHECKING
-from mephisto.data_model.blueprint import AgentState
-from mephisto.data_model.packet import (
- PACKET_TYPE_AGENT_ACTION,
- PACKET_TYPE_UPDATE_AGENT_STATUS,
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_agent_state import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
)
-import os
-import json
-import time
-
-if TYPE_CHECKING:
- from mephisto.data_model.agent import Agent
- from mephisto.data_model.packet import Packet
-
-
-class ParlAIChatAgentState(AgentState):
- """
- Holds information about ParlAI-style chat. Data is stored in json files
- containing every act from the ParlAI world.
- """
-
- def __init__(self, agent: "Agent"):
- """
- Create an AgentState to track the state of an agent's work on a Unit
-
- Initialize with an existing file if it exists.
- """
- self.agent = agent
- data_file = self._get_expected_data_file()
- if os.path.exists(data_file):
- self.load_data()
- else:
- self.messages: List[Dict[str, Any]] = []
- self.init_data = None
- self.save_data()
-
- def set_init_state(self, data: Any) -> bool:
- """Set the initial state for this agent"""
- if self.init_data is not None:
- # Initial state is already set
- return False
- else:
- self.init_data = data
- self.save_data()
- return True
-
- def get_init_state(self) -> Optional[Dict[str, Any]]:
- """
- Return the initial state for this agent,
- None if no such state exists
- """
- if self.init_data is None:
- return None
- return {"task_data": self.init_data, "raw_messages": self.messages}
-
- def _get_expected_data_file(self) -> str:
- """Return the place we would expect to find data for this agent state"""
- agent_dir = self.agent.get_data_dir()
- os.makedirs(agent_dir, exist_ok=True)
- return os.path.join(agent_dir, "state.json")
-
- def load_data(self) -> None:
- """Load stored data from a file to this object"""
- agent_file = self._get_expected_data_file()
- with open(agent_file, "r") as state_json:
- state = json.load(state_json)
- self.messages = state["outputs"]["messages"]
- self.init_data = state["inputs"]
-
- def get_data(self) -> Dict[str, Any]:
- """Return dict with the messages of this agent"""
- return {"outputs": {"messages": self.messages}, "inputs": self.init_data}
-
- def get_parsed_data(self) -> Dict[str, Any]:
- """Return the formatted input, conversations, and final data"""
- init_data = self.init_data
- save_data = None
- messages = [
- m["data"]
- for m in self.messages
- if m["packet_type"] == PACKET_TYPE_AGENT_ACTION
- ]
- agent_name = None
- if len(messages) > 0:
- for m in self.messages:
- if m["packet_type"] == PACKET_TYPE_UPDATE_AGENT_STATUS:
- if "agent_display_name" in m["data"]["state"]:
- agent_name = m["data"]["state"]["agent_display_name"]
- break
- if "MEPHISTO_is_submit" in messages[-1]:
- messages = messages[:-1]
- if "WORLD_DATA" in messages[-1]:
- save_data = messages[-1]["WORLD_DATA"]
- messages = messages[:-1]
- return {
- "agent_name": agent_name,
- "initial_data": init_data,
- "messages": messages,
- "save_data": save_data,
- }
-
- def save_data(self) -> None:
- """Save all messages from this agent to """
- agent_file = self._get_expected_data_file()
- with open(agent_file, "w+") as state_json:
- json.dump(self.get_data(), state_json)
-
- def update_data(self, packet: "Packet") -> None:
- """
- Append the incoming packet as well as who it came from
- """
- message_data = packet.to_sendable_dict()
- message_data["timestamp"] = time.time()
- self.messages.append(message_data)
- self.save_data()
diff --git a/mephisto/server/blueprints/parlai_chat/parlai_chat_blueprint.py b/mephisto/server/blueprints/parlai_chat/parlai_chat_blueprint.py
index 30c192c99..4116289ca 100644
--- a/mephisto/server/blueprints/parlai_chat/parlai_chat_blueprint.py
+++ b/mephisto/server/blueprints/parlai_chat/parlai_chat_blueprint.py
@@ -4,279 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import (
- Blueprint,
- OnboardingRequired,
- BlueprintArgs,
- SharedTaskState,
-)
-from dataclasses import dataclass, field
-from mephisto.data_model.assignment import InitializationData
-from mephisto.server.blueprints.parlai_chat.parlai_chat_agent_state import (
- ParlAIChatAgentState,
-)
-from mephisto.server.blueprints.parlai_chat.parlai_chat_task_runner import (
- ParlAIChatTaskRunner,
-)
-from mephisto.server.blueprints.parlai_chat.parlai_chat_task_builder import (
- ParlAIChatTaskBuilder,
-)
-from mephisto.core.registry import register_mephisto_abstraction
-from omegaconf import DictConfig, MISSING
-
-import os
-import time
-import csv
-import sys
-
-from importlib import import_module
-
-from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.worker import Worker
- from mephisto.data_model.agent import Agent, OnboardingAgent
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import AgentState, TaskRunner, TaskBuilder
- from mephisto.data_model.assignment import Assignment
- from argparse import _ArgumentGroup as ArgumentGroup
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import *
+import warnings
-BLUEPRINT_TYPE = "parlai_chat"
-
-
-MISSING_SOMETHING_TEXT = (
- "
"
- "You didn't specify a task_description_file and also didn't override the "
- "frontend `TaskPreviewView` (if this is a preview) or the `TaskDescription` "
- "component (if this is in-task)."
- "
"
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
)
-
-
-@dataclass
-class SharedParlAITaskState(SharedTaskState):
- frontend_task_opts: Dict[str, Any] = field(default_factory=dict)
- world_opt: Dict[str, Any] = field(default_factory=dict)
- onboarding_world_opt: Dict[str, Any] = field(default_factory=dict)
-
-
-@dataclass
-class ParlAIChatBlueprintArgs(BlueprintArgs):
- _blueprint_type: str = BLUEPRINT_TYPE
- _group: str = field(
- default="ParlAIChatBlueprint",
- metadata={
- "help": """
- Tasks launched from static blueprints need a
- source html file to display to workers, as well as a csv
- containing values that will be inserted into templates in
- the html.
- """
- },
- )
- world_file: str = field(
- default=MISSING,
- metadata={"help": "Path to file containing ParlAI world", "required": True},
- )
- preview_source: str = field(
- default=MISSING,
- metadata={"help": "Optional path to source HTML file to preview the task"},
- )
- task_description_file: str = field(
- default=MISSING,
- metadata={
- "help": (
- "Path to file for the extended description of the task. "
- "Required if not providing a custom source bundle."
- )
- },
- )
- custom_source_bundle: str = field(
- default=MISSING,
- metadata={"help": "Optional path to a fully custom frontend bundle"},
- )
- custom_source_dir: str = field(
- default=MISSING,
- metadata={"help": "Optional path to a directory containing custom js code"},
- )
- extra_source_dir: str = field(
- default=MISSING,
- metadata={
- "help": (
- "Optional path to sources that the frontend may "
- "refer to (such as images/video/css/scripts)"
- )
- },
- )
- context_csv: str = field(
- default=MISSING,
- metadata={"help": "Optional path to csv containing task context"},
- )
- num_conversations: int = field(
- default=MISSING,
- metadata={
- "help": "Optional count of conversations to have if no context provided"
- },
- )
-
-
-@register_mephisto_abstraction()
-class ParlAIChatBlueprint(Blueprint, OnboardingRequired):
- """Blueprint for a task that runs a parlai chat """
-
- AgentStateClass: ClassVar[Type["AgentState"]] = ParlAIChatAgentState
- OnboardingAgentStateClass: ClassVar[Type["AgentState"]] = ParlAIChatAgentState
- TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = ParlAIChatTaskBuilder
- TaskRunnerClass: ClassVar[Type["TaskRunner"]] = ParlAIChatTaskRunner
- ArgsClass = ParlAIChatBlueprintArgs
- SharedStateClass = SharedParlAITaskState
- supported_architects: ClassVar[List[str]] = [
- "mock",
- "heroku",
- "local",
- ] # TODO update?
- BLUEPRINT_TYPE = BLUEPRINT_TYPE
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self._initialization_data_dicts: List[Dict[str, Any]] = []
- self.init_onboarding_config(task_run, args, shared_state)
-
- if args.blueprint.get("context_csv", None) is not None:
- csv_file = os.path.expanduser(args.blueprint.context_csv)
- with open(csv_file, "r", encoding="utf-8-sig") as csv_fp:
- csv_reader = csv.reader(csv_fp)
- headers = next(csv_reader)
- for row in csv_reader:
- row_data: Dict[str, Any] = {}
- for i, col in enumerate(row):
- row_data[headers[i]] = col
- self._initialization_data_dicts.append(row_data)
- elif args.blueprint.get("num_conversations", None) is not None:
- self._initialization_data_dicts = [{}] * args.blueprint.num_conversations
- else:
- # TODO(#95) handle JSON and python dicts directly
- raise NotImplementedError(
- "Parsing parlai tasks directly from dicts or JSON is not supported yet"
- )
-
- world_file_path = os.path.expanduser(args.blueprint.world_file)
- world_module_path = world_file_path[:-3]
- sys.path.append(world_module_path)
- world_module_name = os.path.basename(world_file_path)[:-3]
- world_module = import_module(world_module_name)
- self.world_module = world_module
- assert hasattr(world_module, "make_world")
- assert hasattr(world_module, "get_world_params")
- self.agent_count = world_module.get_world_params()[ # type: ignore
- "agent_count"
- ]
-
- self.full_task_description = MISSING_SOMETHING_TEXT
- if args.blueprint.get("task_description_file", None) is not None:
- full_path = os.path.expanduser(args.blueprint.task_description_file)
- assert os.path.exists(
- full_path
- ), f"Target task description path {full_path} doesn't exist"
- with open(full_path, "r") as description_fp:
- self.full_task_description = description_fp.read()
-
- @classmethod
- def assert_task_args(
- cls, args: "DictConfig", shared_state: "SharedTaskState"
- ) -> None:
- """Ensure that arguments are properly configured to launch this task"""
- # assert world file is valid
- world_file_path = os.path.expanduser(args.blueprint.world_file)
- world_module_dir = os.path.dirname(world_file_path)
- assert os.path.exists(
- world_file_path
- ), f"Provided world path {world_file_path} doesn't exist"
- sys.path.append(world_module_dir)
- world_module_name = os.path.basename(world_file_path)[:-3]
- world_module = import_module(world_module_name)
- assert hasattr(
- world_module, "make_world"
- ), "Provided world file has no `make_world` method"
- assert hasattr(
- world_module, "get_world_params"
- ), "Provided world file has no `get_world_params` method"
-
- # assert some method for determining quantity of conversations
- if args.blueprint.get("context_csv", None) is not None:
- raise AssertionError(
- "Specifying task quantity via context csv is not yet implemented"
- )
- elif args.blueprint.get("num_conversations", None) is not None:
- assert (
- args.blueprint.num_conversations > 0
- ), "Must have at least one conversation"
- else:
- raise AssertionError(
- "Must specify one of --context-csv or --num-conversations"
- )
-
- if args.blueprint.get("custom_source_bundle", None) is not None:
- custom_source_file_path = os.path.expanduser(
- args.blueprint.custom_source_bundle
- )
- assert os.path.exists(
- custom_source_file_path
- ), f"Provided custom bundle doesn't exist at {custom_source_file_path}"
-
- if args.blueprint.get("custom_source_dir", None) is not None:
- custom_source_dir_path = os.path.expanduser(
- args.blueprint.custom_source_dir
- )
- assert os.path.exists(
- custom_source_dir_path
- ), f"Provided custom source dir doesn't exist at {custom_source_dir_path}"
-
- if args.blueprint.get("preview_source", None) is not None:
- preview_source_file = os.path.expanduser(args.blueprint.preview_source)
- assert os.path.exists(
- preview_source_file
- ), f"Provided preview source doesn't exist at {preview_source_file}"
-
- if args.blueprint.get("extra_source_dir", None) is not None:
- extra_source_dir = os.path.expanduser(args.blueprint.extra_source_dir)
- assert os.path.exists(
- extra_source_dir
- ), f"Provided extra resource dir doesn't exist at {extra_source_dir}"
-
- def get_frontend_args(self) -> Dict[str, Any]:
- """
- Specifies what options within a task_config should be fowarded
- to the client for use by the task's frontend
- """
- # TODO move frontend args in
- frontend_task_config = {
- "task_description": self.full_task_description,
- "frame_height": 650,
- "chat_title": self.args.task.task_title,
- "has_preview": self.args.blueprint.get("preview_source", None) is not None,
- "block_mobile": True,
- "frontend_task_opts": self.shared_state.frontend_task_opts,
- }
- frontend_task_config.update(super().get_frontend_args())
- return frontend_task_config
-
- def get_initialization_data(self) -> Iterable["InitializationData"]:
- """
- Return the InitializationData retrieved from the specified stream
- """
- return [
- InitializationData(shared=d, unit_data=[{}] * self.agent_count)
- for d in self._initialization_data_dicts
- ]
-
- def validate_onboarding(
- self, worker: "Worker", onboarding_agent: "OnboardingAgent"
- ) -> bool:
- if hasattr(self.world_module, "validate_onboarding"):
- return self.world_module.validate_onboarding( # type: ignore
- onboarding_agent.state.get_data()
- )
- return True
diff --git a/mephisto/server/blueprints/parlai_chat/parlai_chat_task_builder.py b/mephisto/server/blueprints/parlai_chat/parlai_chat_task_builder.py
index d7d4d9a22..63fee1ad4 100644
--- a/mephisto/server/blueprints/parlai_chat/parlai_chat_task_builder.py
+++ b/mephisto/server/blueprints/parlai_chat/parlai_chat_task_builder.py
@@ -4,180 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskBuilder
-
-from distutils.dir_util import copy_tree
-import os
-import time
-import sh
-import shutil
-import subprocess
-
-from typing import ClassVar, List, Type, Any, Dict, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Assignment
-
-PARLAI_TASK_DIR = os.path.dirname(__file__)
-FRONTEND_SOURCE_DIR = os.path.join(PARLAI_TASK_DIR, "webapp")
-FRONTEND_BUILD_DIR = os.path.join(FRONTEND_SOURCE_DIR, "build")
-
-BUILT_FILE = "done.built"
-CUSTOM_BUILD_DIRNAME = "_generated"
-
-
-class ParlAIChatTaskBuilder(TaskBuilder):
- """
- Builder for a parlai chat task, pulls the appropriate html,
- builds the frontend (if a build doesn't already exist),
- then puts the file into the server directory
- """
-
- BUILT_FILE = BUILT_FILE
- BUILT_MESSAGE = "built!"
-
- def rebuild_core(self):
- """Rebuild the frontend for this task"""
- return_dir = os.getcwd()
- os.chdir(FRONTEND_SOURCE_DIR)
- if os.path.exists(FRONTEND_BUILD_DIR):
- shutil.rmtree(FRONTEND_BUILD_DIR)
- packages_installed = subprocess.call(["npm", "install"])
- if packages_installed != 0:
- raise Exception(
- "please make sure npm is installed, otherwise view "
- "the above error for more info."
- )
-
- webpack_complete = subprocess.call(["npm", "run", "dev"])
- if webpack_complete != 0:
- raise Exception(
- "Webpack appears to have failed to build your "
- "frontend. See the above error for more information."
- )
- os.chdir(return_dir)
-
- def build_and_return_custom_bundle(self, custom_src_dir):
- """Locate all of the custom files used for a custom build, create
- a prebuild directory containing all of them, then build the
- custom source.
-
- Check dates to only go through this build process when files have changes
- """
- # TODO add custom component directories, and recursively check those
- TARGET_BUILD_FILES = {"main.js": "src/main.js", "package.json": "package.json"}
-
- prebuild_path = os.path.join(custom_src_dir, CUSTOM_BUILD_DIRNAME)
- build_path = os.path.join(prebuild_path, "build", "bundle.js")
-
- # see if we need to rebuild
- if os.path.exists(build_path):
- created_date = os.path.getmtime(build_path)
- up_to_date = True
- for fn in TARGET_BUILD_FILES.keys():
- possible_conflict = os.path.join(custom_src_dir, fn)
- if os.path.exists(possible_conflict):
- if os.path.getmtime(possible_conflict) > created_date:
- up_to_date = False
- break
- if up_to_date:
- return build_path
-
- # build anew
- REQUIRED_SOURCE_FILES = [
- ".babelrc",
- ".eslintrc",
- "package.json",
- "webpack.config.js",
- ]
- REQUIRED_SOURCE_DIRS = ["src"]
- if not os.path.exists(os.path.join(prebuild_path, "build")):
- os.makedirs(os.path.join(prebuild_path, "build"), exist_ok=True)
-
- # Copy default files
- for src_dir in REQUIRED_SOURCE_DIRS:
- src_path = os.path.join(FRONTEND_SOURCE_DIR, src_dir)
- dst_path = os.path.join(prebuild_path, src_dir)
- if os.path.exists(dst_path):
- shutil.rmtree(dst_path)
- shutil.copytree(src_path, dst_path)
- for src_file in REQUIRED_SOURCE_FILES:
- src_path = os.path.join(FRONTEND_SOURCE_DIR, src_file)
- dst_path = os.path.join(prebuild_path, src_file)
- shutil.copy2(src_path, dst_path)
-
- # copy custom files
- for src_file in TARGET_BUILD_FILES.keys():
- src_path = os.path.join(custom_src_dir, src_file)
- if os.path.exists(src_path):
- dst_path = os.path.join(prebuild_path, TARGET_BUILD_FILES[src_file])
- shutil.copy2(src_path, dst_path)
-
- # navigate and build
- return_dir = os.getcwd()
- os.chdir(prebuild_path)
- packages_installed = subprocess.call(["npm", "install"])
- if packages_installed != 0:
- raise Exception(
- "please make sure npm is installed, otherwise view "
- "the above error for more info."
- )
-
- webpack_complete = subprocess.call(["npm", "run", "dev"])
- if webpack_complete != 0:
- raise Exception(
- "Webpack appears to have failed to build your "
- "frontend. See the above error for more information."
- )
-
- # cleanup and return
- os.chdir(return_dir)
- return build_path
-
- def build_in_dir(self, build_dir: str):
- """Build the frontend if it doesn't exist, then copy into the server directory"""
- # Only build this task if it hasn't already been built
- if not os.path.exists(FRONTEND_BUILD_DIR):
- self.rebuild_core()
-
- custom_source_dir = self.args.blueprint.get("custom_source_dir", None)
- build_bundle = None
- if custom_source_dir is not None:
- custom_source_dir = os.path.expanduser(custom_source_dir)
- build_bundle = self.build_and_return_custom_bundle(custom_source_dir)
-
- # Copy over the preview file as preview.html, use the default if none specified
- target_resource_dir = os.path.join(build_dir, "static")
- preview_file = self.args.blueprint.get("preview_source", None)
- if preview_file is not None:
- use_preview_file = os.path.expanduser(preview_file)
- target_path = os.path.join(target_resource_dir, "preview.html")
- shutil.copy2(use_preview_file, target_path)
-
- # If any additional task files are required via a source_dir, copy those as well
- extra_dir_path = self.args.blueprint.get("extra_source_dir", None)
- if extra_dir_path is not None:
- extra_dir_path = os.path.expanduser(extra_dir_path)
- copy_tree(extra_dir_path, target_resource_dir)
-
- bundle_js_file = self.args.blueprint.get("custom_source_bundle", None)
- if bundle_js_file is None:
- if build_bundle is not None:
- bundle_js_file = build_bundle
- else:
- bundle_js_file = os.path.join(FRONTEND_BUILD_DIR, "bundle.js")
- target_path = os.path.join(target_resource_dir, "bundle.js")
- shutil.copy2(bundle_js_file, target_path)
-
- # Copy over the static files for this task:
- for fin_file in ["index.html", "notif.mp3"]:
- copied_static_file = os.path.join(
- FRONTEND_SOURCE_DIR, "src", "static", fin_file
- )
- target_path = os.path.join(target_resource_dir, fin_file)
- shutil.copy2(copied_static_file, target_path)
-
- # Write a built file confirmation
- with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
- built_file.write(self.BUILT_MESSAGE)
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_task_builder import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/parlai_chat/parlai_chat_task_runner.py b/mephisto/server/blueprints/parlai_chat/parlai_chat_task_runner.py
index 78b811070..fe5a4c273 100644
--- a/mephisto/server/blueprints/parlai_chat/parlai_chat_task_runner.py
+++ b/mephisto/server/blueprints/parlai_chat/parlai_chat_task_runner.py
@@ -4,269 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskRunner
-from mephisto.data_model.agent import Agent, OnboardingAgent
-
-try:
- from parlai.core.agents import Agent as ParlAIAgent
- from parlai.core.message import Message
-except:
-
- class ParlAIAgent:
- def __init__(self, *args, **kwargs):
- raise NotImplementedError(
- "You need to install ParlAI to use this blueprint"
- )
-
- class Message:
- def __init__(self, *args, **kwargs):
- raise NotImplementedError(
- "You need to install ParlAI to use this blueprint"
- )
-
- pass # ParlAI is not installed. TODO remove when we move this blueprint to ParlAI
-
-from mephisto.data_model.packet import (
- Packet,
- PACKET_TYPE_AGENT_ACTION,
- PACKET_TYPE_UPDATE_AGENT_STATUS,
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_task_runner import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
)
-
-from importlib import import_module
-
-import os
-import sh
-import shlex
-import shutil
-import subprocess
-import sys
-from uuid import uuid4
-
-from typing import ClassVar, List, Type, Any, Dict, Union, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import AgentState
- from mephsito.data_model.assignment import Assignment
-
-
-class MephistoAgentWrapper(ParlAIAgent):
- """
- Class that wraps a mephisto agent to be used as an
- agent in ParlAI worlds
- """
-
- def __init__(self, agent: Union[Agent, OnboardingAgent]):
- self.mephisto_agent = agent
- self.__agent_id = "unnamed agent"
- self.__mephisto_agent_id = agent.get_agent_id()
-
- @property
- def agent_id(self):
- """
- Agent IDs in ParlAI are used to identify the speaker,
- and often are a label like "teacher"
- """
- return self.__agent_id
-
- @agent_id.setter
- def agent_id(self, new_agent_id: str):
- """
- We want to be able to display these labels to the
- frontend users, so when these are updated by a
- world we forward that to the frontend
- """
- packaged_act = Packet(
- packet_type=PACKET_TYPE_UPDATE_AGENT_STATUS,
- sender_id="mephisto",
- receiver_id=self.__mephisto_agent_id,
- data={"state": {"agent_display_name": new_agent_id}},
- )
- self.mephisto_agent.observe(packaged_act)
- self.__agent_id = new_agent_id
-
- def act(self, timeout=None):
- """
- ParlAI Agents send an act dict, we must convert this
- """
- if timeout is None:
- gotten_act = self.mephisto_agent.act()
- else:
- gotten_act = self.mephisto_agent.act(timeout=timeout)
- if gotten_act is None:
- return None
- parsed_act = gotten_act.data
- parsed_act["id"] = self.__agent_id
- return Message(parsed_act)
-
- def observe(self, act):
- """
- ParlAI Agents observe a dict, we must convert these to packets?
- """
- if act.get("message_id") is None:
- act["message_id"] = str(uuid4())
- packaged_act = Packet(
- packet_type=PACKET_TYPE_AGENT_ACTION,
- sender_id="mephisto",
- receiver_id=self.__mephisto_agent_id,
- data=act,
- )
- self.mephisto_agent.observe(packaged_act)
-
-
-class ParlAIChatTaskRunner(TaskRunner):
- """
- Task runner for a parlai chat task
- """
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- world_file_path = os.path.expanduser(args.blueprint.world_file)
- world_module_path = world_file_path[:-3]
- sys.path.append(world_module_path)
- world_module_name = os.path.basename(world_file_path)[:-3]
- self.parlai_world_module = import_module(world_module_name)
- world_params = self.parlai_world_module.get_world_params()
- self.is_concurrent = world_params["agent_count"] > 1
- self.id_to_worlds: Dict[str, Any] = {}
-
- def get_init_data_for_agent(self, agent: "Agent") -> Dict[str, Any]:
- """
- Return the data for an agent already assigned to a particular unit
- """
- init_state = agent.state.get_init_state()
- if init_state is not None:
- # reconnecting agent, give what we've got
- return init_state
- else:
- assignment = agent.get_unit().get_assignment()
- assignment_data = self.get_data_for_assignment(assignment)
- agent.state.set_init_state(assignment_data.shared)
- new_state = agent.state.get_init_state()
- assert new_state is not None, "Recently initialized state still None"
- return new_state
-
- def get_world_id(self, world_type: str, extra_id: str) -> str:
- """Get a world id specific to the given world type"""
- return f"{world_type}-{extra_id}"
-
- def run_onboarding(self, agent: "OnboardingAgent") -> None:
- """
- ParlAI Onboarding will initialize an onboarding
- world, then run it to completion if possible
- """
- opt: Dict[str, Any] = self.shared_state.onboarding_world_opt
- parlai_agent = MephistoAgentWrapper(agent)
- world = self.parlai_world_module.make_onboarding_world( # type: ignore
- opt, parlai_agent
- )
- world_id = self.get_world_id("onboard", agent.get_agent_id())
- self.id_to_worlds[world_id] = world
- while (
- not world.episode_done()
- and agent.get_agent_id() in self.running_onboardings
- ):
- world.parley()
- world.shutdown()
- if hasattr(world, "prep_save_data"):
- agent.observe(
- Packet(
- packet_type=PACKET_TYPE_AGENT_ACTION,
- sender_id="mephisto",
- receiver_id=agent.db_id,
- data={
- "id": "SUBMIT_WORLD_DATA",
- "WORLD_DATA": world.prep_save_data([parlai_agent]),
- "text": "",
- },
- )
- )
-
- def cleanup_onboarding(self, agent: "OnboardingAgent") -> None:
- """Shutdown the world"""
- onboarding_id = agent.get_agent_id()
- world_id = self.get_world_id("onboard", onboarding_id)
- self.id_to_worlds[world_id].shutdown()
- del self.id_to_worlds[world_id]
-
- def run_assignment(self, assignment: "Assignment", agents: List["Agent"]) -> None:
- """
- ParlAI runners will initialize a task world, then run them to completion
- if possible
- """
- for agent in agents:
- assert agent is not None, "task was not fully assigned"
- opt: Dict[str, Any] = self.shared_state.world_opt
- parlai_agents = [MephistoAgentWrapper(a) for a in agents]
- world = self.parlai_world_module.make_world(opt, parlai_agents) # type: ignore
- world_id = self.get_world_id("assignment", assignment.db_id)
- self.id_to_worlds[world_id] = world
- while not world.episode_done() and assignment.db_id in self.running_assignments:
- world.parley()
-
- # TODO(WISH) it would be nice to have individual agents be able to submit their
- # final things without needing to wait for their partner, such
- # as if one needs to rate and the other doesn't
-
- world.shutdown()
- if hasattr(world, "prep_save_data"):
- for idx in range(len(parlai_agents)):
- agents[idx].observe(
- Packet(
- packet_type=PACKET_TYPE_AGENT_ACTION,
- sender_id="mephisto",
- receiver_id=agents[idx].db_id,
- data={
- "id": "SUBMIT_WORLD_DATA",
- "WORLD_DATA": world.prep_save_data([parlai_agents[idx]]),
- "text": "",
- },
- )
- )
-
- def cleanup_assignment(self, assignment: "Assignment") -> None:
- """Handle cleanup for a specific assignment"""
- world_id = self.get_world_id("assignment", assignment.db_id)
- self.id_to_worlds[world_id].shutdown()
- del self.id_to_worlds[world_id]
-
- def run_unit(self, unit: "Unit", agent: "Agent") -> None:
- """
- ParlAI runners will initialize a task world, then run them to completion
- if possible
- """
- agents = [agent]
- opt: Dict[str, Any] = self.shared_state.world_opt
- parlai_agents = [MephistoAgentWrapper(a) for a in agents]
- world = self.parlai_world_module.make_world(opt, parlai_agents) # type: ignore
- world_id = self.get_world_id("unit", unit.db_id)
- self.id_to_worlds[world_id] = world
- while not world.episode_done() and unit.db_id in self.running_units:
- world.parley()
-
- # TODO(WISH) it would be nice to have individual agents be able to submit their
- # final things without needing to wait for their partner, such
- # as if one needs to rate and the other doesn't
-
- world.shutdown()
- if hasattr(world, "prep_save_data"):
- agent.observe(
- Packet(
- packet_type=PACKET_TYPE_AGENT_ACTION,
- sender_id="mephisto",
- receiver_id=agent.db_id,
- data={
- "id": "SUBMIT_WORLD_DATA",
- "WORLD_DATA": world.prep_save_data(parlai_agents),
- "text": "",
- },
- )
- )
-
- def cleanup_unit(self, unit: "Unit") -> None:
- """Handle cleanup for a specific unit"""
- world_id = self.get_world_id("unit", unit.db_id)
- self.id_to_worlds[world_id].shutdown()
- del self.id_to_worlds[world_id]
diff --git a/mephisto/server/blueprints/static_react_task/static_react_blueprint.py b/mephisto/server/blueprints/static_react_task/static_react_blueprint.py
index b2e6ca124..9b59d2478 100644
--- a/mephisto/server/blueprints/static_react_task/static_react_blueprint.py
+++ b/mephisto/server/blueprints/static_react_task/static_react_blueprint.py
@@ -4,91 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.assignment import InitializationData
-from dataclasses import dataclass, field
-from omegaconf import MISSING
-from mephisto.server.blueprints.abstract.static_task.static_blueprint import (
- StaticBlueprint,
- StaticBlueprintArgs,
+from mephisto.abstractions.blueprints.static_react_task.static_react_blueprint import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
)
-from mephisto.server.blueprints.static_react_task.static_react_task_builder import (
- StaticReactTaskBuilder,
-)
-from mephisto.core.registry import register_mephisto_abstraction
-
-import os
-import time
-import csv
-
-from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import AgentState, TaskRunner, TaskBuilder
- from mephisto.data_model.assignment import Assignment
- from argparse import _ArgumentGroup as ArgumentGroup
-
-BLUEPRINT_TYPE = "static_react_task"
-
-
-@dataclass
-class StaticReactBlueprintArgs(StaticBlueprintArgs):
- """
- StaticReactBlueprint: Tasks launched from static blueprints need
- a prebuilt javascript bundle containing the task. We suggest building
- with our provided useMephistoTask hook.
- """
-
- _blueprint_type: str = BLUEPRINT_TYPE
- _group: str = field(
- default="StaticReactBlueprint",
- metadata={
- "help": """
- Tasks launched from static blueprints need
- a prebuilt javascript bundle containing the task. We suggest building
- with our provided useMephistoTask hook.
- """
- },
- )
- task_source: str = field(
- default=MISSING,
- metadata={
- "help": "Path to file containing javascript bundle for the task",
- "required": True,
- },
- )
-
-
-@register_mephisto_abstraction()
-class StaticReactBlueprint(StaticBlueprint):
- """Blueprint for a task that runs off of a built react javascript bundle"""
-
- TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = StaticReactTaskBuilder
- ArgsClass = StaticReactBlueprintArgs
- BLUEPRINT_TYPE = BLUEPRINT_TYPE
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self.js_bundle = os.path.expanduser(args.blueprint.task_source)
- if not os.path.exists(self.js_bundle):
- raise FileNotFoundError(
- f"Specified bundle file {self.js_bundle} was not found from {os.getcwd()}"
- )
-
- @classmethod
- def assert_task_args(
- cls, args: "DictConfig", shared_state: "SharedTaskState"
- ) -> None:
- """Ensure that static requirements are fulfilled, and source file exists"""
- super().assert_task_args(args, shared_state)
-
- found_task_source = args.blueprint.task_source
- assert (
- found_task_source is not None
- ), "Must provide a path to a javascript bundle in `task_source`"
- found_task_path = os.path.expanduser(found_task_source)
- assert os.path.exists(
- found_task_path
- ), f"Provided task source {found_task_path} does not exist."
diff --git a/mephisto/server/blueprints/static_react_task/static_react_task_builder.py b/mephisto/server/blueprints/static_react_task/static_react_task_builder.py
index 5cf790721..56f822977 100644
--- a/mephisto/server/blueprints/static_react_task/static_react_task_builder.py
+++ b/mephisto/server/blueprints/static_react_task/static_react_task_builder.py
@@ -4,37 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskBuilder
-
-from distutils.dir_util import copy_tree
-import os
-import shutil
-
-
-class StaticReactTaskBuilder(TaskBuilder):
- """
- Builder for a static task, puts required files into
- the server directory for deployment.
- """
-
- BUILT_FILE = "done.built"
- BUILT_MESSAGE = "built!"
-
- def build_in_dir(self, build_dir: str):
- """Build the frontend if it doesn't exist, then copy into the server directory"""
- target_resource_dir = os.path.join(build_dir, "static")
-
- # If any additional task files are required via a source_dir, copy those as well
- extra_dir_path = self.args.blueprint.get("extra_source_dir", None)
- if extra_dir_path is not None:
- extra_dir_path = os.path.expanduser(extra_dir_path)
- copy_tree(extra_dir_path, target_resource_dir)
-
- # Copy the built core and the given task file to the target path
- use_bundle = os.path.expanduser(self.args.blueprint.task_source)
- target_path = os.path.join(target_resource_dir, "bundle.js")
- shutil.copy2(use_bundle, target_path)
-
- # Write a built file confirmation
- with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
- built_file.write(self.BUILT_MESSAGE)
+from mephisto.abstractions.blueprints.static_react_task.static_react_task_builder import *
+import warnings
+
+warnings.warn(
+ "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+ "Please replace all of your imports from mephisto.server.blueprints "
+ "to mephisto.abstractions.blueprints ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/blueprints/static_task/static_html_blueprint.py b/mephisto/server/blueprints/static_task/static_html_blueprint.py
index 48b4cc159..9c9b0a963 100644
--- a/mephisto/server/blueprints/static_task/static_html_blueprint.py
+++ b/mephisto/server/blueprints/static_task/static_html_blueprint.py
@@ -4,151 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.server.blueprints.abstract.static_task.static_blueprint import (
- StaticBlueprint,
- StaticBlueprintArgs,
+from mephisto.abstractions.blueprints.static_html_task.static_html_blueprint import *  # re-export under the legacy path
+import warnings
+
+warnings.warn(
+    "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+    "Please replace all of your imports from mephisto.server.blueprints.static_task "
+    "to mephisto.abstractions.blueprints.static_html_task ",
+    PendingDeprecationWarning,
)
-from dataclasses import dataclass, field
-from omegaconf import MISSING, DictConfig
-from mephisto.server.blueprints.static_task.static_html_task_builder import (
- StaticHTMLTaskBuilder,
-)
-from mephisto.core.registry import register_mephisto_abstraction
-
-import os
-import time
-import csv
-
-from typing import ClassVar, List, Type, Any, Dict, Iterable, Optional, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.blueprint import (
- AgentState,
- TaskRunner,
- TaskBuilder,
- SharedTaskState,
- )
- from mephisto.data_model.assignment import Assignment
- from mephisto.data_model.agent import OnboardingAgent
- from mephisto.data_model.worker import Worker
- from argparse import _ArgumentGroup as ArgumentGroup
-
-BLUEPRINT_TYPE = "static_task"
-
-
-@dataclass
-class StaticHTMLBlueprintArgs(StaticBlueprintArgs):
- """
- Adds required options for StaticBlueprints.
-
- task_source points to the file intending to be deployed for this task
- data_csv has the data to be deployed for this task.
- """
-
- _blueprint_type: str = BLUEPRINT_TYPE
- _group: str = field(
- default="StaticBlueprint",
- metadata={
- "help": (
- "Tasks launched from static blueprints need a "
- "source html file to display to workers, as well as a csv "
- "containing values that will be inserted into templates in "
- "the html. "
- )
- },
- )
- task_source: str = field(
- default=MISSING,
- metadata={
- "help": "Path to source HTML file for the task being run",
- "required": True,
- },
- )
- preview_source: Optional[str] = field(
- default=MISSING,
- metadata={"help": "Optional path to source HTML file to preview the task"},
- )
- onboarding_source: Optional[str] = field(
- default=MISSING,
- metadata={"help": "Optional path to source HTML file to onboarding the task"},
- )
-
-
-@register_mephisto_abstraction()
-class StaticHTMLBlueprint(StaticBlueprint):
- """Blueprint for a task that runs off of a built react javascript bundle"""
-
- TaskBuilderClass = StaticHTMLTaskBuilder
- ArgsClass = StaticHTMLBlueprintArgs
- BLUEPRINT_TYPE = BLUEPRINT_TYPE
-
- def __init__(
- self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"
- ):
- super().__init__(task_run, args, shared_state)
- self.html_file = os.path.expanduser(args.blueprint.task_source)
- if not os.path.exists(self.html_file):
- raise FileNotFoundError(
- f"Specified html file {self.html_file} was not found from {os.getcwd()}"
- )
-
- self.onboarding_html_file = args.blueprint.get("onboarding_source", None)
- if self.onboarding_html_file is not None:
- self.onboarding_html_file = os.path.expanduser(self.onboarding_html_file)
- if not os.path.exists(self.onboarding_html_file):
- raise FileNotFoundError(
- f"Specified onboarding html file {self.onboarding_html_file} was not found from {os.getcwd()}"
- )
-
- task_file_name = os.path.basename(self.html_file)
- for entry in self._initialization_data_dicts:
- entry["html"] = task_file_name
-
- @classmethod
- def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
- """Ensure that the data can be properly loaded"""
- blue_args = args.blueprint
- if blue_args.get("data_csv", None) is not None:
- csv_file = os.path.expanduser(blue_args.data_csv)
- assert os.path.exists(
- csv_file
- ), f"Provided csv file {csv_file} doesn't exist"
- elif blue_args.get("data_json", None) is not None:
- json_file = os.path.expanduser(blue_args.data_json)
- assert os.path.exists(
- json_file
- ), f"Provided JSON file {json_file} doesn't exist"
- elif blue_args.get("data_jsonl", None) is not None:
- jsonl_file = os.path.expanduser(blue_args.data_jsonl)
- assert os.path.exists(
- jsonl_file
- ), f"Provided JSON-L file {jsonl_file} doesn't exist"
- elif shared_state.static_task_data is not None:
- assert (
- len(shared_state.static_task_data) > 0
- ), "Length of data dict provided was 0"
- else:
- raise AssertionError(
- "Must provide one of a data csv, json, json-L, or a list of tasks"
- )
-
- if blue_args.get("onboarding_qualification", None) is not None:
- assert blue_args.get("onboarding_source", None) is not None, (
- "Must use onboarding html with an onboarding qualification to "
- "use onboarding."
- )
- assert shared_state.validate_onboarding is not None, (
- "Must use an onboarding validation function to use onboarding "
- "with static tasks."
- )
-
- def validate_onboarding(
- self, worker: "Worker", onboarding_agent: "OnboardingAgent"
- ) -> bool:
- """
- Check the incoming onboarding data and evaluate if the worker
- has passed the qualification or not. Return True if the worker
- has qualified.
- """
- return self.shared_state.validate_onboarding(onboarding_agent.state.get_data())
diff --git a/mephisto/server/blueprints/static_task/static_html_task_builder.py b/mephisto/server/blueprints/static_task/static_html_task_builder.py
index 7bcab8b59..c66cccc58 100644
--- a/mephisto/server/blueprints/static_task/static_html_task_builder.py
+++ b/mephisto/server/blueprints/static_task/static_html_task_builder.py
@@ -4,95 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from mephisto.data_model.blueprint import TaskBuilder
-
-from distutils.dir_util import copy_tree
-import os
-import time
-import sh
-import shutil
-import subprocess
-
-from typing import ClassVar, List, Type, Any, Dict, TYPE_CHECKING
-
-if TYPE_CHECKING:
- from mephisto.data_model.task import TaskRun
- from mephisto.data_model.assignment import Assignment
-
-STATIC_TASK_DIR = os.path.dirname(__file__)
-FRONTEND_SOURCE_DIR = os.path.join(STATIC_TASK_DIR, "source")
-FRONTEND_BUILD_DIR = os.path.join(FRONTEND_SOURCE_DIR, "build")
-
-
-class StaticHTMLTaskBuilder(TaskBuilder):
- """
- Builder for a static task, pulls the appropriate html,
- builds the frontend (if a build doesn't already exist),
- then puts the file into the server directory
- """
-
- BUILT_FILE = "done.built"
- BUILT_MESSAGE = "built!"
-
- def rebuild_core(self):
- """Rebuild the frontend for this task"""
- return_dir = os.getcwd()
- os.chdir(FRONTEND_SOURCE_DIR)
- if os.path.exists(FRONTEND_BUILD_DIR):
- shutil.rmtree(FRONTEND_BUILD_DIR)
- packages_installed = subprocess.call(["npm", "install"])
- if packages_installed != 0:
- raise Exception(
- "please make sure npm is installed, otherwise view "
- "the above error for more info."
- )
-
- webpack_complete = subprocess.call(["npm", "run", "dev"])
- if webpack_complete != 0:
- raise Exception(
- "Webpack appears to have failed to build your "
- "frontend. See the above error for more information."
- )
- os.chdir(return_dir)
-
- def build_in_dir(self, build_dir: str):
- """Build the frontend if it doesn't exist, then copy into the server directory"""
- # Only build this task if it hasn't already been built
- if True: # not os.path.exists(FRONTEND_BUILD_DIR):
- self.rebuild_core()
-
- # Copy the built core and the given task file to the target path
- use_html_file = os.path.expanduser(self.args.blueprint["task_source"])
-
- target_resource_dir = os.path.join(build_dir, "static")
- file_name = os.path.basename(use_html_file)
- target_path = os.path.join(target_resource_dir, file_name)
- shutil.copy2(use_html_file, target_path)
-
- # Copy over the preview file as preview.html, default to the task file if none specified
- preview_file = self.args.blueprint.get("preview_source") or use_html_file
- use_preview_file = os.path.expanduser(preview_file)
-
- target_path = os.path.join(target_resource_dir, "preview.html")
- shutil.copy2(use_preview_file, target_path)
-
- # Copy over the onboarding file as onboarding.html if it's specified
- onboarding_html_file = self.args.blueprint.get("onboarding_source", None)
- if onboarding_html_file is not None:
- onboarding_html_file = os.path.expanduser(onboarding_html_file)
- target_path = os.path.join(target_resource_dir, "onboarding.html")
- shutil.copy2(onboarding_html_file, target_path)
-
- # If any additional task files are required via a source_dir, copy those as well
- extra_dir_path = self.args.blueprint.get("extra_source_dir")
- if extra_dir_path is not None:
- extra_dir_path = os.path.expanduser(extra_dir_path)
- copy_tree(extra_dir_path, target_resource_dir)
-
- bundle_js_file = os.path.join(FRONTEND_BUILD_DIR, "bundle.js")
- target_path = os.path.join(target_resource_dir, "bundle.js")
- shutil.copy2(bundle_js_file, target_path)
-
- # Write a built file confirmation
- with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
- built_file.write(self.BUILT_MESSAGE)
+from mephisto.abstractions.blueprints.static_html_task.static_html_task_builder import *  # re-export under the legacy path
+import warnings
+
+warnings.warn(
+    "Import of blueprints from `mephisto.server.blueprints` is going away soon. "
+    "Please replace all of your imports from mephisto.server.blueprints.static_task "
+    "to mephisto.abstractions.blueprints.static_html_task ",
+    PendingDeprecationWarning,
+)
diff --git a/mephisto/server/channels/channel.py b/mephisto/server/channels/channel.py
index e9bd52bc1..26f940042 100644
--- a/mephisto/server/channels/channel.py
+++ b/mephisto/server/channels/channel.py
@@ -4,83 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from abc import ABC, abstractmethod
-
-from typing import Callable
-from mephisto.data_model.packet import Packet
-
-STATUS_CHECK_TIME = 4
-
-
-class Channel(ABC):
- """
- Manages the API between the Supervisor and the server that is produced
- by the architect.
-
- Should be able to be configured by an architect, and used to communicate
- with that server based on the queries that a Supervisor needs to run a job
- """
-
- def __init__(
- self,
- channel_id: str,
- on_channel_open: Callable[[str], None],
- on_catastrophic_disconnect: Callable[[str], None],
- on_message: Callable[[str, Packet], None],
- ):
- """
- Create a channel by the given id, and initialize any resources that
- will later be required during the `open` call.
-
- Children classes will likely need to accept additional parameters
-
- on_channel_open should be called when the channel is first alive.
- It takes the channel id as the only argument.
- on_catastrophic_disconnect should only be called if the channel
- is entirely unable to connect to the server and any ongoing
- jobs should be killed.
- It takes the channel id as the only argument.
- on_message should be called whenever this channel receives a message
- from the server.
- It takes the channel id as the first argument and the received
- packet as the second argument.
- """
- self.channel_id = channel_id
- self.on_channel_open = on_channel_open
- self.on_catastrophic_disconnect = on_catastrophic_disconnect
- self.on_message = on_message
-
- @abstractmethod
- def is_closed(self):
- """
- Return whether or not this connection has been explicitly closed
- by the supervisor or another source.
- """
-
- @abstractmethod
- def close(self):
- """
- Close this channel, and ensure that all threads and surrounding
- resources are cleaned up
- """
-
- @abstractmethod
- def is_alive(self):
- """
- Return if this channel is actively able to send/recieve messages.
- Should be False until a connection has been established with the
- server.
- """
-
- @abstractmethod
- def open(self):
- """
- Do whatever is necessary to 'connect' this socket to the server
- """
-
- @abstractmethod
- def send(self, packet: "Packet") -> bool:
- """
- Send the packet given to the intended recipient.
- Return True on success and False on failure.
- """
+from mephisto.abstractions.channel import *
+import warnings
+
+warnings.warn(
+ "Import of the base Channel from `mephisto.server.channels.channel` is going away soon. "
+ "Please replace all of your imports from mephisto.server.channels.channel "
+ "to mephisto.abstractions.channel ",
+ PendingDeprecationWarning,
+)
diff --git a/mephisto/server/channels/websocket_channel.py b/mephisto/server/channels/websocket_channel.py
index df94d16e2..851a4d557 100644
--- a/mephisto/server/channels/websocket_channel.py
+++ b/mephisto/server/channels/websocket_channel.py
@@ -4,172 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from typing import Callable, Optional
-from mephisto.data_model.packet import Packet
-from mephisto.server.channels.channel import Channel, STATUS_CHECK_TIME
-
-import errno
-import websocket
-import threading
-import json
-import time
-
-from mephisto.core.logger_core import get_logger
-
-logger = get_logger(name=__name__, verbose=True, level="info")
-
-
-class WebsocketChannel(Channel):
- """
- Channel for communicating with a server via websockets.
- """
-
- def __init__(
- self,
- channel_id: str,
- on_channel_open: Callable[[str], None],
- on_catastrophic_disconnect: Callable[[str], None],
- on_message: Callable[[str, Packet], None],
- socket_url: str,
- ):
- """
- Create a channel by the given name, and initialize any resources that
- will later be required during the `open` call.
-
- Requires a socket_url to connect with.
- """
- super().__init__(
- channel_id=channel_id,
- on_channel_open=on_channel_open,
- on_catastrophic_disconnect=on_catastrophic_disconnect,
- on_message=on_message,
- )
- self.socket_url = socket_url
- self.socket: Optional[websocket.WebSocketApp] = None
- self.thread: Optional[threading.Thread] = None
- self._is_alive = False
- self._is_closed = False
-
- def is_closed(self):
- """
- Return whether or not this connection has been explicitly closed
- by the supervisor or another source.
- """
- return self._is_closed
-
- def close(self):
- """
- Close this channel, and ensure that all threads and surrounding
- resources are cleaned up
- """
- self._is_closed = True
- try:
- self.socket.close()
- except Exception:
- # socket already closed
- pass
- self._is_alive = False
- if self.thread is not None:
- self.thread.join()
-
- def is_alive(self):
- """Return if this channel is actively able to send/recieve messages."""
- return self._is_alive
-
- def open(self):
- """Set up a socket handling thread."""
-
- def on_socket_open(*args):
- self._is_alive = True
- self.on_channel_open(self.channel_id)
- logger.info(f"channel open {args}")
-
- def on_error(ws, error):
- if hasattr(error, "errno"):
- if error.errno == errno.ECONNREFUSED:
- # TODO(CLEAN) replace with channel exception
- raise Exception(
- f"Socket {self.socket_url} refused connection, cancelling"
- )
- else:
- logger.error(f"Socket logged error: {error}")
- if isinstance(error, websocket._exceptions.WebSocketException):
- return
-
- import traceback
-
- traceback.print_exc()
- try:
- # Close the socket to attempt to reconnect
- self.socket.close()
- self.socket.keep_running = False
- except Exception:
- # TODO(CLEAN) only catch socket closed connection
- # Already closed
- pass
-
- def on_disconnect(*args):
- """Disconnect event is a no-op for us, as the server reconnects
- automatically on a retry.
- """
- # TODO(OWN) we need to set a timeout for reconnecting to the server,
- # if it fails it's time to call on_catastrophic_disconnect
- pass
-
- def on_message(*args):
- """Incoming message handler defers to the internal handler"""
- try:
- packet_dict = json.loads(args[1])
- packet = Packet.from_dict(packet_dict)
- self.on_message(self.channel_id, packet)
- except Exception as e:
- # TODO(CLEAN) properly handle only failed from_dict calls
- logger.exception(repr(e), exc_info=True)
- raise
-
- def run_socket(*args):
- while not self._is_closed:
- try:
- socket = websocket.WebSocketApp(
- self.socket_url,
- on_message=on_message,
- on_error=on_error,
- on_close=on_disconnect,
- )
- self.socket = socket
- socket.on_open = on_socket_open
- socket.run_forever(ping_interval=8 * STATUS_CHECK_TIME)
- except Exception as e:
- logger.exception(
- f"Socket error {repr(e)}, attempting restart", exc_info=True
- )
- time.sleep(0.2)
-
- # Start listening thread
- self.thread = threading.Thread(
- target=run_socket, name=f"socket-thread-{self.socket_url}"
- )
- self.thread.start()
-
- def send(self, packet: "Packet") -> bool:
- """
- Send the packet given to the intended recipient.
- Return True on success and False on failure.
- """
- if self.socket is None:
- return False
- try:
- data = packet.to_sendable_dict()
- self.socket.send(json.dumps(data))
- except websocket.WebSocketConnectionClosedException:
- # The channel died mid-send, wait for it to come back up
- return False
- except BrokenPipeError:
- # The channel died mid-send, wait for it to come back up
- return False
- except Exception as e:
- logger.exception(
- f"Unexpected socket error occured: {repr(e)}", exc_info=True
- )
- return False
- return True
+from mephisto.abstractions.architects.channels.websocket_channel import *  # re-export under the legacy path
+import warnings
+
+warnings.warn(
+    "Import of WebsocketChannel from `mephisto.server.channels.websocket_channel` is going away soon. "
+    "Please replace all of your imports from mephisto.server.channels.websocket_channel "
+    "to mephisto.abstractions.architects.channels.websocket_channel ",
+    PendingDeprecationWarning,
+)
diff --git a/mephisto/tasks/README.md b/mephisto/tasks/README.md
deleted file mode 100644
index 5cce7d626..000000000
--- a/mephisto/tasks/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Tasks
-This folder is the default folder where personal tasks are stored. It is particularly useful for one-off tasks, or things that derive and only mildly personalize the tasks in the gallery. More fully fleshed out (and lightly tested) tasks should can be promoted to the gallery by following the instructions in the CONTRIBUTING guide.
diff --git a/mephisto/tools/__init__.py b/mephisto/tools/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/mephisto/tools/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/mephisto/tools/data_browser.py b/mephisto/tools/data_browser.py
new file mode 100644
index 000000000..80d83a2fa
--- /dev/null
+++ b/mephisto/tools/data_browser.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.abstractions.database import MephistoDB
+from mephisto.data_model.assignment import Unit
+from mephisto.data_model.task_run import TaskRun
+from mephisto.abstractions.blueprint import AgentState
+from mephisto.data_model.agent import Agent
+
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.data_model.constants.assignment_state import AssignmentState
+
+from typing import List, Optional, Any, Dict
+
+
+class DataBrowser:
+ """
+ Class with convenience methods for getting completed data
+ back from runs to parse and manage with other scripts
+ """
+
+ def __init__(self, db=None):
+ if db is None:
+ db = LocalMephistoDB()
+ self.db = db
+
+ def _get_units_for_task_runs(self, task_runs: List[TaskRun]) -> List[Unit]:
+ units = []
+ for task_run in task_runs:
+ assignments = task_run.get_assignments()
+ for assignment in assignments:
+ found_units = assignment.get_units()
+ for unit in found_units:
+ if unit.get_status() in [
+ AssignmentState.COMPLETED,
+ AssignmentState.ACCEPTED,
+ AssignmentState.REJECTED,
+ AssignmentState.SOFT_REJECTED,
+ ]:
+ units.append(unit)
+ return units
+
+ def get_units_for_task_name(self, task_name: str) -> List[Unit]:
+ tasks = self.db.find_tasks(task_name=task_name)
+ assert len(tasks) >= 1, f"No task found under name {task_name}"
+ task_runs = self.db.find_task_runs(task_id=tasks[0].db_id)
+ return self._get_units_for_task_runs(task_runs)
+
+ def get_units_for_run_id(self, run_id: str) -> List[Unit]:
+ task_run = TaskRun(self.db, run_id)
+ return self._get_units_for_task_runs([task_run])
+
+ def get_data_from_unit(self, unit: Unit) -> Dict[str, Any]:
+ agent = unit.get_assigned_agent()
+ assert (
+ agent is not None
+ ), f"Trying to get completed data from unassigned unit {unit}"
+ return {
+ "worker_id": agent.worker_id,
+ "unit_id": unit.db_id,
+ "assignment_id": unit.assignment_id,
+ "status": agent.db_status,
+ "data": agent.state.get_parsed_data(),
+ "task_start": agent.state.get_task_start(),
+ "task_end": agent.state.get_task_end(),
+ }
diff --git a/mephisto/tools/scripts.py b/mephisto/tools/scripts.py
new file mode 100644
index 000000000..458c5a6f4
--- /dev/null
+++ b/mephisto/tools/scripts.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Utilities that are useful for Mephisto-related scripts.
+"""
+
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.operations.utils import get_mock_requester, get_root_data_dir
+
+from omegaconf import DictConfig, OmegaConf
+
+import argparse
+from typing import Tuple, Dict, Any, TYPE_CHECKING
+import os
+
+if TYPE_CHECKING:
+ from mephisto.abstractions.database import MephistoDB
+
+
+def load_db_and_process_config(
+ cfg: DictConfig, print_config=False
+) -> Tuple["MephistoDB", DictConfig]:
+ """
+ Using a Hydra DictConfig built from a RunScriptConfig,
+ load the desired MephistoDB and
+ validate the config against the database contents, then
+ return the database and validated config.
+
+ Takes in an option to print out the configuration before returning
+ """
+ db = get_db_from_config(cfg)
+ valid_config = augment_config_from_db(cfg, db)
+ if print_config:
+ print(OmegaConf.to_yaml(valid_config))
+ return db, valid_config
+
+
+def get_db_from_config(cfg: DictConfig) -> "MephistoDB":
+ """
+ Get a MephistoDB from the given configuration. As of now
+ this defaults to a LocalMephistoDB
+ """
+ datapath = cfg.mephisto.get("datapath", None)
+
+ if datapath is None:
+ datapath = get_root_data_dir()
+
+ database_path = os.path.join(datapath, "database.db")
+ return LocalMephistoDB(database_path=database_path)
+
+
+def augment_config_from_db(script_cfg: DictConfig, db: "MephistoDB") -> DictConfig:
+ """
+ Check the database for validity of the incoming MephistoConfig, ensure
+ that the config has all the necessary fields set.
+ """
+ cfg = script_cfg.mephisto
+ requester_name = cfg.provider.get("requester_name", None)
+ provider_type = cfg.provider.get("_provider_type", None)
+ architect_type = cfg.architect.get("_architect_type", None)
+
+ if requester_name is None:
+ if provider_type is None:
+ print("No requester specified, defaulting to mock")
+ provider_type = "mock"
+ if provider_type == "mock":
+ req = get_mock_requester(db)
+ requester_name = req.requester_name
+ else:
+ reqs = db.find_requesters(provider_type=provider_type)
+ # TODO (#93) proper logging
+ if len(reqs) == 0:
+ print(
+ f"No requesters found for provider type {provider_type}, please "
+ f"register one. You can register with `mephisto register {provider_type}`, "
+ f"or `python mephisto/client/cli.py register {provider_type}` if you haven't "
+ "installed Mephisto using poetry."
+ )
+ exit(1)
+ elif len(reqs) == 1:
+ req = reqs[0]
+ requester_name = req.requester_name
+ print(
+ f"Found one `{provider_type}` requester to launch with: {requester_name}"
+ )
+ else:
+ req = reqs[-1]
+ requester_name = req.requester_name
+ print(
+ f"Found many `{provider_type}` requesters to launch with, "
+ f"choosing the most recent: {requester_name}"
+ )
+ else:
+ # Ensure provided requester exists
+ reqs = db.find_requesters(requester_name=requester_name)
+ if len(reqs) == 0:
+ print(
+ f"No requesters found under name {requester_name}, "
+ "have you registered with `mephisto register`?"
+ )
+ exit(1)
+ provider_type = reqs[0].provider_type
+
+ if provider_type in ["mturk"]:
+ input(
+ f"This task is going to launch live on {provider_type}, press enter to continue: "
+ )
+ if provider_type in ["mturk_sandbox", "mturk"] and architect_type != "heroku":
+ input(
+ f"This task is going to launch live on {provider_type}, but your "
+ f"provided architect is {architect_type}, are you sure you "
+ "want to do this? : "
+ )
+
+ cfg.provider.requester_name = requester_name
+ cfg.provider._provider_type = provider_type
+ return script_cfg
diff --git a/mephisto/utils/scripts.py b/mephisto/utils/scripts.py
index c85a5d59e..21b48a0c1 100644
--- a/mephisto/utils/scripts.py
+++ b/mephisto/utils/scripts.py
@@ -3,118 +3,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-"""
-Utilities that are useful for Mephisto-related scripts.
-"""
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.core.utils import get_mock_requester, get_root_data_dir
+from mephisto.tools.scripts import *
+import warnings
-from omegaconf import DictConfig, OmegaConf
-
-import argparse
-from typing import Tuple, Dict, Any, TYPE_CHECKING
-import os
-
-if TYPE_CHECKING:
- from mephisto.data_model.database import MephistoDB
-
-
-def load_db_and_process_config(
- cfg: DictConfig, print_config=False
-) -> Tuple["MephistoDB", DictConfig]:
- """
- Using a Hydra DictConfig built from a RunScriptConfig,
- load the desired MephistoDB and
- validate the config against the database contents, then
- return the database and validated config.
-
- Takes in an option to print out the configuration before returning
- """
- db = get_db_from_config(cfg)
- valid_config = augment_config_from_db(cfg, db)
- if print_config:
- print(OmegaConf.to_yaml(valid_config))
- return db, valid_config
-
-
-def get_db_from_config(cfg: DictConfig) -> "MephistoDB":
- """
- Get a MephistoDB from the given configuration. As of now
- this defaults to a LocalMephistoDB
- """
- datapath = cfg.mephisto.get("datapath", None)
-
- if datapath is None:
- datapath = get_root_data_dir()
-
- database_path = os.path.join(datapath, "database.db")
- return LocalMephistoDB(database_path=database_path)
-
-
-def augment_config_from_db(script_cfg: DictConfig, db: "MephistoDB") -> DictConfig:
- """
- Check the database for validity of the incoming MephistoConfig, ensure
- that the config has all the necessary fields set.
- """
- cfg = script_cfg.mephisto
- requester_name = cfg.provider.get("requester_name", None)
- provider_type = cfg.provider.get("_provider_type", None)
- architect_type = cfg.architect.get("_architect_type", None)
-
- if requester_name is None:
- if provider_type is None:
- print("No requester specified, defaulting to mock")
- provider_type = "mock"
- if provider_type == "mock":
- req = get_mock_requester(db)
- requester_name = req.requester_name
- else:
- reqs = db.find_requesters(provider_type=provider_type)
- # TODO (#93) proper logging
- if len(reqs) == 0:
- print(
- f"No requesters found for provider type {provider_type}, please "
- f"register one. You can register with `mephisto register {provider_type}`, "
- f"or `python mephisto/client/cli.py register {provider_type}` if you haven't "
- "installed Mephisto using poetry."
- )
- exit(1)
- elif len(reqs) == 1:
- req = reqs[0]
- requester_name = req.requester_name
- print(
- f"Found one `{provider_type}` requester to launch with: {requester_name}"
- )
- else:
- req = reqs[-1]
- requester_name = req.requester_name
- print(
- f"Found many `{provider_type}` requesters to launch with, "
- f"choosing the most recent: {requester_name}"
- )
- else:
- # Ensure provided requester exists
- reqs = db.find_requesters(requester_name=requester_name)
- if len(reqs) == 0:
- print(
- f"No requesters found under name {requester_name}, "
- "have you registered with `mephisto register`?"
- )
- exit(1)
- provider_type = reqs[0].provider_type
-
- if provider_type in ["mturk"]:
- input(
- f"This task is going to launch live on {provider_type}, press enter to continue: "
- )
- if provider_type in ["mturk_sandbox", "mturk"] and architect_type != "heroku":
- input(
- f"This task is going to launch live on {provider_type}, but your "
- f"provided architect is {architect_type}, are you sure you "
- "want to do this? : "
- )
-
- cfg.provider.requester_name = requester_name
- cfg.provider._provider_type = provider_type
- return script_cfg
+warnings.warn(
+ "Import of script tools from `mephisto.utils` is going away soon. "
+ "Please replace all of your imports from mephisto.utils.scripts "
+ "to mephisto.tools.scripts ",
+ PendingDeprecationWarning,
+)
diff --git a/test/abstractions/__init__.py b/test/abstractions/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/test/abstractions/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/test/abstractions/architects/__init__.py b/test/abstractions/architects/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/test/abstractions/architects/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/test/server/architects/test_heroku_architect.py b/test/abstractions/architects/test_heroku_architect.py
similarity index 88%
rename from test/server/architects/test_heroku_architect.py
rename to test/abstractions/architects/test_heroku_architect.py
index 280540d65..faa348e0f 100644
--- a/test/server/architects/test_heroku_architect.py
+++ b/test/abstractions/architects/test_heroku_architect.py
@@ -10,18 +10,18 @@
from typing import Type, ClassVar, Optional
from mephisto.data_model.test.architect_tester import ArchitectTests
-from mephisto.server.architects.heroku_architect import (
+from mephisto.abstractions.architects.heroku_architect import (
HerokuArchitect,
HerokuArchitectArgs,
)
-from mephisto.data_model.database import MephistoDB
-from mephisto.data_model.architect import Architect
-from mephisto.data_model.assignment_state import AssignmentState
+from mephisto.abstractions.database import MephistoDB
+from mephisto.abstractions.architect import Architect
+from mephisto.data_model.constants.assignment_state import AssignmentState
from omegaconf import OmegaConf
-from mephisto.core.hydra_config import MephistoConfig
-from mephisto.data_model.blueprint import SharedTaskState
+from mephisto.operations.hydra_config import MephistoConfig
+from mephisto.abstractions.blueprint import SharedTaskState
# TODO(#104) these tests should be marked as nightly's rather than on every run?
# Maybe with some kind of LONG TEST flag? Investigate
diff --git a/test/server/architects/test_local_architect.py b/test/abstractions/architects/test_local_architect.py
similarity index 81%
rename from test/server/architects/test_local_architect.py
rename to test/abstractions/architects/test_local_architect.py
index 42ef7ea90..dfaafefd4 100644
--- a/test/server/architects/test_local_architect.py
+++ b/test/abstractions/architects/test_local_architect.py
@@ -13,21 +13,21 @@
from typing import Type, ClassVar, Optional
from mephisto.data_model.test.architect_tester import ArchitectTests
-from mephisto.server.architects.local_architect import (
+from mephisto.abstractions.architects.local_architect import (
LocalArchitect,
LocalArchitectArgs,
)
-from mephisto.data_model.database import MephistoDB
-from mephisto.data_model.architect import Architect
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprint
-from mephisto.server.blueprints.mock.mock_task_builder import MockTaskBuilder
-from mephisto.server.blueprints.mock.mock_task_runner import MockTaskRunner
+from mephisto.abstractions.database import MephistoDB
+from mephisto.abstractions.architect import Architect
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprint
+from mephisto.abstractions.blueprints.mock.mock_task_builder import MockTaskBuilder
+from mephisto.abstractions.blueprints.mock.mock_task_runner import MockTaskRunner
from omegaconf import OmegaConf
-from mephisto.core.hydra_config import MephistoConfig
-from mephisto.data_model.blueprint import SharedTaskState
+from mephisto.operations.hydra_config import MephistoConfig
+from mephisto.abstractions.blueprint import SharedTaskState
class LocalArchitectTests(ArchitectTests):
diff --git a/test/server/architects/test_mock_architect.py b/test/abstractions/architects/test_mock_architect.py
similarity index 74%
rename from test/server/architects/test_mock_architect.py
rename to test/abstractions/architects/test_mock_architect.py
index 849c35e3f..a7353a3e3 100644
--- a/test/server/architects/test_mock_architect.py
+++ b/test/abstractions/architects/test_mock_architect.py
@@ -11,14 +11,17 @@
from typing import Type, ClassVar
from mephisto.data_model.test.architect_tester import ArchitectTests
-from mephisto.server.architects.mock_architect import MockArchitect, MOCK_DEPLOY_URL
-
-from mephisto.data_model.database import MephistoDB
-from mephisto.data_model.architect import Architect
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprint
-from mephisto.server.blueprints.mock.mock_task_builder import MockTaskBuilder
-from mephisto.server.blueprints.mock.mock_task_runner import MockTaskRunner
+from mephisto.abstractions.architects.mock_architect import (
+ MockArchitect,
+ MOCK_DEPLOY_URL,
+)
+
+from mephisto.abstractions.database import MephistoDB
+from mephisto.abstractions.architect import Architect
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprint
+from mephisto.abstractions.blueprints.mock.mock_task_builder import MockTaskBuilder
+from mephisto.abstractions.blueprints.mock.mock_task_runner import MockTaskRunner
class MockArchitectTests(ArchitectTests):
diff --git a/test/abstractions/blueprints/__init__.py b/test/abstractions/blueprints/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/test/abstractions/blueprints/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/test/server/blueprints/test_mock_blueprint.py b/test/abstractions/blueprints/test_mock_blueprint.py
similarity index 79%
rename from test/server/blueprints/test_mock_blueprint.py
rename to test/abstractions/blueprints/test_mock_blueprint.py
index 9c3d05383..8992de30b 100644
--- a/test/server/blueprints/test_mock_blueprint.py
+++ b/test/abstractions/blueprints/test_mock_blueprint.py
@@ -11,22 +11,29 @@
from typing import Type, ClassVar
from mephisto.data_model.test.blueprint_tester import BlueprintTests
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprint
-from mephisto.server.blueprints.mock.mock_task_builder import MockTaskBuilder
-from mephisto.server.blueprints.mock.mock_task_runner import MockTaskRunner
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprint
+from mephisto.abstractions.blueprints.mock.mock_task_builder import MockTaskBuilder
+from mephisto.abstractions.blueprints.mock.mock_task_runner import MockTaskRunner
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.data_model.blueprint import Blueprint, AgentState, TaskRunner, TaskBuilder
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.abstractions.blueprint import (
+ Blueprint,
+ AgentState,
+ TaskRunner,
+ TaskBuilder,
+)
from mephisto.data_model.assignment import Assignment
-from mephisto.data_model.task import TaskRun
+from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.test.utils import get_test_task_run
# TODO(#97) Update supervisor to be able to provide mock setups to test against a blueprint
-from mephisto.providers.mock.mock_agent import MockAgent
-from mephisto.providers.mock.mock_unit import MockUnit
-from mephisto.providers.mock.mock_worker import MockWorker
-from mephisto.providers.mock.provider_type import PROVIDER_TYPE as MOCK_PROVIDER_TYPE
+from mephisto.abstractions.providers.mock.mock_agent import MockAgent
+from mephisto.abstractions.providers.mock.mock_unit import MockUnit
+from mephisto.abstractions.providers.mock.mock_worker import MockWorker
+from mephisto.abstractions.providers.mock.provider_type import (
+ PROVIDER_TYPE as MOCK_PROVIDER_TYPE,
+)
class MockBlueprintTests(BlueprintTests):
diff --git a/test/abstractions/providers/__init__.py b/test/abstractions/providers/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/test/abstractions/providers/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/test/abstractions/providers/mturk/__init__.py b/test/abstractions/providers/mturk/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/test/abstractions/providers/mturk/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/test/providers/mturk/test_mturk.py b/test/abstractions/providers/mturk/test_mturk.py
similarity index 92%
rename from test/providers/mturk/test_mturk.py
rename to test/abstractions/providers/mturk/test_mturk.py
index ab64955bb..14b4c4bfe 100644
--- a/test/providers/mturk/test_mturk.py
+++ b/test/abstractions/providers/mturk/test_mturk.py
@@ -11,8 +11,8 @@
import time
import pytest
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.providers.mturk.mturk_worker import MTurkWorker
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.abstractions.providers.mturk.mturk_worker import MTurkWorker
from mephisto.data_model.worker import Worker
diff --git a/test/abstractions/providers/mturk_sandbox/__init__.py b/test/abstractions/providers/mturk_sandbox/__init__.py
new file mode 100644
index 000000000..240697e32
--- /dev/null
+++ b/test/abstractions/providers/mturk_sandbox/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/test/providers/mturk_sandbox/test_mturk_provider.py b/test/abstractions/providers/mturk_sandbox/test_mturk_provider.py
similarity index 92%
rename from test/providers/mturk_sandbox/test_mturk_provider.py
rename to test/abstractions/providers/mturk_sandbox/test_mturk_provider.py
index c981f2e0e..ee3e74b0d 100644
--- a/test/providers/mturk_sandbox/test_mturk_provider.py
+++ b/test/abstractions/providers/mturk_sandbox/test_mturk_provider.py
@@ -14,14 +14,18 @@
from typing import Type
from mephisto.data_model.test.utils import get_test_requester
from mephisto.data_model.test.crowd_provider_tester import CrowdProviderTests
-from mephisto.data_model.crowd_provider import CrowdProvider
-from mephisto.providers.mturk_sandbox.sandbox_mturk_provider import SandboxMTurkProvider
-from mephisto.providers.mturk_sandbox.sandbox_mturk_requester import (
+from mephisto.abstractions.crowd_provider import CrowdProvider
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_provider import (
+ SandboxMTurkProvider,
+)
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_requester import (
SandboxMTurkRequester,
)
-from mephisto.providers.mturk_sandbox.sandbox_mturk_worker import SandboxMTurkWorker
+from mephisto.abstractions.providers.mturk_sandbox.sandbox_mturk_worker import (
+ SandboxMTurkWorker,
+)
-from mephisto.providers.mturk.mturk_utils import (
+from mephisto.abstractions.providers.mturk.mturk_utils import (
delete_qualification,
find_qualification,
)
diff --git a/test/core/test_database.py b/test/core/test_database.py
index 3d3102d91..d871e52ff 100644
--- a/test/core/test_database.py
+++ b/test/core/test_database.py
@@ -10,7 +10,7 @@
import tempfile
from mephisto.data_model.test.data_model_database_tester import BaseDatabaseTests
-from mephisto.core.local_database import LocalMephistoDB
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
class TestLocalMephistoDB(BaseDatabaseTests):
diff --git a/test/core/test_operator.py b/test/core/test_operator.py
index 730be2af6..c61263cb0 100644
--- a/test/core/test_operator.py
+++ b/test/core/test_operator.py
@@ -14,13 +14,16 @@
import threading
from mephisto.data_model.test.utils import get_test_requester
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.core.operator import Operator
-from mephisto.server.architects.mock_architect import MockArchitect, MockArchitectArgs
-from mephisto.core.hydra_config import MephistoConfig
-from mephisto.providers.mock.mock_provider import MockProviderArgs
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprintArgs
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.operations.operator import Operator
+from mephisto.abstractions.architects.mock_architect import (
+ MockArchitect,
+ MockArchitectArgs,
+)
+from mephisto.operations.hydra_config import MephistoConfig
+from mephisto.abstractions.providers.mock.mock_provider import MockProviderArgs
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprintArgs
from mephisto.data_model.task_config import TaskConfigArgs
from omegaconf import OmegaConf
diff --git a/test/core/test_supervisor.py b/test/core/test_supervisor.py
index ddad3cb90..17170547f 100644
--- a/test/core/test_supervisor.py
+++ b/test/core/test_supervisor.py
@@ -13,23 +13,26 @@
from typing import List
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprint
-from mephisto.server.blueprints.mock.mock_task_runner import MockTaskRunner
-from mephisto.server.architects.mock_architect import MockArchitect
-from mephisto.providers.mock.mock_provider import MockProvider
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.core.task_launcher import TaskLauncher
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprint
+from mephisto.abstractions.blueprints.mock.mock_task_runner import MockTaskRunner
+from mephisto.abstractions.architects.mock_architect import MockArchitect
+from mephisto.abstractions.providers.mock.mock_provider import MockProvider
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.operations.task_launcher import TaskLauncher
from mephisto.data_model.test.utils import get_test_task_run
from mephisto.data_model.assignment import InitializationData
-from mephisto.data_model.task import TaskRun
-from mephisto.core.supervisor import Supervisor, Job
-from mephisto.data_model.blueprint import SharedTaskState
-
-
-from mephisto.server.architects.mock_architect import MockArchitect, MockArchitectArgs
-from mephisto.core.hydra_config import MephistoConfig
-from mephisto.providers.mock.mock_provider import MockProviderArgs
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprintArgs
+from mephisto.data_model.task_run import TaskRun
+from mephisto.operations.supervisor import Supervisor, Job
+from mephisto.abstractions.blueprint import SharedTaskState
+
+
+from mephisto.abstractions.architects.mock_architect import (
+ MockArchitect,
+ MockArchitectArgs,
+)
+from mephisto.operations.hydra_config import MephistoConfig
+from mephisto.abstractions.providers.mock.mock_provider import MockProviderArgs
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprintArgs
from mephisto.data_model.task_config import TaskConfigArgs
from omegaconf import OmegaConf
diff --git a/test/core/test_task_launcher.py b/test/core/test_task_launcher.py
index 78fc185eb..4c5c36358 100644
--- a/test/core/test_task_launcher.py
+++ b/test/core/test_task_launcher.py
@@ -12,15 +12,15 @@
import time
from mephisto.data_model.test.utils import get_test_task_run
-from mephisto.core.local_database import LocalMephistoDB
-from mephisto.core.task_launcher import TaskLauncher
+from mephisto.abstractions.databases.local_database import LocalMephistoDB
+from mephisto.operations.task_launcher import TaskLauncher
from mephisto.data_model.assignment import InitializationData
-from mephisto.data_model.assignment_state import AssignmentState
-from mephisto.data_model.task import TaskRun
+from mephisto.data_model.constants.assignment_state import AssignmentState
+from mephisto.data_model.task_run import TaskRun
-from mephisto.providers.mock.mock_provider import MockProvider
-from mephisto.server.blueprints.mock.mock_blueprint import MockBlueprint
-from mephisto.server.blueprints.mock.mock_task_runner import MockTaskRunner
+from mephisto.abstractions.providers.mock.mock_provider import MockProvider
+from mephisto.abstractions.blueprints.mock.mock_blueprint import MockBlueprint
+from mephisto.abstractions.blueprints.mock.mock_task_runner import MockTaskRunner
MAX_WAIT_TIME_UNIT_LAUNCH = 15
NUM_GENERATED_ASSIGNMENTS = 10