From 7434f33b1a567bf31ebd225ef431fee2cbfa0dfe Mon Sep 17 00:00:00 2001 From: Paul Abumov Date: Mon, 8 Jul 2024 17:16:46 -0400 Subject: [PATCH] Added example of GoldUnit usage, updated the docs --- .../how_to_use/worker_quality/using_golds.md | 46 ++++- examples/form_composer_demo/README.md | 1 + .../simple/gold_units/gold_units_data.json | 177 ++++++++++++++++++ .../gold_units/gold_units_validation.py | 95 ++++++++++ .../example_local_mock_with_gold_unit.yaml | 32 ++++ .../run_task_with_gold_unit.py | 109 +++++++++++ examples/remote_procedure/mnist/run_task.py | 7 +- .../webapp/src/components/core_components.jsx | 11 +- .../blueprints/mixins/use_gold_unit.py | 124 +++++++----- 9 files changed, 540 insertions(+), 62 deletions(-) create mode 100644 examples/form_composer_demo/data/simple/gold_units/gold_units_data.json create mode 100644 examples/form_composer_demo/data/simple/gold_units/gold_units_validation.py create mode 100644 examples/form_composer_demo/hydra_configs/conf/example_local_mock_with_gold_unit.yaml create mode 100644 examples/form_composer_demo/run_task_with_gold_unit.py diff --git a/docs/web/docs/guides/how_to_use/worker_quality/using_golds.md b/docs/web/docs/guides/how_to_use/worker_quality/using_golds.md index 0de103c11..b4877106f 100644 --- a/docs/web/docs/guides/how_to_use/worker_quality/using_golds.md +++ b/docs/web/docs/guides/how_to_use/worker_quality/using_golds.md @@ -11,7 +11,7 @@ import Link from '@docusaurus/Link'; # Check against standards with Gold Labels -Gold labeling is commonly used for ensuring worker quality over the full duration of a task. It's valuable as an automated measure to track the consistency your workers. For this Mephisto provides the `UseGoldUnit` blueprint mixin. +Gold labeling is commonly used for ensuring worker quality over the full duration of a task. It's valuable as an automated measure to track the consistency your workers. For this Mephisto provides the `UseGoldUnit` blueprint mixin. ## Basic configuration @@ -20,8 +20,10 @@ There are a few primary configuration parts for using gold units: - Hydra args - `blueprint.gold_qualification_base`: A string representing the base qualification that required qualifications keeping track of success will be built from. - `blueprint.use_golds`: Set to `True` to enable the feature. - - `min_golds`: An int for the minimum number of golds a worker needs to complete for the first time before receiving real units. - - `max_incorrect_golds`: An int for the number of golds a worker can get incorrect before being disqualified from this task. + - `blueprint.max_gold_units`: The maximum number of additional units you will pay out for evaluating on gold units. Note that you do pay for gold units, they are just like any other units. + - `blueprint.min_golds`: An int for the minimum number of golds a worker needs to complete for the first time before receiving real units. + - `blueprint.max_incorrect_golds`: An int for the number of golds a worker can get incorrect before being disqualified from this task. + - `task.allowed_concurrent`: Can only run this task type with one allowed concurrent unit at a time per worker, to ensure golds are completed in order. - `GoldUnitSharedState`: - `get_gold_for_worker`: A factory that generates input data for a gold unit for a worker. Explained in-depth below. @@ -36,10 +38,10 @@ def validate_gold_unit(unit: "Unit"): data = agent.state.get_data() return data['outputs']['val'] == gold_ans[data['inputs']['ans_key']] -shared_state = SharedTaskState( +shared_state = SharedStaticTaskState( ... - get_gold_for_worker=get_gold_factory(gold_data) - on_unit_submitted=UseGoldUnit.create_validation_function(cfg.mephisto, validate_gold_unit) + get_gold_for_worker=get_gold_factory(gold_data), + on_unit_submitted=UseGoldUnit.create_validation_function(cfg.mephisto, validate_gold_unit), ) shared_state.qualifications += UseGoldUnit.get_mixin_qualifications(cfg.mephisto, shared_state) ... @@ -51,12 +53,42 @@ The core functionality to provide to your `SharedTaskState` to enable gold units We provide a helper `get_gold_factory` method which takes in a list of _all_ possible gold data inputs, and returns a factory that randomly selects a gold not yet completed by the given worker. This should be sufficient for most cases, though you can write your own factory if you want to be even more specific about how you assign golds. +## Example project + +You can run an example project to try gold units for yourself. + +```shell +docker-compose -f docker/docker-compose.dev.yml up +docker exec -it mephisto_dc bash +cd /mephisto/examples/form_composer_demo +python ./run_task_with_gold_unit.py +``` + +The first unit that you will see will be the gold one. +To get past these example gold units, provide these predefined values: + +- `First name` - type "First" +- `Last name` - type "Last" +- `Email address for Mephisto` - type "gold_user@mephisto.ai" +- `Country` - select "United States of America" +- `Language` - select "English" and "Spanish" +- `Biography since age of 18` - type a string that is longer than 10 chars, contains a word "Gold" and does not contain a word "Bad" + +### Understanding the code + +For an in-depth look at code underlying this example, you can read these Python files in `examples/form_composer_demo` directory: + +- `run_task_with_gold_unit.py` - script to configure and launch this Task +- `hydra_configs/conf/example_local_mock_with_gold_unit.yaml` - YAML configuration for this Task +- `data/simple/gold_units/gold_units_data.json` - configuration for form that will be used specifically for gold units +- `data/simple/gold_units/gold_units_validation.py` - logic of validating worker's output in gold unit form + ## Advanced configuration There are additional arguments that you can use for more advanced configuration of gold units: There are a few primary configuration parts for using gold units: - `GoldUnitSharedState`: - - `worker_needs_gold`: A function that, given the counts of completed, correct, and incorrect golds for a given worker, as well as the minimum number of required golds, returns whether or not the worker should be shown a gold task. + - `worker_needs_gold`: A function that, given the counts of completed, correct, and incorrect golds for a given worker, as well as the minimum number of required golds, returns whether or not the worker should be shown a gold task. - `worker_qualifies`: A function that, given the counts of completed, correct, and incorrect golds for a given worker, as well as the maximum number of incorrect, returns whether or not the worker is eligible to work on the task. ### `worker_needs_gold` diff --git a/examples/form_composer_demo/README.md b/examples/form_composer_demo/README.md index 241ad0afe..02122efa1 100644 --- a/examples/form_composer_demo/README.md +++ b/examples/form_composer_demo/README.md @@ -11,6 +11,7 @@ These form-based questionnaires are example of FormComposer task generator. - Dynamic form: `cd /mephisto/examples/form_composer_demo && python ./run_task_dynamic.py` - Dynamic form with Prolific on EC2: `cd /mephisto/examples/form_composer_demo && python ./run_task_dynamic_ec2_prolific.py` - Dynamic form with Mturk on EC2: `cd /mephisto/examples/form_composer_demo && python ./run_task_dynamic_ec2_mturk_sandbox.py` + - Simple form with Gold Units: `cd /mephisto/examples/form_composer_demo && python ./run_task_with_gold_unit.py` --- diff --git a/examples/form_composer_demo/data/simple/gold_units/gold_units_data.json b/examples/form_composer_demo/data/simple/gold_units/gold_units_data.json new file mode 100644 index 000000000..099cc9473 --- /dev/null +++ b/examples/form_composer_demo/data/simple/gold_units/gold_units_data.json @@ -0,0 +1,177 @@ +[ + { + "expecting_answers": { + "name_first": "First", + "name_last": "Last", + "email": "gold_user@mephisto.ai", + "country": "USA", + "language": ["en", "es"], + "bio": "custom validation" + }, + "form": { + "title": "Form example (Gold)", + "instruction": "Please answer all questions to the best of your ability as part of our study.", + "sections": [ + { + "name": "section_about", + "title": "About you", + "instruction": "Please introduce yourself. We would like to know more about your background, personal information, etc.", + "fieldsets": [ + { + "title": "Personal information", + "instruction": "", + "rows": [ + { + "fields": [ + { + "help": "", + "id": "id_name_first", + "label": "First name", + "name": "name_first", + "placeholder": "Type first name", + "tooltip": "Your first name", + "type": "input", + "validators": { + "required": true, + "minLength": 2, + "maxLength": 20 + }, + "value": "" + }, + { + "help": "Optional", + "id": "id_name_last", + "label": "Last name", + "name": "name_last", + "placeholder": "Type last name", + "tooltip": "Your last name", + "type": "input", + "validators": { "required": true }, + "value": "" + } + ], + "help": "Please use your legal name" + }, + { + "fields": [ + { + "help": "We may contact you later for additional information", + "id": "id_email", + "label": "Email address for Mephisto", + "name": "email", + "placeholder": "user@mephisto.ai", + "tooltip": "Email address for Mephisto", + "type": "email", + "validators": { + "required": true, + "regexp": ["^[a-zA-Z0-9._-]+@mephisto\\.ai$", "ig"] + }, + "value": "" + } + ] + } + ] + }, + { + "title": "Cultural background", + "instruction": "Please tell us about your cultural affiliations and values that you use in your daily life.", + "rows": [ + { + "fields": [ + { + "help": "Select country of your residence", + "id": "id_country", + "label": "Country", + "multiple": false, + "name": "country", + "options": [ + { + "label": "---", + "value": "" + }, + { + "label": "United States of America", + "value": "USA" + }, + { + "label": "Canada", + "value": "CAN" + } + ], + "placeholder": "", + "tooltip": "Country", + "type": "select", + "validators": { "required": true }, + "value": "" + }, + { + "help": "Select language spoken in your local community", + "id": "id_language", + "label": "Language", + "multiple": true, + "name": "language", + "options": [ + { + "label": "English", + "value": "en" + }, + { + "label": "French", + "value": "fr" + }, + { + "label": "Spanish", + "value": "es" + }, + { + "label": "Chinese", + "value": "ch" + } + ], + "placeholder": "", + "tooltip": "Language", + "type": "select", + "validators": { + "required": true, + "minLength": 2, + "maxLength": 3 + }, + "value": "" + } + ] + } + ], + "help": "This information will help us compile study statistics" + }, + { + "title": "Additional information", + "instruction": "Optional details about you. You can fill out what you are most comfortable with.", + "rows": [ + { + "fields": [ + { + "help": "", + "id": "id_bio", + "label": "Biography since age of 18", + "name": "bio", + "placeholder": "", + "tooltip": "Your bio in a few paragraphs", + "type": "textarea", + "validators": { "required": false }, + "value": "" + } + ] + } + ], + "help": "Some additional details about your persona" + } + ] + } + ], + "submit_button": { + "text": "Submit", + "tooltip": "Submit form" + } + } + } +] diff --git a/examples/form_composer_demo/data/simple/gold_units/gold_units_validation.py b/examples/form_composer_demo/data/simple/gold_units/gold_units_validation.py new file mode 100644 index 000000000..54f2fd45d --- /dev/null +++ b/examples/form_composer_demo/data/simple/gold_units/gold_units_validation.py @@ -0,0 +1,95 @@ +from typing import Any +from typing import Callable +from typing import List +from typing import Optional + +from mephisto.data_model.unit import Unit + + +ValidationFuncType = Callable[[Any, Optional[Any]], bool] + + +def _simple_comparing(worker_value: Any, correct_value: Optional[Any]) -> bool: + if correct_value is None: + # Just skip if there's no value, we do not validate this field at all + return True + + return worker_value == correct_value + + +def _validate_name_first(worker_value: Any, correct_value: Optional[Any]) -> bool: + return _simple_comparing(worker_value, correct_value) + + +def _validate_name_last(worker_value: Any, correct_value: Optional[Any]) -> bool: + return _simple_comparing(worker_value, correct_value) + + +def _validate_email(worker_value: Any, correct_value: Optional[Any]) -> bool: + return _simple_comparing(worker_value, correct_value) + + +def _validate_country(worker_value: Any, correct_value: Optional[Any]) -> bool: + return _simple_comparing(worker_value, correct_value) + + +def _validate_language(worker_value: Any, correct_value: Optional[Any]) -> bool: + return _simple_comparing(worker_value, correct_value) + + +def _validate_bio(worker_value: Any, correct_value: Optional[Any]) -> bool: + # Custom more complicated logic + if len(worker_value) < 10: + return False + + if "Gold" not in worker_value: + return False + + if "Bad" in worker_value: + return False + + return True + + +FIELD_VALIDATOR_MAPPINGS = { + "name_first": _validate_name_first, + "name_last": _validate_name_last, + "email": _validate_email, + "country": _validate_country, + "language": _validate_language, + "bio": _validate_bio, +} + + +def validate_gold_unit(unit: "Unit") -> bool: + agent = unit.get_assigned_agent() + data = agent.state.get_data() + + worker_answeres = data["outputs"] + + expecting_answers: dict = data["inputs"]["expecting_answers"] + + validated_fields: List[bool] = [] + + for fieldname, correct_value in expecting_answers.items(): + # No correct value set for this field, they pass validation + if correct_value is None: + validated_fields.append(True) + continue + + # No validation function set for this field, they pass validation + validation_func: ValidationFuncType = FIELD_VALIDATOR_MAPPINGS.get(fieldname) + if not validation_func: + validated_fields.append(True) + continue + + # No worker answer for this field, they fail validation + worker_value = worker_answeres.get(fieldname) + if not worker_value: + validated_fields.append(False) + continue + + validation_result = validation_func(worker_value, correct_value) + validated_fields.append(validation_result) + + return all(validated_fields) diff --git a/examples/form_composer_demo/hydra_configs/conf/example_local_mock_with_gold_unit.yaml b/examples/form_composer_demo/hydra_configs/conf/example_local_mock_with_gold_unit.yaml new file mode 100644 index 000000000..ff3dfabb3 --- /dev/null +++ b/examples/form_composer_demo/hydra_configs/conf/example_local_mock_with_gold_unit.yaml @@ -0,0 +1,32 @@ +#@package _global_ + +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +defaults: + - /mephisto/blueprint: static_react_task + - /mephisto/architect: local + - /mephisto/provider: mock + +mephisto: + blueprint: + data_json: ${task_dir}/data/simple/task_data.json + task_source: ${task_dir}/webapp/build/bundle.js + task_source_review: ${task_dir}/webapp/build/bundle.review.js + link_task_source: false + extra_source_dir: ${task_dir}/webapp/src/static + units_per_assignment: 2 + gold_qualification_base: "gold_qualification" # Required for Gold Units + use_golds: true # Required for Gold Units + min_golds: 1 # Required for Gold Units + max_incorrect_golds: 1 # Required for Gold Units + max_gold_units: 1 # Required for Gold Units + task: + allowed_concurrent: 1 # Required for Gold Units + task_name: "Sample Questionnaire" + task_title: "Example how to easily create simple form-based Tasks" + task_description: "In this Task, we use FormComposer feature." + task_reward: 0 + task_tags: "test,simple,form,form-composer" + force_rebuild: true diff --git a/examples/form_composer_demo/run_task_with_gold_unit.py b/examples/form_composer_demo/run_task_with_gold_unit.py new file mode 100644 index 000000000..603466b53 --- /dev/null +++ b/examples/form_composer_demo/run_task_with_gold_unit.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 + +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import json +import os +from json import JSONDecodeError +from typing import Any +from typing import Dict +from typing import List + +from omegaconf import DictConfig + +from examples.form_composer_demo.data.simple.gold_units.gold_units_validation import ( + validate_gold_unit, +) +from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import ( + SharedStaticTaskState, +) +from mephisto.abstractions.blueprints.mixins.use_gold_unit import get_gold_factory +from mephisto.abstractions.blueprints.mixins.use_gold_unit import UseGoldUnit +from mephisto.operations.operator import Operator +from mephisto.tools.scripts import build_custom_bundle +from mephisto.tools.scripts import task_script + + +def _build_custom_bundles(cfg: DictConfig) -> None: + """Locally build bundles that are not available on npm repository""" + mephisto_packages_dir = os.path.join( + # Root project directory + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "packages", + ) + + # Build `mephisto-task-multipart` React package + build_custom_bundle( + mephisto_packages_dir, + force_rebuild=cfg.mephisto.task.force_rebuild, + webapp_name="mephisto-task-multipart", + build_command="build", + ) + + # Build `react-form-composer` React package + build_custom_bundle( + mephisto_packages_dir, + force_rebuild=cfg.mephisto.task.force_rebuild, + webapp_name="react-form-composer", + build_command="build", + ) + + # Build Review UI for the application + build_custom_bundle( + cfg.task_dir, + force_rebuild=cfg.mephisto.task.force_rebuild, + webapp_name="webapp", + build_command="build:simple:review", + ) + + # Build Task UI for the application + build_custom_bundle( + cfg.task_dir, + force_rebuild=cfg.mephisto.task.force_rebuild, + post_install_script=cfg.mephisto.task.post_install_script, + build_command="dev:simple", + ) + + +def _get_gold_data() -> List[Dict[str, Any]]: + gold_data_path = os.path.join( + # Root project directory + os.path.dirname(os.path.abspath(__file__)), + "data", + "simple", + "gold_units", + "gold_units_data.json", + ) + + try: + with open(gold_data_path) as config_file: + gold_data = json.load(config_file) + except (JSONDecodeError, TypeError) as e: + print(f"[red]Could not read Gold Unit data from file: '{gold_data_path}': {e}.[/red]") + exit() + + return gold_data + + +@task_script(default_config_file="example_local_mock_with_gold_unit") +def main(operator: Operator, cfg: DictConfig) -> None: + # 1. Build packages + _build_custom_bundles(cfg) + + # 2. Prepare ShareState with Gold Units + gold_data = _get_gold_data() + shared_state = SharedStaticTaskState( + get_gold_for_worker=get_gold_factory(gold_data), + on_unit_submitted=UseGoldUnit.create_validation_function(cfg.mephisto, validate_gold_unit), + ) + shared_state.qualifications += UseGoldUnit.get_mixin_qualifications(cfg.mephisto, shared_state) + + # 3. Launch TaskRun + operator.launch_task_run(cfg.mephisto, shared_state=shared_state) + operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30) + + +if __name__ == "__main__": + main() diff --git a/examples/remote_procedure/mnist/run_task.py b/examples/remote_procedure/mnist/run_task.py index f8db452a0..8446aee38 100644 --- a/examples/remote_procedure/mnist/run_task.py +++ b/examples/remote_procedure/mnist/run_task.py @@ -43,7 +43,9 @@ def my_screening_unit_generator(): prop """ while True: - yield {"isScreeningUnit": True} + yield { + "isScreeningUnit": True, + } def validate_screening_unit(unit: Unit): @@ -54,6 +56,7 @@ def validate_screening_unit(unit: Unit): annotation = data["outputs"]["final_submission"]["annotations"][0] if annotation["isCorrect"] and annotation["currentAnnotation"] == 3: return True + return False @@ -90,7 +93,7 @@ def handle_with_model( ) if is_using_screening_units: - """You have to defined a few more properties to enable screening units""" + # You have to define a few more properties to enable screening units shared_state.on_unit_submitted = ScreenTaskRequired.create_validation_function( cfg.mephisto, validate_screening_unit, diff --git a/examples/remote_procedure/mnist/webapp/src/components/core_components.jsx b/examples/remote_procedure/mnist/webapp/src/components/core_components.jsx index bf5fdf78f..91ecb2915 100644 --- a/examples/remote_procedure/mnist/webapp/src/components/core_components.jsx +++ b/examples/remote_procedure/mnist/webapp/src/components/core_components.jsx @@ -154,15 +154,16 @@ function TaskFrontend({ ); } - // TODO Update this file such that, if finalResults contains data we render in review mode with that data - const NUM_ANNOTATIONS = initialTaskData.isScreeningUnit ? 1 : 3; + // TODO: Update this file such that, + // if finalResults contains data we render in review mode with that data + const numAnnotations = initialTaskData.isScreeningUnit ? 1 : 3; const [annotations, updateAnnotations] = React.useReducer( (currentAnnotation, { updateIdx, updatedAnnotation }) => { return currentAnnotation.map((val, idx) => - idx == updateIdx ? updatedAnnotation : val + idx === updateIdx ? updatedAnnotation : val ); }, - Array(NUM_ANNOTATIONS).fill({ + Array(numAnnotations).fill({ currentAnnotation: null, trueAnnotation: null, isCorrect: null, @@ -170,7 +171,7 @@ function TaskFrontend({ ); let canSubmit = annotations.filter((a) => a.isCorrect === true || a.trueAnnotation !== "") - .length == NUM_ANNOTATIONS; + .length === numAnnotations; return (
diff --git a/mephisto/abstractions/blueprints/mixins/use_gold_unit.py b/mephisto/abstractions/blueprints/mixins/use_gold_unit.py index c5710fbed..9af662d51 100644 --- a/mephisto/abstractions/blueprints/mixins/use_gold_unit.py +++ b/mephisto/abstractions/blueprints/mixins/use_gold_unit.py @@ -4,41 +4,34 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import ( - List, - Optional, - Dict, - Any, - Union, - Iterable, - Callable, - Tuple, - Generator, - TYPE_CHECKING, -) - -import types -import random import math +import random import traceback -from mephisto.abstractions.blueprint import BlueprintMixin, AgentState -from dataclasses import dataclass, field -from omegaconf import MISSING, DictConfig +from dataclasses import dataclass +from dataclasses import field +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple +from typing import TYPE_CHECKING + +from omegaconf import DictConfig +from omegaconf import MISSING + +from mephisto.abstractions.blueprint import AgentState +from mephisto.abstractions.blueprint import BlueprintMixin from mephisto.data_model.qualification import QUAL_NOT_EXIST -from mephisto.utils.qualifications import ( - make_qualification_dict, - find_or_create_qualification, -) from mephisto.operations.task_launcher import GOLD_UNIT_INDEX - +from mephisto.utils.qualifications import find_or_create_qualification +from mephisto.utils.qualifications import make_qualification_dict if TYPE_CHECKING: from mephisto.data_model.task_run import TaskRun from mephisto.data_model.unit import Unit - from mephisto.data_model.packet import Packet from mephisto.data_model.worker import Worker from mephisto.abstractions.blueprint import SharedTaskState - from argparse import _ArgumentGroup as ArgumentGroup from mephisto.utils.logger_core import get_logger @@ -49,30 +42,29 @@ class UseGoldUnitArgs: gold_qualification_base: str = field( default=MISSING, - metadata={"help": ("Basename for a qualification that tracks gold completion rates")}, + metadata={"help": "Basename for a qualification that tracks gold completion rates"}, ) max_gold_units: int = field( default=MISSING, metadata={ "help": ( - "The maximum number of gold units that can be launched " - "with this batch, specified to limit the number of golds " - "you may need to pay out for." + "The maximum number of gold units that can be launched with this batch, " + "specified to limit the number of golds you may need to pay out for." ) }, ) use_golds: bool = field( default=False, - metadata={"help": ("Whether or not to use gold tasks in this run.")}, + metadata={"help": "Whether or not to use gold tasks in this run."}, ) min_golds: int = field( default=1, - metadata={"help": ("Minimum golds a worker needs to complete before getting real units.")}, + metadata={"help": "Minimum golds a worker needs to complete before getting real units."}, ) max_incorrect_golds: int = field( default=0, metadata={ - "help": ("Maximum number of golds a worker can get incorrect before being disqualified") + "help": "Maximum number of golds a worker can get incorrect before being disqualified", }, ) @@ -88,12 +80,14 @@ def get_gold_factory(golds: List[Dict[str, Any]]) -> GoldFactory: """ worker_gold_maps: Dict[str, List[int]] = {} num_golds = len(golds) + assert num_golds != 0, "Must provide at least one gold to get_gold_factory" def get_gold_for_worker(worker: "Worker"): if worker.db_id not in worker_gold_maps or len(worker_gold_maps[worker.db_id]) == 0: # create a list of gold indices a worker hasn't done worker_gold_maps[worker.db_id] = [x for x in range(num_golds)] + # select a random gold index from what remains rg = worker_gold_maps[worker.db_id] selected_idx = random.randint(0, len(rg) - 1) @@ -105,27 +99,36 @@ def get_gold_for_worker(worker: "Worker"): def worker_needs_gold( - units_completed: int, num_correct: int, num_incorrect: int, min_golds: int + units_completed: int, + num_correct: int, + num_incorrect: int, + min_golds: int, ) -> bool: """ Return a bool of whether or not a worker needs to be shown a gold unit in the current slot. - Generally we show a lot of of golds to begin with, (up until min_golds), and then scale down. + Generally we show a lot of golds to begin with, (up until min_golds), and then scale down. """ # After launching, if the correct golds are less than the min, we need more golds if num_correct < min_golds: return True + excess_golds = num_correct - (min_golds + num_incorrect) # (Somewhat arbitrarily), we scale to ensure that workers complete golds for every # This gives ~5% gold at 100 and ~1% gold at 1000 target_gold = math.ceil(math.pow(math.log10(units_completed + 1), 2.2)) - 1 + if excess_golds < target_gold: return True + return False def worker_qualifies( - units_completed: int, num_correct: int, num_incorrect: int, max_incorrect_golds: int + units_completed: int, + num_correct: int, + num_incorrect: int, + max_incorrect_golds: int, ) -> bool: """ Return a bool of whether or not a worker is qualified to continue working on these tasks. @@ -169,6 +172,7 @@ def init_mixin_config( assert isinstance( shared_state, GoldUnitSharedState ), f"Must use GoldUnitSharedState with this mixin, found {shared_state}" + return self.init_gold_config(task_run, args, shared_state) def init_gold_config( @@ -208,19 +212,22 @@ def assert_mixin_args(cls, args: "DictConfig", shared_state: "SharedTaskState"): use_golds = args.blueprint.get("use_golds", False) if not use_golds: return + assert args.task.allowed_concurrent == 1, ( - "Can only run this task type with one allowed concurrent unit at a time per worker, to ensure " - "golds are completed in order." + "Can only run this task type with one allowed concurrent unit at a time per worker, " + "to ensure golds are completed in order." ) assert ( args.blueprint.get("use_screening_task") is not True ), "Gold units currently cannot be used with screening units" + max_gold_units = args.blueprint.max_gold_units assert max_gold_units is not None, ( "You must supply a blueprint.max_gold_units argument to set the maximum number of " "additional units you will pay out for evaluating on gold units. Note that you " "do pay for gold units, they are just like any other units." ) + gold_qualification_base = args.blueprint.gold_qualification_base assert ( gold_qualification_base is not None @@ -230,8 +237,8 @@ def assert_mixin_args(cls, args: "DictConfig", shared_state: "SharedTaskState"): "You must supply a get_gold_for_worker generator in your SharedTaskState to use " "gold units units." ) - # TODO(#97) it would be nice to test that `get_gold_for_worker` actually returns a task when - # given a worker + # TODO (#97): it would be nice to test that `get_gold_for_worker` + # actually returns a task when given a worker @staticmethod def get_current_qual_or_default(worker: "Worker", qual_name: str, default_val: Any = 0) -> Any: @@ -243,10 +250,12 @@ def get_completion_stats_for_worker(self, worker: "Worker") -> Tuple[int, int, i """Return the correct and incorrect gold counts, as well as the total count for a worker""" completed_units = UseGoldUnit.get_current_qual_or_default(worker, self.task_count_qual_name) correct_golds = UseGoldUnit.get_current_qual_or_default( - worker, self.golds_correct_qual_name + worker, + self.golds_correct_qual_name, ) incorrect_golds = UseGoldUnit.get_current_qual_or_default( - worker, self.golds_failed_qual_name + worker, + self.golds_failed_qual_name, ) return completed_units, correct_golds, incorrect_golds @@ -257,15 +266,21 @@ def should_produce_gold_for_worker(self, worker: "Worker") -> bool: correct_units, incorrect_units, ) = self.get_completion_stats_for_worker(worker) + if not self.worker_qualifies( completed_units, correct_units, incorrect_units, self.max_incorrect_golds ): return False + if correct_units >= self.min_golds: if self.gold_units_launched >= self.gold_unit_cap: return False # they qualify, but we don't have golds to launch + return self.worker_needs_gold( - completed_units, correct_units, incorrect_units, self.min_golds + completed_units, + correct_units, + incorrect_units, + self.min_golds, ) def update_qualified_status(self, worker: "Worker") -> bool: @@ -275,16 +290,22 @@ def update_qualified_status(self, worker: "Worker") -> bool: correct_units, incorrect_units, ) = self.get_completion_stats_for_worker(worker) + if not self.worker_qualifies( - completed_units, correct_units, incorrect_units, self.max_incorrect_golds + completed_units, + correct_units, + incorrect_units, + self.max_incorrect_golds, ): worker.grant_qualification(self.disqualified_qual_name) return True + return False def get_gold_unit_data_for_worker(self, worker: "Worker") -> Optional[Dict[str, Any]]: if self.gold_units_launched >= self.gold_unit_cap: return None + try: self.gold_units_launched += 1 return self.get_gold_for_worker(worker) @@ -312,7 +333,8 @@ def _wrapped_validate(unit): if agent is not None and agent.get_status() == AgentState.STATUS_COMPLETED: worker = agent.get_worker() completed_units = UseGoldUnit.get_current_qual_or_default( - worker, task_count_qual_name + worker, + task_count_qual_name, ) worker.grant_qualification( task_count_qual_name, @@ -330,17 +352,23 @@ def _wrapped_validate(unit): if validation_result is True: correct_units = UseGoldUnit.get_current_qual_or_default( - worker, golds_correct_qual_name + worker, + golds_correct_qual_name, ) worker.grant_qualification( - golds_correct_qual_name, correct_units + 1, skip_crowd=True + golds_correct_qual_name, + correct_units + 1, + skip_crowd=True, ) elif validation_result is False: incorrect_units = UseGoldUnit.get_current_qual_or_default( - worker, golds_failed_qual_name + worker, + golds_failed_qual_name, ) worker.grant_qualification( - golds_failed_qual_name, incorrect_units + 1, skip_crowd=True + golds_failed_qual_name, + incorrect_units + 1, + skip_crowd=True, ) return _wrapped_validate