From 1ba9777ff5e9faeb943be6eacafa59b8adf81d4d Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 7 Nov 2023 23:21:52 +0000 Subject: [PATCH 01/25] defining rewardresult dataclass and reward event --- prompting/validators/reward/relevance.py | 50 ++++++++++++++++++++---- prompting/validators/reward/reward.py | 10 ++++- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index 7ed0602..32e96b1 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -23,6 +23,7 @@ from transformers import AutoTokenizer, AutoModel from torchmetrics.functional import pairwise_cosine_similarity import torch.nn.functional as F +from dataclasses import dataclass, asdict def mean_pooling(model_output, attention_mask): @@ -48,6 +49,12 @@ def mean_pooling(model_output, attention_mask): class RelevanceRewardModel(BaseRewardModel): + @dataclass + class RewardResult(): + reward: int = 1 + bert_relevancy_score: float = None + mpnet_relevancy_score: float = None + @property def name(self) -> str: return RewardModelType.relevance.value @@ -64,29 +71,54 @@ def __init__(self, device: str): def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ).to(self.device) + + # All reward result for each completions. + reward_results = [asdict(self.reward(prompt, completion, name)).values() for completion in completions] + + # Transpose the reward results. + rewards, bert_relevancy_scores, mpnet_relevancy_scores = list(zip(*reward_results)) + + rewards = torch.tensor(rewards, dtype=torch.float32).to(self.device) + + reward_event = { + 'bert_relevancy_score': bert_relevancy_scores, + 'mpnet_relevancy_scores': mpnet_relevancy_scores + } + + return rewards, reward_event def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards def reward(self, prompt: str, completion: str, name: str) -> float: + + result = RelevanceRewardModel.RewardResult() + for i, model in enumerate(self.models): # rewards diff = model.reward(prompt, completion) # If a model returns 0, stop iterating and return 0 if diff < self.bounds[i]: - return 0.0 + result.reward = 0 + + if model.name == 'relevance_bert': + result.bert_relevancy_score = diff + + elif model.name == 'relevance_mpnet': + result.mpnet_relevancy_score = diff + # If none of the models returned 0, return 1 - return 1.0 + return result class BertRelevanceRewardModel(BaseRewardModel): relevance_model_path = "bert-base-uncased" + @property + def name(self) -> str: + return RewardModelType.relevance_bert.value + def __init__(self, device: str): super().__init__() self.device = device @@ -142,6 +174,10 @@ def reward(self, prompt: str, completion: str) -> float: class MpnetRelevenceModel(BaseRewardModel): diversity_model_path = "sentence-transformers/all-mpnet-base-v2" + @property + def name(self) -> str: + return RewardModelType.relevance_mpnet.value + def __init__(self, device: str): super().__init__() self.device = device @@ -190,4 +226,4 @@ def reward(self, prompt: str, completion: str) -> torch.FloatTensor: # Calculate the pairwise cosine similarity. 
similarity = pairwise_cosine_similarity(prompt_embed, embeddings) - return torch.abs(similarity) + return torch.abs(similarity).item() \ No newline at end of file diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index c20220d..a304e9b 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -117,7 +117,7 @@ def apply( ] # Reward each completion. - successful_rewards = self.get_rewards(prompt, successful_completions, name) + successful_rewards, reward_event = self.get_rewards(prompt, successful_completions, name) # Softmax rewards across samples. successful_rewards_normalized = self.normalize_rewards(successful_rewards) @@ -135,5 +135,11 @@ def apply( filled_rewards[idx] = reward filled_rewards_normalized[idx] = reward_normalized + if not reward_event: + reward_event = {} + + reward_event[reward_fn_i.name] = filled_rewards.tolist() + reward_event[reward_fn_i.name + "_normalized"] = filled_rewards_normalized.tolist() + # Return the filled rewards. - return filled_rewards, filled_rewards_normalized + return filled_rewards_normalized, reward_event From 22a201f1da9f91c9859adb2e91767e5364bc44de Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 7 Nov 2023 23:22:38 +0000 Subject: [PATCH 02/25] moved event addition into reward model apply function --- prompting/validators/forward.py | 10 ++++------ prompting/validators/reward/config.py | 2 ++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index efcfa6a..073c0e8 100644 --- a/prompting/validators/forward.py +++ b/prompting/validators/forward.py @@ -122,19 +122,17 @@ async def run_step( self.device ) for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions): - reward_i, reward_i_normalized = reward_fn_i.apply(prompt, responses, name) + reward_i_normalized, reward_event = reward_fn_i.apply(prompt, responses, name) rewards += weight_i * reward_i_normalized.to(self.device) if not self.config.neuron.disable_log_rewards: - event[reward_fn_i.name] = reward_i.tolist() - event[reward_fn_i.name + "_normalized"] = reward_i_normalized.tolist() + event = {**event, **reward_event} bt.logging.trace(str(reward_fn_i.name), reward_i_normalized.tolist()) for masking_fn_i in self.masking_functions: - mask_i, mask_i_normalized = masking_fn_i.apply(base_prompt, responses, name) + mask_i_normalized, reward_event = masking_fn_i.apply(base_prompt, responses, name) rewards *= mask_i_normalized.to(self.device) # includes diversity if not self.config.neuron.disable_log_rewards: - event[masking_fn_i.name] = mask_i.tolist() - event[masking_fn_i.name + "_normalized"] = mask_i_normalized.tolist() + event = {**event, **reward_event} bt.logging.trace(str(masking_fn_i.name), mask_i_normalized.tolist()) # Train the gating model based on the predicted scores and the actual rewards. 
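For context on the forward.py hunk above: after this change each reward and masking function hands back a (normalized_rewards, reward_event) pair, and the per-model event dict is folded into the run-level `event` with dict unpacking. The sketch below is a minimal, standalone illustration of that merge; the key names follow the naming pattern the later patches in this series settle on and the values are made up, so treat it as an assumption-laden example rather than code from this repository.

    def merge_reward_events(event: dict, reward_events: list) -> dict:
        """Fold each reward function's event dict into the run-level event,
        equivalent to `event = {**event, **reward_event}` in forward.py."""
        for reward_event in reward_events:
            event.update(reward_event)
        return event


    if __name__ == "__main__":
        event = {"step": 1}
        reward_events = [
            {   # hypothetical shape produced by RelevanceRewardModel.apply
                "relevance_filter": [1.0, 0.0],
                "relevance_filter_normalized": [1.0, 0.0],
                "relevance_filter_bert_score": [0.12, -0.30],
                "relevance_filter_mpnet_score": [0.81, 0.10],
            },
            {   # hypothetical shape produced by Blacklist.apply
                "blacklist_filter": [1.0, 1.0],
                "blacklist_filter_normalized": [1.0, 1.0],
                "blacklist_filter_matched_ngram": [None, None],
                "blacklist_filter_significance_score": [None, None],
            },
        ]
        print(merge_reward_events(event, reward_events))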
diff --git a/prompting/validators/reward/config.py b/prompting/validators/reward/config.py index ea5df05..bffae05 100644 --- a/prompting/validators/reward/config.py +++ b/prompting/validators/reward/config.py @@ -29,6 +29,8 @@ class RewardModelType(Enum): blacklist = "blacklist_filter" nsfw = "nsfw_filter" relevance = "relevance_filter" + relevance_bert = "relevance_bert" + relevance_mpnet = "relevance_mpnet" task_validator = "task_validator_filter" From 270fb932f663524ecb5e5a20ad73e32aaefbca55 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 7 Nov 2023 23:55:06 +0000 Subject: [PATCH 03/25] clean up relevence --- prompting/validators/reward/relevance.py | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index 32e96b1..b9a5d52 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -23,7 +23,7 @@ from transformers import AutoTokenizer, AutoModel from torchmetrics.functional import pairwise_cosine_similarity import torch.nn.functional as F -from dataclasses import dataclass, asdict +from dataclasses import dataclass, asdict, fields def mean_pooling(model_output, attention_mask): @@ -68,24 +68,32 @@ def __init__(self, device: str): ] self.bounds = [-0.0246, 0.3] + def parse_reward_results(self, reward_results): + field_names = [field.name for field in fields(RelevanceRewardModel.RewardResult)] + + reward_results = [asdict(reward_result).values() for reward_result in reward_results] + + reward_event = dict(zip(field_names, list(zip(*reward_results)))) + + reward = reward_event['reward'] + + del reward_event['reward'] + + return reward, reward_event + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + # Get all the reward results. + reward_results = [self.reward(prompt, completion, name) for completion in completions] - # All reward result for each completions. - reward_results = [asdict(self.reward(prompt, completion, name)).values() for completion in completions] + # Parse the result and generate an event to be logged. + reward, reward_event = self.parse_reward_results(reward_results) - # Transpose the reward results. 
- rewards, bert_relevancy_scores, mpnet_relevancy_scores = list(zip(*reward_results)) - - rewards = torch.tensor(rewards, dtype=torch.float32).to(self.device) + reward = torch.tensor(reward, dtype=torch.float32) - reward_event = { - 'bert_relevancy_score': bert_relevancy_scores, - 'mpnet_relevancy_scores': mpnet_relevancy_scores - } + return reward, reward_event - return rewards, reward_event def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards From e0b0f9d69847ac76fe1d2ee395dc35dee892df29 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 7 Nov 2023 23:55:14 +0000 Subject: [PATCH 04/25] apply to blaclist --- prompting/validators/reward/blacklist.py | 46 ++++++++++++++++++++---- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index a6baff2..7789364 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -24,10 +24,17 @@ from .config import RewardModelType from .reward import BaseRewardModel from transformers import BertTokenizer +from dataclasses import dataclass, asdict, fields # TODO: Use CLI arguments to set blacklist values: the most important being the boundary value and max_size class Blacklist(BaseRewardModel): + @dataclass + class RewardResult(): + reward: int = 1 + matched_ngram: str = None + significance_score: float = None + @property def name(self) -> str: return RewardModelType.blacklist.value @@ -265,8 +272,11 @@ def reward(self, prompt: str, completion: str, name: str) -> float: float: Reward value {0,1} """ + result = Blacklist.RewardResult() + if completion in prompt: - return 0.0 + result.reward = 0.0 + return result # Get significance scores scores = self.get_significance() @@ -276,17 +286,39 @@ def reward(self, prompt: str, completion: str, name: str) -> float: if (score > self.boundary and fuzz.partial_ratio(ngram, completion.lower()) > self.partial_ratio_boundary ): - return 0 + result.reward = 0 + result.matched_ngram = ngram + result.significance_score = score + return result + + result.reward = 1 + return result - return 1 + def parse_reward_results(reward_results): + field_names = [field.name for field in fields(Blacklist.RewardResult)] + + reward_results = [asdict(reward_result).values() for reward_result in reward_results] + + reward_event = dict(zip(field_names, list(zip(*reward_results)))) + + reward = reward_event['reward'] + + del reward_event['reward'] + + return reward, reward_event def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ) + # Get all the reward results. + reward_results = [self.reward(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. 
+ reward, reward_event = parse_reward_results(reward_results) + + reward = torch.tensor(reward, dtype=torch.float32) + + return reward, reward_event def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards \ No newline at end of file From 0aeddc25600e40c44a12d63398fe4daca300efd7 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Tue, 7 Nov 2023 23:59:13 +0000 Subject: [PATCH 05/25] fixes --- prompting/validators/reward/blacklist.py | 6 +++--- prompting/validators/reward/relevance.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index 7789364..ecb2416 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -294,8 +294,8 @@ def reward(self, prompt: str, completion: str, name: str) -> float: result.reward = 1 return result - def parse_reward_results(reward_results): - field_names = [field.name for field in fields(Blacklist.RewardResult)] + def parse_reward_results(self, reward_results): + field_names = [field.name for field in fields(self.RewardResult)] reward_results = [asdict(reward_result).values() for reward_result in reward_results] @@ -314,7 +314,7 @@ def get_rewards( reward_results = [self.reward(prompt, completion, name) for completion in completions] # Parse the result and generate an event to be logged. - reward, reward_event = parse_reward_results(reward_results) + reward, reward_event = self.parse_reward_results(reward_results) reward = torch.tensor(reward, dtype=torch.float32) diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index b9a5d52..be6d788 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -69,7 +69,7 @@ def __init__(self, device: str): self.bounds = [-0.0246, 0.3] def parse_reward_results(self, reward_results): - field_names = [field.name for field in fields(RelevanceRewardModel.RewardResult)] + field_names = [field.name for field in fields(self.RewardResult)] reward_results = [asdict(reward_result).values() for reward_result in reward_results] From 73303149aa693cf92d41a9b2077966f25beb04c6 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 00:10:41 +0000 Subject: [PATCH 06/25] changed get_reward returns for all --- prompting/validators/reward/blacklist.py | 4 ++-- prompting/validators/reward/dahoas.py | 6 +++--- prompting/validators/reward/diversity.py | 8 ++++---- prompting/validators/reward/dpo.py | 6 +++--- prompting/validators/reward/nsfw.py | 6 +++--- prompting/validators/reward/open_assistant.py | 6 +++--- prompting/validators/reward/prompt.py | 6 +++--- prompting/validators/reward/reciprocate.py | 6 +++--- prompting/validators/reward/relevance.py | 4 ++-- prompting/validators/reward/reward.py | 4 ++-- prompting/validators/reward/task_validator.py | 6 +++--- 11 files changed, 31 insertions(+), 31 deletions(-) diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index ecb2416..e31ec11 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -20,7 +20,7 @@ import torch import math from fuzzywuzzy import fuzz -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import BertTokenizer @@ -309,7 +309,7 @@ def parse_reward_results(self, reward_results): def get_rewards( self, prompt: 
str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: # Get all the reward results. reward_results = [self.reward(prompt, completion, name) for completion in completions] diff --git a/prompting/validators/reward/dahoas.py b/prompting/validators/reward/dahoas.py index b1183cc..2784b4f 100644 --- a/prompting/validators/reward/dahoas.py +++ b/prompting/validators/reward/dahoas.py @@ -18,7 +18,7 @@ import os import torch -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig @@ -102,11 +102,11 @@ def reward_fn(samples): def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: return torch.tensor( [self.reward(prompt, completion, name) for completion in completions], dtype=torch.float32, - ).to(self.device) + ).to(self.device), None def forward( self, diff --git a/prompting/validators/reward/diversity.py b/prompting/validators/reward/diversity.py index d0a5b77..8ea17b5 100644 --- a/prompting/validators/reward/diversity.py +++ b/prompting/validators/reward/diversity.py @@ -155,10 +155,10 @@ def regularise(rewards): def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: # Check if completions are empty, return 0 if so if len(completions) == 0: - return torch.tensor([]).to(self.device) + return torch.tensor([]).to(self.device), None # Get embeddings for all completions. embeddings = self.get_embeddings(completions) @@ -173,9 +173,9 @@ def get_rewards( # Return all if historic_rewards != None: - return batch_rewards * historic_rewards + return batch_rewards * historic_rewards, None else: - return batch_rewards + return batch_rewards, None def normalize_rewards(self, raw_rewards: torch.FloatTensor) -> torch.FloatTensor: # Applies binarization on the rewards. diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index a987f69..b0e7295 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -18,7 +18,7 @@ import torch import bittensor as bt -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import ( @@ -128,7 +128,7 @@ def reward_single( def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: rewards = torch.tensor( [ self.reward_single(prompt, completion, name) @@ -137,4 +137,4 @@ def get_rewards( dtype=torch.float32, ).to(self.device) bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}") - return rewards + return rewards, None diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index bb649d2..4098dea 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -17,7 +17,7 @@ # DEALINGS IN THE SOFTWARE. 
import torch -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoModelForSequenceClassification, AutoTokenizer @@ -67,11 +67,11 @@ def sum_nsfw_scores(input_ids, chunk_size): def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: return torch.tensor( [self.reward(prompt, completion, name) for completion in completions], dtype=torch.float32, - ).to(self.device) + ).to(self.device), None def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards diff --git a/prompting/validators/reward/open_assistant.py b/prompting/validators/reward/open_assistant.py index 77dfa36..2b57915 100644 --- a/prompting/validators/reward/open_assistant.py +++ b/prompting/validators/reward/open_assistant.py @@ -17,7 +17,7 @@ # DEALINGS IN THE SOFTWARE. import torch -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModelForSequenceClassification @@ -49,11 +49,11 @@ def reward_single(self, prompt: str, completion: str, name: str) -> float: def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: return torch.tensor( [ self.reward_single(prompt, completion, name) for completion in completions ], dtype=torch.float32, - ).to(self.device) + ).to(self.device), None diff --git a/prompting/validators/reward/prompt.py b/prompting/validators/reward/prompt.py index b72e366..e44bfda 100644 --- a/prompting/validators/reward/prompt.py +++ b/prompting/validators/reward/prompt.py @@ -19,7 +19,7 @@ import time import torch import bittensor as bt -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from prompting.validators.prompts import AugmentPrompt, FollowupPrompt, AnswerPrompt @@ -100,7 +100,7 @@ def reward(self, prompt: str, completion: str, name: str) -> float: def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: bt.logging.debug( f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward)." ) @@ -110,4 +110,4 @@ def get_rewards( return torch.tensor( [self.reward(prompt, completion, name) for completion in completions], dtype=torch.float32, - ).to(self.device) + ).to(self.device), None diff --git a/prompting/validators/reward/reciprocate.py b/prompting/validators/reward/reciprocate.py index ff2e572..81288d9 100644 --- a/prompting/validators/reward/reciprocate.py +++ b/prompting/validators/reward/reciprocate.py @@ -17,7 +17,7 @@ # DEALINGS IN THE SOFTWARE. 
import torch -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModelForSequenceClassification @@ -58,8 +58,8 @@ def reward(self, prompt: str, completion: str, name: str) -> float: def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: return torch.tensor( [self.reward(prompt, completion, name) for completion in completions], dtype=torch.float32, - ).to(self.device) + ).to(self.device), None diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index be6d788..b8e7b64 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -17,7 +17,7 @@ # DEALINGS IN THE SOFTWARE. import torch -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModel @@ -83,7 +83,7 @@ def parse_reward_results(self, reward_results): def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: # Get all the reward results. reward_results = [self.reward(prompt, completion, name) for completion in completions] diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index a304e9b..2c6b13a 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -18,7 +18,7 @@ import torch import bittensor as bt -from typing import List +from typing import List, Union from abc import abstractmethod @@ -37,7 +37,7 @@ def __repr__(self) -> str: @abstractmethod def get_rewards( self, prompt: str, completion: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: ... def __init__(self) -> None: diff --git a/prompting/validators/reward/task_validator.py b/prompting/validators/reward/task_validator.py index 0e3f494..6d3448a 100644 --- a/prompting/validators/reward/task_validator.py +++ b/prompting/validators/reward/task_validator.py @@ -16,7 +16,7 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
import torch -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel @@ -68,11 +68,11 @@ def reward(self, prompt: str, completion: str, name: str) -> float: def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: return torch.tensor( [self.reward(prompt, completion, name) for completion in completions], dtype=torch.float32, - ) + ), None def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards From e1023a56ff2d1bf04b5002ff44fab5fb3f0f2b53 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 19:28:00 +0000 Subject: [PATCH 07/25] added BaseRewardEvent --- prompting/validators/reward/blacklist.py | 59 ++++++++----------- prompting/validators/reward/dahoas.py | 33 +++++++---- prompting/validators/reward/diversity.py | 9 +-- prompting/validators/reward/dpo.py | 38 +++++++----- prompting/validators/reward/nsfw.py | 32 +++++++--- prompting/validators/reward/open_assistant.py | 26 ++++---- prompting/validators/reward/prompt.py | 28 ++++++--- prompting/validators/reward/reciprocate.py | 23 +++++--- prompting/validators/reward/relevance.py | 50 ++++++---------- prompting/validators/reward/reward.py | 25 +++++--- prompting/validators/reward/task_validator.py | 34 +++++++---- 11 files changed, 205 insertions(+), 152 deletions(-) diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index e31ec11..fadee0a 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -22,19 +22,18 @@ from fuzzywuzzy import fuzz from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import BertTokenizer -from dataclasses import dataclass, asdict, fields +from dataclasses import dataclass # TODO: Use CLI arguments to set blacklist values: the most important being the boundary value and max_size -class Blacklist(BaseRewardModel): - @dataclass - class RewardResult(): - reward: int = 1 - matched_ngram: str = None - significance_score: float = None +@dataclass +class BlacklistRewardEvent(BaseRewardEvent): + matched_ngram: str = None + significance_score: float = None +class Blacklist(BaseRewardModel): @property def name(self) -> str: return RewardModelType.blacklist.value @@ -260,7 +259,7 @@ def set_counter_to_half(self): self.counter = { tokens: [ math.ceil(count[0]/2), math.ceil(count[1]/2)] for tokens, count in self.counter.items()} self._last_update = 0 - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEvent: """Reward function for blacklist reward model. Returns 1 if completion contains an n-gram with significance above the boundary, 0 otherwise. 
Args: @@ -272,11 +271,11 @@ def reward(self, prompt: str, completion: str, name: str) -> float: float: Reward value {0,1} """ - result = Blacklist.RewardResult() + reward_event = BlacklistRewardEvent() if completion in prompt: - result.reward = 0.0 - return result + reward_event.reward = 0.0 + return reward_event # Get significance scores scores = self.get_significance() @@ -286,39 +285,27 @@ def reward(self, prompt: str, completion: str, name: str) -> float: if (score > self.boundary and fuzz.partial_ratio(ngram, completion.lower()) > self.partial_ratio_boundary ): - result.reward = 0 - result.matched_ngram = ngram - result.significance_score = score - return result - - result.reward = 1 - return result - - def parse_reward_results(self, reward_results): - field_names = [field.name for field in fields(self.RewardResult)] - - reward_results = [asdict(reward_result).values() for reward_result in reward_results] - - reward_event = dict(zip(field_names, list(zip(*reward_results)))) + reward_event.reward = 0 + reward_event.matched_ngram = ngram + reward_event.significance_score = score + return reward_event - reward = reward_event['reward'] - - del reward_event['reward'] - - return reward, reward_event + reward_event.reward = 1 + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: + ) -> dict: # Get all the reward results. - reward_results = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [self.reward(prompt, completion, name) for completion in completions] # Parse the result and generate an event to be logged. - reward, reward_event = self.parse_reward_results(reward_results) + parsed_reward_events = BlacklistRewardEvent.parse_reward_events(reward_events) - reward = torch.tensor(reward, dtype=torch.float32) + # Change the reward into tensor object + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32) - return reward, reward_event + return parsed_reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards \ No newline at end of file diff --git a/prompting/validators/reward/dahoas.py b/prompting/validators/reward/dahoas.py index 2784b4f..5948870 100644 --- a/prompting/validators/reward/dahoas.py +++ b/prompting/validators/reward/dahoas.py @@ -20,7 +20,7 @@ import torch from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig @@ -63,10 +63,15 @@ def __init__(self, path: str, device: str): self.tokenizer.pad_token = self.tokenizer.eos_token self.PAD_ID = self.tokenizer(self.tokenizer.pad_token)["input_ids"][0] - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: + + reward_event = BaseRewardEvent() + def reward_fn(samples): if samples is None: - return 0 + reward_event.reward = 0 + return reward_event + scores_list = [] batch_size = 1 for i in range(0, len(samples), batch_size): @@ -92,21 +97,27 @@ def reward_fn(samples): attention_mask=attn_masks.to(self.device), ) scores_list.append(sub_scores["chosen_end_scores"]) - scores = torch.cat(scores_list, dim=0).mean().item() - return scores + score = torch.cat(scores_list, dim=0).mean().item() + return score with torch.no_grad(): combined_reward = reward_fn(prompt + completion) 
independent_reward = reward_fn(completion) - return float((combined_reward - independent_reward).item()) + reward_event.reward = float((combined_reward - independent_reward).item()) + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ).to(self.device), None + ) -> dict: + # Get all the reward results. + reward_events = [self.reward(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. + parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + + return parsed_reward_events def forward( self, diff --git a/prompting/validators/reward/diversity.py b/prompting/validators/reward/diversity.py index 8ea17b5..6e4d0a7 100644 --- a/prompting/validators/reward/diversity.py +++ b/prompting/validators/reward/diversity.py @@ -18,7 +18,7 @@ import torch import torch.nn.functional as F -from typing import List +from typing import List, Union from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModel @@ -155,7 +155,8 @@ def regularise(rewards): def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: + ) -> dict: + # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device), None @@ -173,9 +174,9 @@ def get_rewards( # Return all if historic_rewards != None: - return batch_rewards * historic_rewards, None + return {'reward': batch_rewards * historic_rewards} else: - return batch_rewards, None + return {'reward': batch_rewards} def normalize_rewards(self, raw_rewards: torch.FloatTensor) -> torch.FloatTensor: # Applies binarization on the rewards. diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index b0e7295..00fa867 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -20,7 +20,7 @@ import bittensor as bt from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import ( AutoTokenizer, AutoModelForCausalLM, @@ -56,10 +56,14 @@ def reward_single( which is a reference model's average log-probability for completion tokens given a prompt. Uses guidance from https://github.com/eric-mitchell/direct-preference-optimization/blob/main/trainers.py. """ + + reward_event = BaseRewardEvent() + with torch.no_grad(): # Check if completion is if completion.strip() == "" or len(completion) <= 5: - return -11 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + reward_event.reward = -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + return reward_event # Tokenize the combined prompt + completion. combined = ( @@ -74,7 +78,8 @@ def reward_single( # Completion doesn't fit into model sequence, so return lowest reward. if self.tokenizer.model_max_length <= len(prompt_part): - return -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + reward_event.reward = -11.0 + return reward_event # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) # Truncate combined to fit into model max sequence length. 
if self.tokenizer.model_max_length < len(combined): @@ -123,18 +128,23 @@ def reward_single( # NaNs can possibly arise through log(0)=-inf, replace with suitably small logits. if torch.isnan(reward) or torch.isinf(reward): - return -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) - return reward.item() + reward_event.reward = -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + + reward_event.reward = reward.item() + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: - rewards = torch.tensor( - [ - self.reward_single(prompt, completion, name) - for completion in completions - ], - dtype=torch.float32, - ).to(self.device) + ) -> dict: + + # Get all the reward results. + reward_events = [self.reward_single(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. + parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}") - return rewards, None + + return parsed_reward_events diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index 4098dea..edd1385 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -19,10 +19,13 @@ import torch from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import AutoModelForSequenceClassification, AutoTokenizer +from dataclasses import dataclass - +@dataclass +class NSFWRewardEvent(BaseRewardEvent): + nsfw_score: float = None class NSFWRewardModel(BaseRewardModel): nsfw_filter_model_path = "facebook/roberta-hate-speech-dynabench-r4-target" @@ -40,7 +43,10 @@ def __init__(self, device: str): NSFWRewardModel.nsfw_filter_model_path ).to(self.device) - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> NSFWRewardEvent: + + reward_event = NSFWRewardEvent() + boundary = -0.5 with torch.no_grad(): message = completion @@ -63,15 +69,23 @@ def sum_nsfw_scores(input_ids, chunk_size): return max_score # 0 when needs to be filtered out, 1 when it is safe - return 0.0 if sum_nsfw_scores(input_ids, chunk_size=512) > boundary else 1.0 + nsfw_score = sum_nsfw_scores(input_ids, chunk_size=512) + reward_event.nsfw_score = nsfw_score + reward_event.reward = 0.0 if nsfw_score > boundary else 1.0 + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ).to(self.device), None + ) -> dict: + # Get all the reward results. + reward_events = [self.reward(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. 
+ parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + + return parsed_reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards diff --git a/prompting/validators/reward/open_assistant.py b/prompting/validators/reward/open_assistant.py index 2b57915..5975346 100644 --- a/prompting/validators/reward/open_assistant.py +++ b/prompting/validators/reward/open_assistant.py @@ -19,7 +19,7 @@ import torch from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import AutoTokenizer, AutoModelForSequenceClassification @@ -41,19 +41,25 @@ def __init__(self, device: str): ).to(self.device) def reward_single(self, prompt: str, completion: str, name: str) -> float: + + reward_event = BaseRewardEvent() + with torch.no_grad(): inputs = self.tokenizer(prompt, completion, return_tensors="pt").to( self.device ) - return float(self.model(**inputs).logits[0].cpu().detach()) + reward_event.reward = float(self.model(**inputs).logits[0].cpu().detach()) + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: - return torch.tensor( - [ - self.reward_single(prompt, completion, name) - for completion in completions - ], - dtype=torch.float32, - ).to(self.device), None + ) -> dict: + # Get all the reward results. + reward_events = [self.reward_single(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. + parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + + return parsed_reward_events diff --git a/prompting/validators/reward/prompt.py b/prompting/validators/reward/prompt.py index e44bfda..d8cc629 100644 --- a/prompting/validators/reward/prompt.py +++ b/prompting/validators/reward/prompt.py @@ -21,7 +21,7 @@ import bittensor as bt from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from prompting.validators.prompts import AugmentPrompt, FollowupPrompt, AnswerPrompt from transformers import AutoTokenizer, AutoModelForCausalLM @@ -50,7 +50,10 @@ def __init__(self, device: str): PromptRewardModel.reward_model_name, torch_dtype=torch.float16 ).to(self.device) - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: + + reward_event = BaseRewardEvent() + with torch.no_grad(): # Choose correct scoring prompt for request type. if name == "augment": @@ -60,7 +63,8 @@ def reward(self, prompt: str, completion: str, name: str) -> float: elif name == "answer": scoring_prompt = AnswerPrompt() else: - return 0 + reward_event.reward = 0 + return reward_event # Format scoring prompt for this completion. scoring_prompt_text = scoring_prompt.text(prompt, completion) @@ -96,18 +100,24 @@ def reward(self, prompt: str, completion: str, name: str) -> float: # Scale 0-10 score to 0-1 range. 
score /= 10.0 - return score + reward_event.reward = score + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: + ) -> dict: bt.logging.debug( f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward)." ) bt.logging.trace( f"PromptRewardModel | prompt: {repr(prompt[:50])} ... {repr(prompt[-50:])}" ) - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ).to(self.device), None + # Get all the reward results. + reward_events = [self.reward(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. + parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + + return parsed_reward_events \ No newline at end of file diff --git a/prompting/validators/reward/reciprocate.py b/prompting/validators/reward/reciprocate.py index 81288d9..66974a8 100644 --- a/prompting/validators/reward/reciprocate.py +++ b/prompting/validators/reward/reciprocate.py @@ -19,7 +19,7 @@ import torch from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import AutoTokenizer, AutoModelForSequenceClassification @@ -44,7 +44,8 @@ def __init__(self, device: str): torch_dtype=torch.float16, ).to(self.device) - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: + reward_event = BaseRewardEvent() with torch.no_grad(): message = ( f"<|prompter|>{prompt}<|assistant|>{completion}<|endoftext|>" @@ -54,12 +55,18 @@ def reward(self, prompt: str, completion: str, name: str) -> float: return_tensors="pt", truncation=True, ).to(self.device) - return float(self.model(**inputs)[0].item()) + reward_event.reward = float(self.model(**inputs)[0].item()) + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ).to(self.device), None + ) -> dict: + # Get all the reward results. + reward_events = [self.reward(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. 
+ parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + + return parsed_reward_events \ No newline at end of file diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index b8e7b64..8b33b6a 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -19,11 +19,11 @@ import torch from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import AutoTokenizer, AutoModel from torchmetrics.functional import pairwise_cosine_similarity import torch.nn.functional as F -from dataclasses import dataclass, asdict, fields +from dataclasses import dataclass def mean_pooling(model_output, attention_mask): @@ -47,14 +47,12 @@ def mean_pooling(model_output, attention_mask): input_mask_expanded.sum(1), min=1e-9 ) +@dataclass +class RelevanceRewardEvent(BaseRewardEvent): + bert_relevancy_score: float = None + mpnet_relevancy_score: float = None class RelevanceRewardModel(BaseRewardModel): - @dataclass - class RewardResult(): - reward: int = 1 - bert_relevancy_score: float = None - mpnet_relevancy_score: float = None - @property def name(self) -> str: return RewardModelType.relevance.value @@ -68,39 +66,25 @@ def __init__(self, device: str): ] self.bounds = [-0.0246, 0.3] - def parse_reward_results(self, reward_results): - field_names = [field.name for field in fields(self.RewardResult)] - - reward_results = [asdict(reward_result).values() for reward_result in reward_results] - - reward_event = dict(zip(field_names, list(zip(*reward_results)))) - - reward = reward_event['reward'] - - del reward_event['reward'] - - return reward, reward_event - def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: + ) -> dict: # Get all the reward results. - reward_results = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [self.reward(prompt, completion, name) for completion in completions] # Parse the result and generate an event to be logged. 
- reward, reward_event = self.parse_reward_results(reward_results) - - reward = torch.tensor(reward, dtype=torch.float32) + parsed_reward_events = RelevanceRewardEvent.parse_reward_events(reward_events) - return reward, reward_event + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + return parsed_reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> RelevanceRewardEvent: - result = RelevanceRewardModel.RewardResult() + reward_event = RelevanceRewardEvent() for i, model in enumerate(self.models): # rewards @@ -108,16 +92,16 @@ def reward(self, prompt: str, completion: str, name: str) -> float: # If a model returns 0, stop iterating and return 0 if diff < self.bounds[i]: - result.reward = 0 + reward_event.reward = 0 if model.name == 'relevance_bert': - result.bert_relevancy_score = diff + reward_event.bert_relevancy_score = diff elif model.name == 'relevance_mpnet': - result.mpnet_relevancy_score = diff + reward_event.mpnet_relevancy_score = diff # If none of the models returned 0, return 1 - return result + return reward_event class BertRelevanceRewardModel(BaseRewardModel): diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 2c6b13a..139abed 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -20,7 +20,18 @@ import bittensor as bt from typing import List, Union from abc import abstractmethod +from dataclasses import dataclass, asdict, fields +@dataclass +class BaseRewardEvent: + reward: float = 1. + normalized_reward: float = None + + def parse_reward_events(reward_events): + field_names = [field.name for field in fields(reward_events[0])] + reward_events = [asdict(reward_event).values() for reward_event in reward_events] + reward_event = dict(zip(field_names, list(zip(*reward_events)))) + return reward_event class BaseRewardModel: @property @@ -117,7 +128,8 @@ def apply( ] # Reward each completion. - successful_rewards, reward_event = self.get_rewards(prompt, successful_completions, name) + reward_event = self.get_rewards(prompt, successful_completions, name) + successful_rewards = reward_event.pop('reward') # Softmax rewards across samples. successful_rewards_normalized = self.normalize_rewards(successful_rewards) @@ -135,11 +147,10 @@ def apply( filled_rewards[idx] = reward filled_rewards_normalized[idx] = reward_normalized - if not reward_event: - reward_event = {} - - reward_event[reward_fn_i.name] = filled_rewards.tolist() - reward_event[reward_fn_i.name + "_normalized"] = filled_rewards_normalized.tolist() - + # Name each item of the reward event with the reward model name. + reward_event = { f"{self.name}_{k}": v for k, v in reward_event.items()} + reward_event[self.name] = filled_rewards.tolist() + reward_event[self.name + "_normalized"] = filled_rewards_normalized.tolist() + # Return the filled rewards. 
return filled_rewards_normalized, reward_event diff --git a/prompting/validators/reward/task_validator.py b/prompting/validators/reward/task_validator.py index 6d3448a..5b15cea 100644 --- a/prompting/validators/reward/task_validator.py +++ b/prompting/validators/reward/task_validator.py @@ -18,7 +18,7 @@ import torch from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent class TaskValidator(BaseRewardModel): @@ -29,7 +29,10 @@ def name(self) -> str: def __init__(self): super().__init__() - def reward(self, prompt: str, completion: str, name: str) -> float: + def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: + + reward_event = BaseRewardEvent() + summary_keywords = ["Summary:", "Paraphrase:", "Paraphrasing:", "Paraphrased:"] question_keywords = ["Question:", "Query:", "Q:"] answer_keywords = ["Answer:", "Response:", "A:", "Completion:"] @@ -54,25 +57,34 @@ def reward(self, prompt: str, completion: str, name: str) -> float: if ( is_summarization_prompt or is_question_prompt ) and completion_contains_answer: - return 0.0 + reward_event.reward = 0.0 + return reward_event if ( is_summarization_prompt or is_answer_prompt ) and completion_contains_question: - return 0.0 + reward_event.reward = 0.0 + return reward_event if not is_summarization_prompt and completion_contains_summary: - return 0.0 + reward_event.reward = 0.0 + return reward_event - return 1 + reward_event.reward = 1 + return reward_event def get_rewards( self, prompt: str, completions: List[str], name: str - ) -> Union[torch.FloatTensor, dict]: - return torch.tensor( - [self.reward(prompt, completion, name) for completion in completions], - dtype=torch.float32, - ), None + ) -> dict: + # Get all the reward results. + reward_events = [self.reward(prompt, completion, name) for completion in completions] + + # Parse the result and generate an event to be logged. 
+ parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) + + parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32) + + return parsed_reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards From 602270664c74a14d22f7542887fca09bc298145e Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 20:24:49 +0000 Subject: [PATCH 08/25] update event schema --- prompting/validators/event.py | 24 ++++++++++++++++++++++++ prompting/validators/reward/nsfw.py | 8 ++++---- prompting/validators/reward/relevance.py | 8 ++++---- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/prompting/validators/event.py b/prompting/validators/event.py index 6e51584..9c7b9ce 100644 --- a/prompting/validators/event.py +++ b/prompting/validators/event.py @@ -47,6 +47,8 @@ class EventSchema: List[float] ] # Output vector of the dahoas reward model blacklist_filter: Optional[List[float]] # Output vector of the blacklist filter + blacklist_filter_matched_ngram: Optional[List[str]] # Output vector of the blacklist filter + blacklist_filter_significance_score: Optional[List[float]] # Output vector of the blacklist filter nsfw_filter: Optional[List[float]] # Output vector of the nsfw filter reciprocate_reward_model: Optional[ List[float] @@ -68,6 +70,7 @@ class EventSchema: List[float] ] # Output vector of the dahoas reward model nsfw_filter_normalized: Optional[List[float]] # Output vector of the nsfw filter + nsfw_filter_score: Optional[List[float]] # Output vector of the nsfw filter reciprocate_reward_model_normalized: Optional[ List[float] ] # Output vector of the reciprocate reward model @@ -86,6 +89,12 @@ class EventSchema: relevance_filter_normalized: Optional[ List[float] ] # Output vector of the relevance scoring reward model + relevance_filter_bert_score: Optional[ + List[float] + ] # Output vector of the relevance scoring reward model + relevance_filter_mpnet_score: Optional[ + List[float] + ] # Output vector of the relevance scoring reward model task_validator_filter_normalized: Optional[List[float]] # Weights data @@ -136,6 +145,21 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema": "prompt_reward_model_normalized": event_dict.get( RewardModelType.prompt.value + "_normalized" ), + "blacklist_filter_matched_ngram": event_dict.get( + RewardModelType.blacklist.value + "_matched_ngram" + ), + "blacklist_filter_significance_score": event_dict.get( + RewardModelType.blacklist.value + "_significance_score" + ), + "relevance_filter_bert_score": event_dict.get( + RewardModelType.relevance.value + "_bert_score" + ), + "relevance_filter_mpnet_score": event_dict.get( + RewardModelType.relevance.value + "_mpnet_score" + ), + "nsfw_filter_score": event_dict.get( + RewardModelType.nsfw.value + "_score" + ) } # Logs warning that expected data was not set properly diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index edd1385..c693e2a 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -25,7 +25,7 @@ @dataclass class NSFWRewardEvent(BaseRewardEvent): - nsfw_score: float = None + score: float = None class NSFWRewardModel(BaseRewardModel): nsfw_filter_model_path = "facebook/roberta-hate-speech-dynabench-r4-target" @@ -69,9 +69,9 @@ def sum_nsfw_scores(input_ids, chunk_size): return max_score # 0 when needs to be filtered out, 1 when it is safe - nsfw_score = sum_nsfw_scores(input_ids, 
chunk_size=512) - reward_event.nsfw_score = nsfw_score - reward_event.reward = 0.0 if nsfw_score > boundary else 1.0 + score = sum_nsfw_scores(input_ids, chunk_size=512) + reward_event.score = score + reward_event.reward = 0.0 if score > boundary else 1.0 return reward_event def get_rewards( diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index 8b33b6a..3011d15 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -49,8 +49,8 @@ def mean_pooling(model_output, attention_mask): @dataclass class RelevanceRewardEvent(BaseRewardEvent): - bert_relevancy_score: float = None - mpnet_relevancy_score: float = None + bert_score: float = None + mpnet_score: float = None class RelevanceRewardModel(BaseRewardModel): @property @@ -95,10 +95,10 @@ def reward(self, prompt: str, completion: str, name: str) -> RelevanceRewardEven reward_event.reward = 0 if model.name == 'relevance_bert': - reward_event.bert_relevancy_score = diff + reward_event.bert_score = diff elif model.name == 'relevance_mpnet': - reward_event.mpnet_relevancy_score = diff + reward_event.mpnet_score = diff # If none of the models returned 0, return 1 return reward_event From e374246ca5cbcc681f798061aa60e548ac1aa232 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 20:42:20 +0000 Subject: [PATCH 09/25] black format --- prompting/validators/event.py | 12 +- prompting/validators/forward.py | 8 +- prompting/validators/reward/blacklist.py | 142 ++++++++++-------- prompting/validators/reward/dahoas.py | 15 +- prompting/validators/reward/diversity.py | 9 +- prompting/validators/reward/dpo.py | 27 ++-- prompting/validators/reward/nsfw.py | 16 +- prompting/validators/reward/open_assistant.py | 13 +- prompting/validators/reward/prompt.py | 17 ++- prompting/validators/reward/reciprocate.py | 14 +- prompting/validators/reward/relevance.py | 27 ++-- prompting/validators/reward/reward.py | 14 +- prompting/validators/reward/task_validator.py | 17 ++- 13 files changed, 186 insertions(+), 145 deletions(-) diff --git a/prompting/validators/event.py b/prompting/validators/event.py index 9c7b9ce..45e40be 100644 --- a/prompting/validators/event.py +++ b/prompting/validators/event.py @@ -47,8 +47,12 @@ class EventSchema: List[float] ] # Output vector of the dahoas reward model blacklist_filter: Optional[List[float]] # Output vector of the blacklist filter - blacklist_filter_matched_ngram: Optional[List[str]] # Output vector of the blacklist filter - blacklist_filter_significance_score: Optional[List[float]] # Output vector of the blacklist filter + blacklist_filter_matched_ngram: Optional[ + List[str] + ] # Output vector of the blacklist filter + blacklist_filter_significance_score: Optional[ + List[float] + ] # Output vector of the blacklist filter nsfw_filter: Optional[List[float]] # Output vector of the nsfw filter reciprocate_reward_model: Optional[ List[float] @@ -157,9 +161,7 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema": "relevance_filter_mpnet_score": event_dict.get( RewardModelType.relevance.value + "_mpnet_score" ), - "nsfw_filter_score": event_dict.get( - RewardModelType.nsfw.value + "_score" - ) + "nsfw_filter_score": event_dict.get(RewardModelType.nsfw.value + "_score"), } # Logs warning that expected data was not set properly diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index 073c0e8..06d17a4 100644 --- a/prompting/validators/forward.py +++ 
b/prompting/validators/forward.py @@ -99,7 +99,9 @@ async def run_step( ) # Update blacklist with completions so that n-gram filtering can be applied - self.blacklist.add([response.completion for response in responses if response.completion]) + self.blacklist.add( + [response.completion for response in responses if response.completion] + ) # Restrict the format of acceptable followup completions. for response in responses: @@ -129,7 +131,9 @@ async def run_step( bt.logging.trace(str(reward_fn_i.name), reward_i_normalized.tolist()) for masking_fn_i in self.masking_functions: - mask_i_normalized, reward_event = masking_fn_i.apply(base_prompt, responses, name) + mask_i_normalized, reward_event = masking_fn_i.apply( + base_prompt, responses, name + ) rewards *= mask_i_normalized.to(self.device) # includes diversity if not self.config.neuron.disable_log_rewards: event = {**event, **reward_event} diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index fadee0a..ed457bb 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -28,28 +28,31 @@ # TODO: Use CLI arguments to set blacklist values: the most important being the boundary value and max_size + @dataclass class BlacklistRewardEvent(BaseRewardEvent): matched_ngram: str = None significance_score: float = None + class Blacklist(BaseRewardModel): @property def name(self) -> str: return RewardModelType.blacklist.value - def __init__(self, - boundary:float = 6, - n_min:int = 5, - n_max:int = 14, - word_limit:int = 2000, - A:float = 1.3, - preprocess:str = '[^(\\w|\\s)]', + def __init__( + self, + boundary: float = 6, + n_min: int = 5, + n_max: int = 14, + word_limit: int = 2000, + A: float = 1.3, + preprocess: str = "[^(\\w|\\s)]", partial_ratio_boundary: float = 95, half_life: int = 20000, support: float = 0.01, error: float = 0.001, - memory_lim: int = 1_000_000 + memory_lim: int = 1_000_000, ): """N-gram blacklist reward model which penalizes overused phrases in the network @@ -61,7 +64,7 @@ def __init__(self, word_limit (int, optional): Maximum word length, to prevent extremely long completions from overworking the queue. Defaults to 2000. A (float, optional): Exponent used in significance scoring, smaller A gives more weight to smaller ngrams. Values of 1.1-2 are recommended. Defaults to 1.1. preprocess (str, optional): Regex preprocessing string to make text more uniform. Defaults to '[^(\w|\s)]'. - partial_ratio_boundry (int, optional): Boundry for fuzzy match. + partial_ratio_boundry (int, optional): Boundry for fuzzy match. half_life (int, optional): Half life of the counter. ie. When the number of completions processed > half life, then put all the counters in half. support (float, optional): The percentage of times that a phrase need to appear to get the phrase kept in counter. (support should be >> counter) error (float, optional): Error parameter for lossy sampling, should be as small as possible, further decreasing it further will increase memory usage. (support should be >> error ) @@ -83,15 +86,17 @@ def __init__(self, self.preprocess = re.compile(preprocess) if preprocess else None self._last_update = 0 - # Lossy sampling parameters - self.support = support - self.error = error - self.window = math.ceil(1/self.error) # Window size, counter would get pruned once for each window. - self.w_current = 1 # window index. 
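For intuition, a small self-contained sketch of the lossy-counting bookkeeping that these support, error, and window parameters drive. The class name and the toy token stream below are illustrative assumptions; only the relationships taken from Blacklist apply: the window size is ceil(1 / error), a newly seen item is stored with the largest count it could already have accumulated (w_current - 1), and entries whose count plus that error bound cannot exceed the current window index are pruned at each window boundary.

import math
from typing import Dict, List, Tuple


class LossyCounter:
    """Illustrative lossy counter: each item keeps [count, max_error]."""

    def __init__(self, support: float = 0.01, error: float = 0.001):
        self.support = support                   # fraction of the stream an item must reach
        self.error = error                       # tolerated undercount per item
        self.window = math.ceil(1 / error)       # items per window/bucket
        self.w_current = 1                       # current window index
        self.num_items = 0
        self.counter: Dict[str, List[int]] = {}  # item -> [count, max_error]

    def add(self, item: str) -> None:
        self.num_items += 1
        if item in self.counter:
            self.counter[item][0] += 1
        else:
            # A newly seen item may already have occurred up to w_current - 1 times.
            self.counter[item] = [1, self.w_current - 1]
        # Prune once per window boundary.
        if self.num_items % self.window == 0:
            self.w_current = math.ceil(self.num_items / self.window)
            self._prune()

    def _prune(self) -> None:
        # Drop entries whose count plus max_error cannot exceed the window index.
        stale = [
            item
            for item, (count, max_error) in self.counter.items()
            if count + max_error <= self.w_current
        ]
        for item in stale:
            del self.counter[item]

    def frequent(self) -> List[Tuple[str, int]]:
        # Keep items whose upper-bound count reaches the support fraction of the stream.
        cutoff = self.support * self.num_items
        return [
            (item, count)
            for item, (count, max_error) in self.counter.items()
            if count + max_error >= cutoff
        ]


if __name__ == "__main__":
    lc = LossyCounter(support=0.2, error=0.05)
    for token in ["the"] * 40 + ["rare"] * 2 + ["cat"] * 15:
        lc.add(token)
    print(lc.frequent())  # "the" and "cat" reach the support cutoff; "rare" does not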
+ # Lossy sampling parameters + self.support = support + self.error = error + self.window = math.ceil( + 1 / self.error + ) # Window size, counter would get pruned once for each window. + self.w_current = 1 # window index. self.num_ngram = 0 self.num_completion = 0 - - self.half_life = half_life + + self.half_life = half_life self.tokenizer = BertTokenizer.from_pretrained("bert-base-cased") self.memory_lim = memory_lim @@ -109,7 +114,6 @@ def add(self, texts: List[str]): if ngrams: self._add_ngrams(ngrams) - def extract_ngrams(self, text: str) -> List[tuple]: """Extract n-grams from text string @@ -123,12 +127,12 @@ def extract_ngrams(self, text: str) -> List[tuple]: if self.preprocess: # remove all punctuation - text = self.preprocess.sub('', text) + text = self.preprocess.sub("", text) - words = self.tokenizer(text.lower())['input_ids'][1:-1] + words = self.tokenizer(text.lower())["input_ids"][1:-1] if self.word_limit is not None: - words = words[:self.word_limit] + words = words[: self.word_limit] ngrams = [] for i in range(self.n_min, self.n_max + 1): @@ -148,48 +152,46 @@ def _add_ngrams(self, ngrams: List[tuple]): for ngram in ngrams: if ngram in self.counter: self.counter[ngram][0] += 1 - else: + else: # Store the tuple (frequence, max_error) self.counter[ngram] = [1, self.w_current - 1] - + # Start the prune procedure periodically. self.num_ngram += 1 - + self.num_completion += 1 - + # Prune when move to next window. if self.num_completion % self.window == 0: - self.w_current = math.ceil(self.num_completion / self.window) + self.w_current = math.ceil(self.num_completion / self.window) self.prune() - + # Safety feature: prune when reached max memory size. if len(self.counter) > self.memory_lim: self.w_current += 1 self.prune() - - # Apply half life for the counter + + # Apply half life for the counter if self.num_completion > self.half_life: self.set_counter_to_half() def prune(self): - """Prune the counter when the count is smaller then bucket index. - """ + """Prune the counter when the count is smaller then bucket index.""" prune_ele = [] for ele, (frequence, max_error) in self.counter.items(): if frequence + max_error <= self.w_current: prune_ele.append(ele) - + for ele in prune_ele: - del self.counter[ele] + del self.counter[ele] def reset(self): - """Reset counters to initial values. 
- """ + """Reset counters to initial values.""" self.num_ngram = 0 - self.num_completion = 0 - self.w_current = 1 + self.num_completion = 0 + self.w_current = 1 self.counter = {} - self.significance_scores = {} + self.significance_scores = {} self._last_update = 0 def calculate_significance(self) -> dict: @@ -201,11 +203,17 @@ def calculate_significance(self) -> dict: significance_scores = {} for ngram, count in self.counter.items(): - if count[0] + count[1] > max(self.support * self.num_completion, self.w_current + 1): + if count[0] + count[1] > max( + self.support * self.num_completion, self.w_current + 1 + ): decoded_ngram = self.tokenizer.decode(ngram) if len(decoded_ngram.split()) >= self.n_min: # calculate significance score for ngram - significance_scores[decoded_ngram] = self.A ** (len(decoded_ngram) - 1) * ((count[0] + count[1]) / self.num_completion) * 100 + significance_scores[decoded_ngram] = ( + self.A ** (len(decoded_ngram) - 1) + * ((count[0] + count[1]) / self.num_completion) + * 100 + ) self._last_update = self.num_completion @@ -223,7 +231,7 @@ def get_significance(self) -> dict: return self.significance_scores - def most_common(self, n:int = 10) -> dict: + def most_common(self, n: int = 10) -> dict: """Get most common n-grams in queue Args: @@ -232,10 +240,11 @@ def most_common(self, n:int = 10) -> dict: Returns: dict: Sorted dictionary of n-gram tuples and their counts """ - return sorted(self.counter.items(), key=lambda x: x[1][0] + x[1][1], reverse=True)[:n] - - - def most_significant(self, n:int = 10, force_update:bool = True) -> dict: + return sorted( + self.counter.items(), key=lambda x: x[1][0] + x[1][1], reverse=True + )[:n] + + def most_significant(self, n: int = 10, force_update: bool = True) -> dict: """Get most significant n-grams in queue based on significance scores Args: @@ -246,17 +255,18 @@ def most_significant(self, n:int = 10, force_update:bool = True) -> dict: dict: Sorted dictionary of n-gram tuples and their significance scores """ - scores = self.get_significance() if force_update else self.significance_scores return sorted(scores.items(), key=lambda x: x[1], reverse=True)[:n] def set_counter_to_half(self): - """Set all the counters to half for a rolling window effect. 
- """ - self.num_ngram = math.ceil(self.num_ngram/2) - self.num_completion = math.ceil(self.num_completion/2) - self.w_current = math.ceil(self.num_completion / self.window) - self.counter = { tokens: [ math.ceil(count[0]/2), math.ceil(count[1]/2)] for tokens, count in self.counter.items()} + """Set all the counters to half for a rolling window effect.""" + self.num_ngram = math.ceil(self.num_ngram / 2) + self.num_completion = math.ceil(self.num_completion / 2) + self.w_current = math.ceil(self.num_completion / self.window) + self.counter = { + tokens: [math.ceil(count[0] / 2), math.ceil(count[1] / 2)] + for tokens, count in self.counter.items() + } self._last_update = 0 def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEvent: @@ -281,31 +291,35 @@ def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEven scores = self.get_significance() # Check if any n-grams have significance above the boundary - for ngram, score in scores.items(): - if (score > self.boundary and - fuzz.partial_ratio(ngram, completion.lower()) > self.partial_ratio_boundary + for ngram, score in scores.items(): + if ( + score > self.boundary + and fuzz.partial_ratio(ngram, completion.lower()) + > self.partial_ratio_boundary ): - reward_event.reward = 0 - reward_event.matched_ngram = ngram - reward_event.significance_score = score - return reward_event + reward_event.reward = 0 + reward_event.matched_ngram = ngram + reward_event.significance_score = score + return reward_event reward_event.reward = 1 return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. parsed_reward_events = BlacklistRewardEvent.parse_reward_events(reward_events) # Change the reward into tensor object - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ) return parsed_reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: - return rewards \ No newline at end of file + return rewards diff --git a/prompting/validators/reward/dahoas.py b/prompting/validators/reward/dahoas.py index 5948870..3e1a543 100644 --- a/prompting/validators/reward/dahoas.py +++ b/prompting/validators/reward/dahoas.py @@ -64,9 +64,8 @@ def __init__(self, path: str, device: str): self.PAD_ID = self.tokenizer(self.tokenizer.pad_token)["input_ids"][0] def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: - reward_event = BaseRewardEvent() - + def reward_fn(samples): if samples is None: reward_event.reward = 0 @@ -106,16 +105,18 @@ def reward_fn(samples): reward_event.reward = float((combined_reward - independent_reward).item()) return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. 
- reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) return parsed_reward_events diff --git a/prompting/validators/reward/diversity.py b/prompting/validators/reward/diversity.py index 6e4d0a7..b7f3216 100644 --- a/prompting/validators/reward/diversity.py +++ b/prompting/validators/reward/diversity.py @@ -153,10 +153,7 @@ def regularise(rewards): return regularise(rewards) - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: - + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device), None @@ -174,9 +171,9 @@ def get_rewards( # Return all if historic_rewards != None: - return {'reward': batch_rewards * historic_rewards} + return {"reward": batch_rewards * historic_rewards} else: - return {'reward': batch_rewards} + return {"reward": batch_rewards} def normalize_rewards(self, raw_rewards: torch.FloatTensor) -> torch.FloatTensor: # Applies binarization on the rewards. diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 00fa867..e0ffdbb 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -62,7 +62,9 @@ def reward_single( with torch.no_grad(): # Check if completion is if completion.strip() == "" or len(completion) <= 5: - reward_event.reward = -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + reward_event.reward = ( + -11.0 + ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) return reward_event # Tokenize the combined prompt + completion. @@ -79,7 +81,9 @@ def reward_single( # Completion doesn't fit into model sequence, so return lowest reward. if self.tokenizer.model_max_length <= len(prompt_part): reward_event.reward = -11.0 - return reward_event # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + return ( + reward_event # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + ) # Truncate combined to fit into model max sequence length. if self.tokenizer.model_max_length < len(combined): @@ -128,22 +132,25 @@ def reward_single( # NaNs can possibly arise through log(0)=-inf, replace with suitably small logits. if torch.isnan(reward) or torch.isinf(reward): - reward_event.reward = -11.0 # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) - + reward_event.reward = ( + -11.0 + ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + reward_event.reward = reward.item() return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: - + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward_single(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward_single(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. 
parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}") diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index c693e2a..74d2340 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -23,9 +23,12 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer from dataclasses import dataclass + @dataclass class NSFWRewardEvent(BaseRewardEvent): score: float = None + + class NSFWRewardModel(BaseRewardModel): nsfw_filter_model_path = "facebook/roberta-hate-speech-dynabench-r4-target" @@ -44,7 +47,6 @@ def __init__(self, device: str): ).to(self.device) def reward(self, prompt: str, completion: str, name: str) -> NSFWRewardEvent: - reward_event = NSFWRewardEvent() boundary = -0.5 @@ -74,16 +76,18 @@ def sum_nsfw_scores(input_ids, chunk_size): reward_event.reward = 0.0 if score > boundary else 1.0 return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) return parsed_reward_events diff --git a/prompting/validators/reward/open_assistant.py b/prompting/validators/reward/open_assistant.py index 5975346..dc0a689 100644 --- a/prompting/validators/reward/open_assistant.py +++ b/prompting/validators/reward/open_assistant.py @@ -41,7 +41,6 @@ def __init__(self, device: str): ).to(self.device) def reward_single(self, prompt: str, completion: str, name: str) -> float: - reward_event = BaseRewardEvent() with torch.no_grad(): @@ -51,15 +50,17 @@ def reward_single(self, prompt: str, completion: str, name: str) -> float: reward_event.reward = float(self.model(**inputs).logits[0].cpu().detach()) return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward_single(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward_single(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. 
parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) return parsed_reward_events diff --git a/prompting/validators/reward/prompt.py b/prompting/validators/reward/prompt.py index d8cc629..94c5502 100644 --- a/prompting/validators/reward/prompt.py +++ b/prompting/validators/reward/prompt.py @@ -51,9 +51,8 @@ def __init__(self, device: str): ).to(self.device) def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: - reward_event = BaseRewardEvent() - + with torch.no_grad(): # Choose correct scoring prompt for request type. if name == "augment": @@ -103,9 +102,7 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = score return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: bt.logging.debug( f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward)." ) @@ -113,11 +110,15 @@ def get_rewards( f"PromptRewardModel | prompt: {repr(prompt[:50])} ... {repr(prompt[-50:])}" ) # Get all the reward results. - reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) - return parsed_reward_events \ No newline at end of file + return parsed_reward_events diff --git a/prompting/validators/reward/reciprocate.py b/prompting/validators/reward/reciprocate.py index 66974a8..9006b79 100644 --- a/prompting/validators/reward/reciprocate.py +++ b/prompting/validators/reward/reciprocate.py @@ -58,15 +58,17 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = float(self.model(**inputs)[0].item()) return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. 
parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) - return parsed_reward_events \ No newline at end of file + return parsed_reward_events diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index 3011d15..fd2648b 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -47,11 +47,13 @@ def mean_pooling(model_output, attention_mask): input_mask_expanded.sum(1), min=1e-9 ) + @dataclass class RelevanceRewardEvent(BaseRewardEvent): bert_score: float = None mpnet_score: float = None + class RelevanceRewardModel(BaseRewardModel): @property def name(self) -> str: @@ -66,16 +68,18 @@ def __init__(self, device: str): ] self.bounds = [-0.0246, 0.3] - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. parsed_reward_events = RelevanceRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32).to(self.device) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ).to(self.device) return parsed_reward_events @@ -83,7 +87,6 @@ def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards def reward(self, prompt: str, completion: str, name: str) -> RelevanceRewardEvent: - reward_event = RelevanceRewardEvent() for i, model in enumerate(self.models): @@ -94,12 +97,12 @@ def reward(self, prompt: str, completion: str, name: str) -> RelevanceRewardEven if diff < self.bounds[i]: reward_event.reward = 0 - if model.name == 'relevance_bert': + if model.name == "relevance_bert": reward_event.bert_score = diff - - elif model.name == 'relevance_mpnet': + + elif model.name == "relevance_mpnet": reward_event.mpnet_score = diff - + # If none of the models returned 0, return 1 return reward_event @@ -169,7 +172,7 @@ class MpnetRelevenceModel(BaseRewardModel): @property def name(self) -> str: return RewardModelType.relevance_mpnet.value - + def __init__(self, device: str): super().__init__() self.device = device @@ -218,4 +221,4 @@ def reward(self, prompt: str, completion: str) -> torch.FloatTensor: # Calculate the pairwise cosine similarity. similarity = pairwise_cosine_similarity(prompt_embed, embeddings) - return torch.abs(similarity).item() \ No newline at end of file + return torch.abs(similarity).item() diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 139abed..a2cf3ab 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -22,17 +22,21 @@ from abc import abstractmethod from dataclasses import dataclass, asdict, fields + @dataclass class BaseRewardEvent: - reward: float = 1. 
+ reward: float = 1.0 normalized_reward: float = None def parse_reward_events(reward_events): field_names = [field.name for field in fields(reward_events[0])] - reward_events = [asdict(reward_event).values() for reward_event in reward_events] + reward_events = [ + asdict(reward_event).values() for reward_event in reward_events + ] reward_event = dict(zip(field_names, list(zip(*reward_events)))) return reward_event + class BaseRewardModel: @property @abstractmethod @@ -129,7 +133,7 @@ def apply( # Reward each completion. reward_event = self.get_rewards(prompt, successful_completions, name) - successful_rewards = reward_event.pop('reward') + successful_rewards = reward_event.pop("reward") # Softmax rewards across samples. successful_rewards_normalized = self.normalize_rewards(successful_rewards) @@ -147,8 +151,8 @@ def apply( filled_rewards[idx] = reward filled_rewards_normalized[idx] = reward_normalized - # Name each item of the reward event with the reward model name. - reward_event = { f"{self.name}_{k}": v for k, v in reward_event.items()} + # Name each item of the reward event with the reward model name. + reward_event = {f"{self.name}_{k}": v for k, v in reward_event.items()} reward_event[self.name] = filled_rewards.tolist() reward_event[self.name + "_normalized"] = filled_rewards_normalized.tolist() diff --git a/prompting/validators/reward/task_validator.py b/prompting/validators/reward/task_validator.py index 5b15cea..7026b42 100644 --- a/prompting/validators/reward/task_validator.py +++ b/prompting/validators/reward/task_validator.py @@ -30,9 +30,8 @@ def __init__(self): super().__init__() def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: - - reward_event = BaseRewardEvent() - + reward_event = BaseRewardEvent() + summary_keywords = ["Summary:", "Paraphrase:", "Paraphrasing:", "Paraphrased:"] question_keywords = ["Question:", "Query:", "Q:"] answer_keywords = ["Answer:", "Response:", "A:", "Completion:"] @@ -73,16 +72,18 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = 1 return reward_event - def get_rewards( - self, prompt: str, completions: List[str], name: str - ) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: # Get all the reward results. - reward_events = [self.reward(prompt, completion, name) for completion in completions] + reward_events = [ + self.reward(prompt, completion, name) for completion in completions + ] # Parse the result and generate an event to be logged. 
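To make the shape of that parsed result concrete, a minimal sketch follows, assuming a hypothetical ExampleRewardEvent subclass in place of the real per-model events: parse_reward_events transposes a list of per-completion dataclasses into one dict keyed by field name, each value being a tuple with one entry per completion.

from dataclasses import dataclass, asdict, fields
from typing import Optional


@dataclass
class BaseRewardEvent:
    reward: float = 1.0
    normalized_reward: Optional[float] = None

    @staticmethod
    def parse_reward_events(reward_events):
        # Transpose a list of dataclass instances into {field_name: (values, ...)}.
        field_names = [field.name for field in fields(reward_events[0])]
        values = [asdict(event).values() for event in reward_events]
        return dict(zip(field_names, list(zip(*values))))


@dataclass
class ExampleRewardEvent(BaseRewardEvent):
    # Hypothetical extra column, standing in for e.g. a bert or nsfw score.
    score: Optional[float] = None


events = [
    ExampleRewardEvent(reward=1.0, score=0.42),
    ExampleRewardEvent(reward=0.0, score=0.03),
]
print(BaseRewardEvent.parse_reward_events(events))
# {'reward': (1.0, 0.0), 'normalized_reward': (None, None), 'score': (0.42, 0.03)}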
parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - parsed_reward_events['reward'] = torch.tensor(parsed_reward_events['reward'], dtype=torch.float32) + parsed_reward_events["reward"] = torch.tensor( + parsed_reward_events["reward"], dtype=torch.float32 + ) return parsed_reward_events From d76bfd24686e005f088fb11966fb165c7862eb29 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 22:03:44 +0000 Subject: [PATCH 10/25] fix mock --- prompting/validators/mock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/mock.py b/prompting/validators/mock.py index ae07e76..73aab25 100644 --- a/prompting/validators/mock.py +++ b/prompting/validators/mock.py @@ -59,7 +59,7 @@ def __init__(self, mock_name: str = "MockReward"): def apply(self, prompt: str, completion: List[str], name: str) -> torch.FloatTensor: mock_reward = torch.tensor([1 for _ in completion], dtype=torch.float32) - return mock_reward, mock_reward + return mock_reward, {} def reset(self): return self From c9bbae218db10d884e8294b9bfc549cf5653252f Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 22:13:21 +0000 Subject: [PATCH 11/25] get rewards -> List[BaseRewardEvent] --- prompting/validators/reward/blacklist.py | 13 ++---------- prompting/validators/reward/dahoas.py | 11 ++-------- prompting/validators/reward/diversity.py | 20 +++++++++++++------ prompting/validators/reward/dpo.py | 13 +++--------- prompting/validators/reward/nsfw.py | 11 ++-------- prompting/validators/reward/open_assistant.py | 13 +++--------- prompting/validators/reward/prompt.py | 11 ++-------- prompting/validators/reward/reciprocate.py | 11 ++-------- prompting/validators/reward/relevance.py | 12 ++--------- prompting/validators/reward/reward.py | 17 +++++++++------- prompting/validators/reward/task_validator.py | 11 ++-------- 11 files changed, 44 insertions(+), 99 deletions(-) diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index ed457bb..a60cf4c 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -305,21 +305,12 @@ def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEven reward_event.reward = 1 return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BlacklistRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions ] - - # Parse the result and generate an event to be logged. 
- parsed_reward_events = BlacklistRewardEvent.parse_reward_events(reward_events) - - # Change the reward into tensor object - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ) - - return parsed_reward_events + return reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards diff --git a/prompting/validators/reward/dahoas.py b/prompting/validators/reward/dahoas.py index 3e1a543..24c29f4 100644 --- a/prompting/validators/reward/dahoas.py +++ b/prompting/validators/reward/dahoas.py @@ -105,20 +105,13 @@ def reward_fn(samples): reward_event.reward = float((combined_reward - independent_reward).item()) return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. - parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - - return parsed_reward_events + return reward_events def forward( self, diff --git a/prompting/validators/reward/diversity.py b/prompting/validators/reward/diversity.py index b7f3216..7b51671 100644 --- a/prompting/validators/reward/diversity.py +++ b/prompting/validators/reward/diversity.py @@ -20,9 +20,9 @@ import torch.nn.functional as F from typing import List, Union from .config import RewardModelType -from .reward import BaseRewardModel +from .reward import BaseRewardModel, BaseRewardEvent from transformers import AutoTokenizer, AutoModel - +from dataclasses import dataclass from torchmetrics.functional import pairwise_cosine_similarity @@ -47,6 +47,10 @@ def mean_pooling(model_output, attention_mask): input_mask_expanded.sum(1), min=1e-9 ) +@dataclass +class DiversityRewardEvent(BaseRewardEvent): + historic: float = None + batch: float = None class DiversityRewardModel(BaseRewardModel): diversity_model_path = "sentence-transformers/all-mpnet-base-v2" @@ -153,7 +157,7 @@ def regularise(rewards): return regularise(rewards) - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[DiversityRewardEvent]: # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device), None @@ -169,11 +173,15 @@ def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: self.update_historic_embeddings(embeddings) - # Return all + reward_events = [] if historic_rewards != None: - return {"reward": batch_rewards * historic_rewards} + for b, h in zip(batch_rewards.tolist(), historic_rewards.tolist()): + reward_events.append(DiversityRewardEvent(reward = b*h, batch = b, historic = h)) else: - return {"reward": batch_rewards} + for b in batch_rewards.tolist(): + reward_events.append(DiversityRewardEvent(reward = b, batch = b)) + + return reward_events def normalize_rewards(self, raw_rewards: torch.FloatTensor) -> torch.FloatTensor: # Applies binarization on the rewards. 
diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index e0ffdbb..1e7a1bc 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -51,7 +51,7 @@ def __init__(self, device: str): def reward_single( self, prompt: str, completion: str, name: str, with_penalty=True - ) -> float: + ) -> BaseRewardEvent: r"""Calculates a direct preference optimization (DPO) style reward for a completion, which is a reference model's average log-probability for completion tokens given a prompt. Uses guidance from https://github.com/eric-mitchell/direct-preference-optimization/blob/main/trainers.py. @@ -139,19 +139,12 @@ def reward_single( reward_event.reward = reward.item() return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: # Get all the reward results. reward_events = [ self.reward_single(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. - parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}") - return parsed_reward_events + return reward_events diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index 74d2340..6ad567a 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -76,20 +76,13 @@ def sum_nsfw_scores(input_ids, chunk_size): reward_event.reward = 0.0 if score > boundary else 1.0 return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[NSFWRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. - parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - - return parsed_reward_events + return reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards diff --git a/prompting/validators/reward/open_assistant.py b/prompting/validators/reward/open_assistant.py index dc0a689..f1c1506 100644 --- a/prompting/validators/reward/open_assistant.py +++ b/prompting/validators/reward/open_assistant.py @@ -40,7 +40,7 @@ def __init__(self, device: str): OpenAssistantRewardModel.reward_model_name ).to(self.device) - def reward_single(self, prompt: str, completion: str, name: str) -> float: + def reward_single(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event = BaseRewardEvent() with torch.no_grad(): @@ -50,17 +50,10 @@ def reward_single(self, prompt: str, completion: str, name: str) -> float: reward_event.reward = float(self.model(**inputs).logits[0].cpu().detach()) return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: # Get all the reward results. 
reward_events = [ self.reward_single(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. - parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - - return parsed_reward_events + return reward_events diff --git a/prompting/validators/reward/prompt.py b/prompting/validators/reward/prompt.py index 94c5502..88b6f1d 100644 --- a/prompting/validators/reward/prompt.py +++ b/prompting/validators/reward/prompt.py @@ -102,7 +102,7 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = score return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: bt.logging.debug( f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward)." ) @@ -114,11 +114,4 @@ def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: self.reward(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. - parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - - return parsed_reward_events + return reward_events diff --git a/prompting/validators/reward/reciprocate.py b/prompting/validators/reward/reciprocate.py index 9006b79..146843f 100644 --- a/prompting/validators/reward/reciprocate.py +++ b/prompting/validators/reward/reciprocate.py @@ -58,17 +58,10 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = float(self.model(**inputs)[0].item()) return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. - parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - - return parsed_reward_events + return reward_events diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index fd2648b..41a6925 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -68,20 +68,12 @@ def __init__(self, device: str): ] self.bounds = [-0.0246, 0.3] - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[RelevanceRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions ] - - # Parse the result and generate an event to be logged. 
- parsed_reward_events = RelevanceRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ).to(self.device) - - return parsed_reward_events + return reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index a2cf3ab..9c60875 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -28,6 +28,7 @@ class BaseRewardEvent: reward: float = 1.0 normalized_reward: float = None + @staticmethod def parse_reward_events(reward_events): field_names = [field.name for field in fields(reward_events[0])] reward_events = [ @@ -116,7 +117,7 @@ def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: def apply( self, prompt: str, responses: List[bt.Synapse], name: str - ) -> torch.FloatTensor: + ) -> Union[torch.FloatTensor, dict]: """Applies the reward model across each call. Unsuccessful responses are zeroed.""" # Get indices of correctly responding calls. @@ -132,8 +133,10 @@ def apply( ] # Reward each completion. - reward_event = self.get_rewards(prompt, successful_completions, name) - successful_rewards = reward_event.pop("reward") + reward_events = BaseRewardEvent.parse_reward_events(self.get_rewards(prompt, successful_completions, name)) + successful_rewards = torch.tensor( + reward_events.pop("reward"), dtype=torch.float32 + ) # Softmax rewards across samples. successful_rewards_normalized = self.normalize_rewards(successful_rewards) @@ -152,9 +155,9 @@ def apply( filled_rewards_normalized[idx] = reward_normalized # Name each item of the reward event with the reward model name. - reward_event = {f"{self.name}_{k}": v for k, v in reward_event.items()} - reward_event[self.name] = filled_rewards.tolist() - reward_event[self.name + "_normalized"] = filled_rewards_normalized.tolist() + reward_events = {f"{self.name}_{k}": v for k, v in reward_event.items()} + reward_events[self.name] = filled_rewards.tolist() + reward_events[self.name + "_normalized"] = filled_rewards_normalized.tolist() # Return the filled rewards. - return filled_rewards_normalized, reward_event + return filled_rewards_normalized, reward_events diff --git a/prompting/validators/reward/task_validator.py b/prompting/validators/reward/task_validator.py index 7026b42..dbd782f 100644 --- a/prompting/validators/reward/task_validator.py +++ b/prompting/validators/reward/task_validator.py @@ -72,20 +72,13 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = 1 return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> dict: + def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions ] - # Parse the result and generate an event to be logged. 
- parsed_reward_events = BaseRewardEvent.parse_reward_events(reward_events) - - parsed_reward_events["reward"] = torch.tensor( - parsed_reward_events["reward"], dtype=torch.float32 - ) - - return parsed_reward_events + return reward_events def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor: return rewards From 21bbd62301e9d3cb529c260db1e9b6c8498e0409 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 22:16:49 +0000 Subject: [PATCH 12/25] schema update --- prompting/validators/event.py | 8 ++++++++ prompting/validators/reward/reward.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/prompting/validators/event.py b/prompting/validators/event.py index 45e40be..1563387 100644 --- a/prompting/validators/event.py +++ b/prompting/validators/event.py @@ -60,6 +60,12 @@ class EventSchema: diversity_reward_model: Optional[ List[float] ] # Output vector of the diversity reward model + diversity_reward_model_historic: Optional[ + List[float] + ] # Output vector of the diversity reward model + diversity_reward_model_batch: Optional[ + List[float] + ] # Output vector of the diversity reward model dpo_reward_model: Optional[List[float]] # Output vector of the dpo reward model rlhf_reward_model: Optional[List[float]] # Output vector of the rlhf reward model prompt_reward_model: Optional[ @@ -119,6 +125,8 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema": RewardModelType.reciprocate.value ), "diversity_reward_model": event_dict.get(RewardModelType.diversity.value), + "diversity_reward_model_historic": event_dict.get(RewardModelType.diversity.value + '_historic'), + "diversity_reward_model_batch": event_dict.get(RewardModelType.diversity.value + '_batch'), "dpo_reward_model": event_dict.get(RewardModelType.dpo.value), "rlhf_reward_model": event_dict.get(RewardModelType.rlhf.value), "prompt_reward_model": event_dict.get(RewardModelType.prompt.value), diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 9c60875..4fcf864 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -155,7 +155,7 @@ def apply( filled_rewards_normalized[idx] = reward_normalized # Name each item of the reward event with the reward model name. 
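For a concrete picture of this naming step, a short sketch with invented numbers and a hypothetical model name of relevance_filter, showing how the parsed columns become flat, prefixed keys next to the raw and normalized reward vectors:

import torch

# Hypothetical parsed events for one reward model, as produced by
# BaseRewardEvent.parse_reward_events on the successful completions.
model_name = "relevance_filter"  # stand-in for self.name
reward_events = {
    "reward": (1.0, 0.0, 1.0),
    "normalized_reward": (None, None, None),
    "bert_score": (0.12, -0.31, 0.44),
    "mpnet_score": (0.52, 0.18, 0.61),
}

# Pop the raw rewards and normalize them (identity normalization in this sketch).
successful_rewards = torch.tensor(reward_events.pop("reward"), dtype=torch.float32)
successful_rewards_normalized = successful_rewards.clone()

# Prefix every remaining column with the model name ...
event = {f"{model_name}_{k}": v for k, v in reward_events.items()}
# ... and add the raw / normalized reward vectors under the model name itself.
event[model_name] = successful_rewards.tolist()
event[model_name + "_normalized"] = successful_rewards_normalized.tolist()

print(sorted(event))
# ['relevance_filter', 'relevance_filter_bert_score',
#  'relevance_filter_mpnet_score', 'relevance_filter_normalized',
#  'relevance_filter_normalized_reward']

The same pattern yields keys such as nsfw_filter_score and blacklist_filter_matched_ngram, matching the fields added to the event schema above.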
- reward_events = {f"{self.name}_{k}": v for k, v in reward_event.items()} + reward_events = {f"{self.name}_{k}": v for k, v in reward_events.items()} reward_events[self.name] = filled_rewards.tolist() reward_events[self.name + "_normalized"] = filled_rewards_normalized.tolist() From 1d033af5fab3efffa19d73a9bbf1615dff4a365f Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 22:20:59 +0000 Subject: [PATCH 13/25] black formatting --- prompting/validators/event.py | 8 ++++++-- prompting/validators/reward/blacklist.py | 4 +++- prompting/validators/reward/dahoas.py | 4 +++- prompting/validators/reward/diversity.py | 12 +++++++++--- prompting/validators/reward/dpo.py | 4 +++- prompting/validators/reward/nsfw.py | 4 +++- prompting/validators/reward/open_assistant.py | 4 +++- prompting/validators/reward/prompt.py | 4 +++- prompting/validators/reward/reciprocate.py | 4 +++- prompting/validators/reward/relevance.py | 4 +++- prompting/validators/reward/reward.py | 6 ++++-- prompting/validators/reward/task_validator.py | 4 +++- 12 files changed, 46 insertions(+), 16 deletions(-) diff --git a/prompting/validators/event.py b/prompting/validators/event.py index 1563387..2ea5887 100644 --- a/prompting/validators/event.py +++ b/prompting/validators/event.py @@ -125,8 +125,12 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema": RewardModelType.reciprocate.value ), "diversity_reward_model": event_dict.get(RewardModelType.diversity.value), - "diversity_reward_model_historic": event_dict.get(RewardModelType.diversity.value + '_historic'), - "diversity_reward_model_batch": event_dict.get(RewardModelType.diversity.value + '_batch'), + "diversity_reward_model_historic": event_dict.get( + RewardModelType.diversity.value + "_historic" + ), + "diversity_reward_model_batch": event_dict.get( + RewardModelType.diversity.value + "_batch" + ), "dpo_reward_model": event_dict.get(RewardModelType.dpo.value), "rlhf_reward_model": event_dict.get(RewardModelType.rlhf.value), "prompt_reward_model": event_dict.get(RewardModelType.prompt.value), diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index a60cf4c..39c2554 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -305,7 +305,9 @@ def reward(self, prompt: str, completion: str, name: str) -> BlacklistRewardEven reward_event.reward = 1 return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BlacklistRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BlacklistRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/dahoas.py b/prompting/validators/reward/dahoas.py index 24c29f4..27344ba 100644 --- a/prompting/validators/reward/dahoas.py +++ b/prompting/validators/reward/dahoas.py @@ -105,7 +105,9 @@ def reward_fn(samples): reward_event.reward = float((combined_reward - independent_reward).item()) return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BaseRewardEvent]: # Get all the reward results. 
reward_events = [ self.reward(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/diversity.py b/prompting/validators/reward/diversity.py index 7b51671..8fa7a9c 100644 --- a/prompting/validators/reward/diversity.py +++ b/prompting/validators/reward/diversity.py @@ -47,11 +47,13 @@ def mean_pooling(model_output, attention_mask): input_mask_expanded.sum(1), min=1e-9 ) + @dataclass class DiversityRewardEvent(BaseRewardEvent): historic: float = None batch: float = None + class DiversityRewardModel(BaseRewardModel): diversity_model_path = "sentence-transformers/all-mpnet-base-v2" @@ -157,7 +159,9 @@ def regularise(rewards): return regularise(rewards) - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[DiversityRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[DiversityRewardEvent]: # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device), None @@ -176,10 +180,12 @@ def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[Di reward_events = [] if historic_rewards != None: for b, h in zip(batch_rewards.tolist(), historic_rewards.tolist()): - reward_events.append(DiversityRewardEvent(reward = b*h, batch = b, historic = h)) + reward_events.append( + DiversityRewardEvent(reward=b * h, batch=b, historic=h) + ) else: for b in batch_rewards.tolist(): - reward_events.append(DiversityRewardEvent(reward = b, batch = b)) + reward_events.append(DiversityRewardEvent(reward=b, batch=b)) return reward_events diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 1e7a1bc..504a958 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -139,7 +139,9 @@ def reward_single( reward_event.reward = reward.item() return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BaseRewardEvent]: # Get all the reward results. reward_events = [ self.reward_single(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/nsfw.py b/prompting/validators/reward/nsfw.py index 6ad567a..710f1d9 100644 --- a/prompting/validators/reward/nsfw.py +++ b/prompting/validators/reward/nsfw.py @@ -76,7 +76,9 @@ def sum_nsfw_scores(input_ids, chunk_size): reward_event.reward = 0.0 if score > boundary else 1.0 return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[NSFWRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[NSFWRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/open_assistant.py b/prompting/validators/reward/open_assistant.py index f1c1506..342001c 100644 --- a/prompting/validators/reward/open_assistant.py +++ b/prompting/validators/reward/open_assistant.py @@ -50,7 +50,9 @@ def reward_single(self, prompt: str, completion: str, name: str) -> BaseRewardEv reward_event.reward = float(self.model(**inputs).logits[0].cpu().detach()) return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BaseRewardEvent]: # Get all the reward results. 
reward_events = [ self.reward_single(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/prompt.py b/prompting/validators/reward/prompt.py index 88b6f1d..ad5d656 100644 --- a/prompting/validators/reward/prompt.py +++ b/prompting/validators/reward/prompt.py @@ -102,7 +102,9 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = score return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BaseRewardEvent]: bt.logging.debug( f"PromptRewardModel | Calculating {len(completions)} rewards (typically < 1 sec/reward)." ) diff --git a/prompting/validators/reward/reciprocate.py b/prompting/validators/reward/reciprocate.py index 146843f..7da1cdd 100644 --- a/prompting/validators/reward/reciprocate.py +++ b/prompting/validators/reward/reciprocate.py @@ -58,7 +58,9 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = float(self.model(**inputs)[0].item()) return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BaseRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/relevance.py b/prompting/validators/reward/relevance.py index 41a6925..5f15bca 100644 --- a/prompting/validators/reward/relevance.py +++ b/prompting/validators/reward/relevance.py @@ -68,7 +68,9 @@ def __init__(self, device: str): ] self.bounds = [-0.0246, 0.3] - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[RelevanceRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[RelevanceRewardEvent]: # Get all the reward results. reward_events = [ self.reward(prompt, completion, name) for completion in completions diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 4fcf864..7d7dd4e 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -133,10 +133,12 @@ def apply( ] # Reward each completion. - reward_events = BaseRewardEvent.parse_reward_events(self.get_rewards(prompt, successful_completions, name)) + reward_events = BaseRewardEvent.parse_reward_events( + self.get_rewards(prompt, successful_completions, name) + ) successful_rewards = torch.tensor( reward_events.pop("reward"), dtype=torch.float32 - ) + ) # Softmax rewards across samples. successful_rewards_normalized = self.normalize_rewards(successful_rewards) diff --git a/prompting/validators/reward/task_validator.py b/prompting/validators/reward/task_validator.py index dbd782f..5317642 100644 --- a/prompting/validators/reward/task_validator.py +++ b/prompting/validators/reward/task_validator.py @@ -72,7 +72,9 @@ def reward(self, prompt: str, completion: str, name: str) -> BaseRewardEvent: reward_event.reward = 1 return reward_event - def get_rewards(self, prompt: str, completions: List[str], name: str) -> List[BaseRewardEvent]: + def get_rewards( + self, prompt: str, completions: List[str], name: str + ) -> List[BaseRewardEvent]: # Get all the reward results. 
reward_events = [ self.reward(prompt, completion, name) for completion in completions From 27badfcdeea512efe37d3685b5a6173f082ee1eb Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Wed, 8 Nov 2023 22:58:09 +0000 Subject: [PATCH 14/25] black formatting --- prompting/validators/reward/blacklist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/prompting/validators/reward/blacklist.py b/prompting/validators/reward/blacklist.py index c254f45..475b494 100644 --- a/prompting/validators/reward/blacklist.py +++ b/prompting/validators/reward/blacklist.py @@ -29,11 +29,13 @@ # TODO: Use CLI arguments to set blacklist values: the most important being the boundary value and max_size + @dataclass class BlacklistRewardEvent(BaseRewardEvent): matched_ngram: str = None significance_score: float = None + class Blacklist(BaseRewardModel): @property def name(self) -> str: From abd0f17ac6d8feb883ed3adc3ec467bc69e5e95a Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:32:51 -0500 Subject: [PATCH 15/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 504a958..9b4c19f 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -62,7 +62,8 @@ def reward_single( with torch.no_grad(): # Check if completion is if completion.strip() == "" or len(completion) <= 5: - reward_event.reward = ( + # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + reward_event.reward = -11.0 -11.0 ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) return reward_event From 00fe827b390c9a3ef0162833dc7732256d343201 Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:33:01 -0500 Subject: [PATCH 16/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 9b4c19f..85de452 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -65,7 +65,7 @@ def reward_single( # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) reward_event.reward = -11.0 -11.0 - ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + return reward_event # Tokenize the combined prompt + completion. From 6b0256388fbace6acb3b57e2db30623e741799ad Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:33:23 -0500 Subject: [PATCH 17/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 85de452..1ed36e7 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -82,7 +82,7 @@ def reward_single( # Completion doesn't fit into model sequence, so return lowest reward. 
if self.tokenizer.model_max_length <= len(prompt_part): reward_event.reward = -11.0 - return ( + return reward_event reward_event # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) ) From 2794c32cd3eba9cd5dac95fe4c469c4588ce415d Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:33:33 -0500 Subject: [PATCH 18/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 1ed36e7..db3f01b 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -83,7 +83,7 @@ def reward_single( if self.tokenizer.model_max_length <= len(prompt_part): reward_event.reward = -11.0 return reward_event - reward_event # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + ) # Truncate combined to fit into model max sequence length. From 3531257eb16e164bd34be85c63f4e1d477597b28 Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:33:42 -0500 Subject: [PATCH 19/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index db3f01b..253963f 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -84,7 +84,7 @@ def reward_single( reward_event.reward = -11.0 return reward_event - ) + # Truncate combined to fit into model max sequence length. if self.tokenizer.model_max_length < len(combined): From fda08afe1495ea91e3c4be98d66999feafab5d99 Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:33:51 -0500 Subject: [PATCH 20/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 253963f..35fa956 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -133,7 +133,7 @@ def reward_single( # NaNs can possibly arise through log(0)=-inf, replace with suitably small logits. if torch.isnan(reward) or torch.isinf(reward): - reward_event.reward = ( + reward_event.reward = 11 -11.0 ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) From c68a22af9bdc1554b5fb667d8a11c0050b9d072a Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:34:06 -0500 Subject: [PATCH 21/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 35fa956..7ffc108 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -134,7 +134,7 @@ def reward_single( # NaNs can possibly arise through log(0)=-inf, replace with suitably small logits. 
if torch.isnan(reward) or torch.isinf(reward): reward_event.reward = 11 - -11.0 + ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) reward_event.reward = reward.item() From 820e6a5cfa62e8e7ef86567fee8f90fe56830cc0 Mon Sep 17 00:00:00 2001 From: isabella618033 <49876827+isabella618033@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:34:15 -0500 Subject: [PATCH 22/25] retain comments Co-authored-by: Steffen Cruz --- prompting/validators/reward/dpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 7ffc108..2e83e0b 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -135,7 +135,7 @@ def reward_single( if torch.isnan(reward) or torch.isinf(reward): reward_event.reward = 11 - ) # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) + reward_event.reward = reward.item() return reward_event From b30046038dc9e081621f3bf2a074c46f267e8476 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Thu, 9 Nov 2023 00:29:12 +0000 Subject: [PATCH 23/25] fixes --- prompting/validators/event.py | 1 - prompting/validators/forward.py | 9 +++------ prompting/validators/reward/reward.py | 7 +++++++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/prompting/validators/event.py b/prompting/validators/event.py index f18c758..206554f 100644 --- a/prompting/validators/event.py +++ b/prompting/validators/event.py @@ -103,7 +103,6 @@ class EventSchema: relevance_filter_mpnet_score: Optional[ List[float] ] # Output vector of the relevance scoring reward model - task_validator_filter_normalized: Optional[List[float]] # TODO: Add comments task_validation_penalty_raw: Optional[List[float]] task_validation_penalty_adjusted: Optional[List[float]] diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index 8e5aa7b..2429bde 100644 --- a/prompting/validators/forward.py +++ b/prompting/validators/forward.py @@ -122,7 +122,7 @@ async def run_step(self, task: Task, k: int, timeout: float, exclude: list = []) self.device ) for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions): - reward_i_normalized, reward_event = reward_fn_i.apply(prompt, responses, name) + reward_i_normalized, reward_event = reward_fn_i.apply(task.base_text, responses, task_name) rewards += weight_i * reward_i_normalized.to(self.device) if not self.config.neuron.disable_log_rewards: event = {**event, **reward_event} @@ -130,7 +130,7 @@ async def run_step(self, task: Task, k: int, timeout: float, exclude: list = []) for masking_fn_i in self.masking_functions: mask_i_normalized, reward_event = masking_fn_i.apply( - base_prompt, responses, name + task.base_text, responses, task_name ) rewards *= mask_i_normalized.to(self.device) # includes diversity if not self.config.neuron.disable_log_rewards: @@ -272,7 +272,4 @@ async def forward(self): best_answer = qa_event["best"] prompt_context += f"\n### ANSWER {k}:\n{best_answer}" - exclude += qa_event["uids"] - - self.blacklist.question_blacklist.append(qg_event["best"]) - self.blacklist.answer_blacklist.append(qa_event["best"]) + exclude += qa_event["uids"] \ No newline at end of file diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 7d7dd4e..dad0e5e 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -155,6 +155,13 @@ def apply( ): filled_rewards[idx] = reward filled_rewards_normalized[idx] = reward_normalized + + # Fill every 
item of the reward_events + for name, reward_values in reward_events.items(): + filled_values = [None]*len(responses) + for idx, reward_value in zip(successful_completions_indices, reward_values): + filled_values[idx] = reward_value + reward_events[name] = filled_values # Name each item of the reward event with the reward model name. reward_events = {f"{self.name}_{k}": v for k, v in reward_events.items()} From 00a201b45eadec7a4263b6c625def0cb13db882d Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Thu, 9 Nov 2023 15:14:38 +0000 Subject: [PATCH 24/25] black format --- prompting/validators/forward.py | 6 ++++-- prompting/validators/reward/reward.py | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py index 2429bde..d6bdb62 100644 --- a/prompting/validators/forward.py +++ b/prompting/validators/forward.py @@ -122,7 +122,9 @@ async def run_step(self, task: Task, k: int, timeout: float, exclude: list = []) self.device ) for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions): - reward_i_normalized, reward_event = reward_fn_i.apply(task.base_text, responses, task_name) + reward_i_normalized, reward_event = reward_fn_i.apply( + task.base_text, responses, task_name + ) rewards += weight_i * reward_i_normalized.to(self.device) if not self.config.neuron.disable_log_rewards: event = {**event, **reward_event} @@ -272,4 +274,4 @@ async def forward(self): best_answer = qa_event["best"] prompt_context += f"\n### ANSWER {k}:\n{best_answer}" - exclude += qa_event["uids"] \ No newline at end of file + exclude += qa_event["uids"] diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index dad0e5e..23c8463 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -155,13 +155,13 @@ def apply( ): filled_rewards[idx] = reward filled_rewards_normalized[idx] = reward_normalized - + # Fill every item of the reward_events - for name, reward_values in reward_events.items(): - filled_values = [None]*len(responses) + for name, reward_values in reward_events.items(): + filled_values = [None] * len(responses) for idx, reward_value in zip(successful_completions_indices, reward_values): filled_values[idx] = reward_value - reward_events[name] = filled_values + reward_events[name] = filled_values # Name each item of the reward event with the reward model name. reward_events = {f"{self.name}_{k}": v for k, v in reward_events.items()} From 07882b7f4c537d8c780c5216e2a1dfbf91e7eb76 Mon Sep 17 00:00:00 2001 From: isabella618033 Date: Thu, 9 Nov 2023 15:17:51 +0000 Subject: [PATCH 25/25] black formatted --- prompting/validators/reward/dpo.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 2e83e0b..dc2b89b 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -64,8 +64,6 @@ def reward_single( if completion.strip() == "" or len(completion) <= 5: # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size) reward_event.reward = -11.0 - -11.0 - return reward_event # Tokenize the combined prompt + completion. @@ -83,8 +81,6 @@ def reward_single( if self.tokenizer.model_max_length <= len(prompt_part): reward_event.reward = -11.0 return reward_event - - # Truncate combined to fit into model max sequence length. 
if self.tokenizer.model_max_length < len(combined): @@ -134,8 +130,6 @@ def reward_single( # NaNs can possibly arise through log(0)=-inf, replace with suitably small logits. if torch.isnan(reward) or torch.isinf(reward): reward_event.reward = 11 - - reward_event.reward = reward.item() return reward_event
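
Taken together, this patch series converges on a single event-passing convention: each reward model's get_rewards() returns one BaseRewardEvent (or subclass) per completion, BaseRewardEvent.parse_reward_events() transposes that list into per-field columns, and apply() pops the "reward" column into a tensor while back-filling every other column to the full response list before prefixing the keys with the model name. The following is a minimal, self-contained sketch of that flow, not the repository code: the body of parse_reward_events, the concrete scores and indices, and the "relevance_filter" prefix are assumptions inferred from how apply() and forward.py consume the data in the diffs above.

from dataclasses import dataclass, fields
from typing import Dict, List, Optional


@dataclass
class BaseRewardEvent:
    reward: float = 1.0

    @staticmethod
    def parse_reward_events(reward_events: List["BaseRewardEvent"]) -> Dict[str, list]:
        # Transpose a list of events into {field_name: [value for completion 0, 1, ...]}.
        if not reward_events:
            return {"reward": []}
        field_names = [f.name for f in fields(reward_events[0])]
        return {
            name: [getattr(event, name) for event in reward_events]
            for name in field_names
        }


@dataclass
class RelevanceRewardEvent(BaseRewardEvent):
    bert_relevancy_score: Optional[float] = None
    mpnet_relevancy_score: Optional[float] = None


# Two successful completions scored by the relevance model (values are made up).
events = [
    RelevanceRewardEvent(reward=1.0, bert_relevancy_score=0.71, mpnet_relevancy_score=0.64),
    RelevanceRewardEvent(reward=0.0, bert_relevancy_score=0.18, mpnet_relevancy_score=0.12),
]
columns = BaseRewardEvent.parse_reward_events(events)
rewards = columns.pop("reward")  # [1.0, 0.0]; apply() wraps this in a float32 tensor

# Back-fill per-completion values to the full response list, as apply() does,
# so that failed (empty) completions are logged as None.
n_responses = 4
successful_completions_indices = [0, 2]  # hypothetical positions of the non-empty responses
for name, values in columns.items():
    filled = [None] * n_responses
    for idx, value in zip(successful_completions_indices, values):
        filled[idx] = value
    columns[name] = filled  # e.g. bert_relevancy_score -> [0.71, None, 0.18, None]

# apply() then prefixes each key with the reward model's name, and run_step()
# merges the resulting dict into the logging event via event = {**event, **reward_event}.
model_name = "relevance_filter"  # illustrative; the real value comes from RewardModelType
event_update = {f"{model_name}_{k}": v for k, v in columns.items()}

The one behavioural consequence worth keeping in mind is that apply() now returns (normalized_rewards, reward_event) instead of the old (raw, normalized) pair, so callers in forward.py must unpack the new pair and merge the event dict themselves.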
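
On the -11.0 sentinel that the dpo.py comments keep repeating ("exp(-11)=1.67e-5 < 2e-5=1/50257"): the intent is that a completion rejected outright (empty, too short, or longer than the model's max sequence length) gets a per-token log-likelihood below what a uniform guess over a GPT-2-sized vocabulary would score, so it can never outrank a real answer. A quick arithmetic check of that comment, plain Python and independent of the repository code:

import math

vocab_size = 50257                          # typical GPT-2 tokenizer vocabulary
uniform_logprob = math.log(1 / vocab_size)  # about -10.82, log-prob of a uniform guess
sentinel = -11.0

assert math.exp(sentinel) < 1 / vocab_size  # 1.67e-5 < 1.99e-5, so -11.0 scores strictly worse
print(round(uniform_logprob, 2), math.exp(sentinel), 1 / vocab_size)

Worth flagging for a follow-up: after the final dpo.py hunk above, the NaN/inf branch is left assigning 11 rather than -11.0, and the reward_event.reward = reward.item() assignment is removed together with the stray lines left by the suggestion commits (patches 15 through 22), which reads like accidental over-deletion rather than the intended behaviour.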