diff --git a/ccs/extraction/extraction.py b/ccs/extraction/extraction.py
index 741d1026..72430434 100644
--- a/ccs/extraction/extraction.py
+++ b/ccs/extraction/extraction.py
@@ -264,12 +264,14 @@ def extract_hiddens(
                 if is_enc_dec:
                     answer = labels = assert_type(Tensor, encoding.labels)
                 else:
-                    a_id = tokenizer.encode(" " + choice["answer"], add_special_tokens=False)
+                    a_id = tokenizer.encode(
+                        " " + choice["answer"], add_special_tokens=False
+                    )
 
                     # the Llama tokenizer splits off leading spaces
                     if tokenizer.decode(a_id[0]).strip() == "":
                         a_id_without_space = tokenizer.encode(
-                            choice, add_special_tokens=False
+                            choice["answer"], add_special_tokens=False
                         )
                         assert a_id_without_space == a_id[1:]
                         a_id = a_id_without_space
@@ -303,7 +305,7 @@ def extract_hiddens(
                     # probs near 1 will be somewhat imprecise
                     # log(p/(1-p)) = log(p) - log(1-p) = logp - log(1 - exp(logp))
                     lm_log_odds[i, j] = logprob - torch.log1p(-logprob.exp())
-                    
+
                 hiddens = (
                     outputs.get("decoder_hidden_states") or outputs["hidden_states"]
                 )
diff --git a/ccs/promptsource/templates/qm_grader_first/templates.yaml b/ccs/promptsource/templates/qm_grader_first/templates.yaml
new file mode 100644
index 00000000..730fd0ed
--- /dev/null
+++ b/ccs/promptsource/templates/qm_grader_first/templates.yaml
@@ -0,0 +1,16 @@
+dataset: None
+templates:
+  41e13d03004a458abeab1e5c22f83e68: !Template
+    answer_choices: False ||| True
+    id: 41e13d03004a458abeab1e5c22f83e68
+    jinja: 'Grader: {{ character }}
+
+      {{ summand1 }} + {{ summand2 }} = {{ sum }}
+
+      Score: ||| {{answer_choices[label]}}'
+    metadata: !TemplateMetadata
+      languages:
+      - en
+      metrics:
+      - Accuracy
+    name: "grader_first"
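
Note on the two `extraction.py` hunks, since the reasoning is easy to miss in the diff: the second change in the first hunk fixes a real bug (the fallback re-encode passed the whole `choice` dict where it meant `choice["answer"]`), and the guard exists because Llama-family SentencePiece tokenizers split a leading space off into its own token. The sketch below is illustrative only, not part of the patch; the checkpoint name and the answer string are assumptions.

```python
import torch
from transformers import AutoTokenizer

# Hypothetical Llama-family checkpoint, chosen only to demonstrate the quirk.
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")

answer = "True"  # stand-in for choice["answer"]
a_id = tokenizer.encode(" " + answer, add_special_tokens=False)

# If the tokenizer split the leading space into its own token, the first id
# decodes to pure whitespace; drop it by re-encoding the bare answer, exactly
# as the patched code does (the bug was encoding the whole `choice` dict).
if tokenizer.decode(a_id[0]).strip() == "":
    a_id_without_space = tokenizer.encode(answer, add_special_tokens=False)
    assert a_id_without_space == a_id[1:]
    a_id = a_id_without_space

# The identity behind lm_log_odds in the second hunk:
#   log(p / (1 - p)) = log(p) - log(1 - p) = logprob - log1p(-exp(logprob))
# Working from the log-probability with log1p keeps the correction term
# accurate when exp(logprob) is tiny; as the in-code comment says, probs
# very close to 1 remain somewhat imprecise.
logprob = torch.tensor(0.9).log()
log_odds = logprob - torch.log1p(-logprob.exp())
torch.testing.assert_close(log_odds, torch.tensor(9.0).log())  # log(0.9 / 0.1)
```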