diff --git a/src/torchmetrics/functional/multimodal/clip_score.py b/src/torchmetrics/functional/multimodal/clip_score.py index 4ee5c12c89f..e5757c86619 100644 --- a/src/torchmetrics/functional/multimodal/clip_score.py +++ b/src/torchmetrics/functional/multimodal/clip_score.py @@ -51,7 +51,9 @@ def _clip_score_update( f"Expected the number of images and text examples to be the same but got {len(images)} and {len(text)}" ) device = images[0].device - processed_input = processor(text=text, images=[i.cpu() for i in images], return_tensors="pt", padding=True) + processed_input = processor( + text=text, images=[i.cpu() for i in images], return_tensors="pt", padding=True + ) # type: ignore img_features = model.get_image_features(processed_input["pixel_values"].to(device)) img_features = img_features / img_features.norm(p=2, dim=-1, keepdim=True) diff --git a/src/torchmetrics/functional/text/infolm.py b/src/torchmetrics/functional/text/infolm.py index f9c9c65b045..25033d00100 100644 --- a/src/torchmetrics/functional/text/infolm.py +++ b/src/torchmetrics/functional/text/infolm.py @@ -384,7 +384,7 @@ def _get_batch_distribution( for mask_idx in range(seq_len): input_ids = batch["input_ids"].clone() input_ids[:, mask_idx] = special_tokens_map["mask_token_id"] - logits_distribution = model(input_ids, batch["attention_mask"]).logits + logits_distribution = model(input_ids, batch["attention_mask"]).logits # type: ignore # [batch_size, seq_len, vocab_size] -> [batch_size, vocab_size] logits_distribution = logits_distribution[:, mask_idx, :] prob_distribution = F.softmax(logits_distribution / temperature, dim=-1)