From 1bee6e34afd3dc7eef8c2c344f4ae316f36ad1c1 Mon Sep 17 00:00:00 2001
From: stephantul
Date: Thu, 3 Oct 2024 11:38:21 +0200
Subject: [PATCH] Fix nomic embed bug

---
 model2vec/distill/inference.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/model2vec/distill/inference.py b/model2vec/distill/inference.py
index a4ceac6..6459336 100644
--- a/model2vec/distill/inference.py
+++ b/model2vec/distill/inference.py
@@ -127,7 +127,13 @@ def create_output_embeddings_from_model_name(
     for batch_idx in tqdm(range(0, len(stacked), _DEFAULT_BATCH_SIZE)):
         batch = stacked[batch_idx : batch_idx + _DEFAULT_BATCH_SIZE].to(model.device)
         with torch.no_grad():
-            encoded: BaseModelOutputWithPoolingAndCrossAttentions = model(input_ids=batch.to(device))
+            # NOTE: we create these masks because nomic embed requires them.
+            # Normally, we could set them to None
+            token_type_ids = torch.zeros_like(batch)
+            attention_mask = torch.ones_like(batch)
+            encoded: BaseModelOutputWithPoolingAndCrossAttentions = model(
+                input_ids=batch.to(device), attention_mask=attention_mask, token_type_ids=token_type_ids
+            )
         out: torch.Tensor = encoded.last_hidden_state
         # NOTE: If the dtype is bfloat 16, we convert to float32,
         # because numpy does not suport bfloat16
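
For reference, a minimal standalone sketch of the call pattern the patch switches to: an explicit all-ones attention mask and an all-zeros token_type_ids tensor, shaped like the input ids, passed alongside input_ids instead of being left to the model's defaults. The checkpoint name ("bert-base-uncased") and example sentence below are placeholders and not part of the patch; any BERT-style encoder that accepts token_type_ids illustrates the same pattern.

import torch
from transformers import AutoModel, AutoTokenizer

# Placeholder checkpoint, chosen only to keep the sketch self-contained and runnable.
model = AutoModel.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

input_ids = tokenizer(["an example sentence"], return_tensors="pt")["input_ids"]

with torch.no_grad():
    # Build the masks explicitly from the input shape: attend to every token,
    # single segment. Some models (nomic embed among them) require these
    # arguments rather than accepting None.
    attention_mask = torch.ones_like(input_ids)
    token_type_ids = torch.zeros_like(input_ids)
    encoded = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
    )
    embeddings = encoded.last_hidden_state  # (batch, seq_len, hidden_dim)

In the patched function only input_ids are stacked into batches, which is why the masks are rebuilt from the batch shape with torch.ones_like / torch.zeros_like rather than taken from the tokenizer output.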