From fc8a1202857c32f5672b5944d4227b4ed2705037 Mon Sep 17 00:00:00 2001
From: Shashank Rajput
Date: Tue, 3 Dec 2024 22:25:26 -0800
Subject: [PATCH] ..

---
 llmfoundry/models/mpt/modeling_mpt.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 824a7fc929..969a8b56a2 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -196,7 +196,6 @@ def gen_sequence_id_info(
         attn_uses_sequence_id (bool): Whether the attention uses sequence id based masking.
         attn_impl (str): Attention implementation. This function is only creates attention_mask_in_length for flash attention.
         attention_mask (Union[torch.Tensor, None]): Attention mask tensor of shape (batch_size, seq_len)
-        return_pos_in_seq (bool): Whether to return the position in sequence tensor instead of attention mask in length.

     Returns:
         attention_mask_in_length: (batch, seqlen), int, a nonzero number (e.g., 1, 2, 3, etc.) means length of concatenated sequence in b-th batch, and 0 means none. For example, if batch = 3 and seqlen = 6, the attention_mask_in_length is:
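
For reference, a minimal sketch (not the gen_sequence_id_info implementation itself) of how an attention_mask_in_length tensor of this form can be derived from a per-token sequence_id tensor; the helper name and the convention of marking padding tokens with -1 are assumptions for illustration:

    import torch

    def attention_mask_in_length_from_sequence_id(sequence_id: torch.Tensor) -> torch.Tensor:
        """Left-pack per-sequence lengths into a (batch, seqlen) int tensor.

        sequence_id: (batch, seqlen) tensor where every token of a packed
        sequence shares one id and padding tokens are marked with -1
        (an assumed convention for this sketch).
        """
        batch, seqlen = sequence_id.shape
        out = torch.zeros(batch, seqlen, dtype=torch.int32)
        for b in range(batch):
            # Drop padding tokens, then measure each run of equal ids,
            # i.e. the length of each packed sequence in this row.
            valid = sequence_id[b][sequence_id[b] >= 0]
            if valid.numel() == 0:
                continue
            _, counts = torch.unique_consecutive(valid, return_counts=True)
            out[b, : counts.numel()] = counts.to(torch.int32)
        return out

With batch = 3 and seqlen = 6, packing sequence_id = [[0, 0, 1, 1, 1, -1], [0, 0, 0, 1, 1, -1], [0, 0, 0, 0, 0, 0]] yields [[2, 3, 0, 0, 0, 0], [3, 2, 0, 0, 0, 0], [6, 0, 0, 0, 0, 0]], consistent with the description above: nonzero entries are the lengths of the concatenated sequences in that row, and zeros mean none.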