From f33590b1e4a15569288e01efa948f0a104f521b5 Mon Sep 17 00:00:00 2001
From: liaoyanqing <1793706453@qq.com>
Date: Thu, 9 Jan 2025 10:59:07 +0800
Subject: [PATCH 1/3] [Bug] Fix usage of '.transpose()' and '.view()' consecutively.

---
 vllm/model_executor/models/intern_vit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 7ff68bd60e8ad..367c2decb75bc 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -271,7 +271,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         v = v.transpose(1, 2)
 
         x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
-        x = x.transpose(1, 2).view(B, N, -1)
+        x = x.transpose(1, 2).contiguous().view(B, N, -1)
 
         x = self.proj(x)
         return x

From 1aee6980692afde751ed7de09e94da0948dba784 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Mon, 13 Jan 2025 11:38:30 +0800
Subject: [PATCH 2/3] fix MHA layer

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 vllm/attention/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index f1b3598e60b54..eaa5990de4b66 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -222,7 +222,7 @@ def forward(
                                                  key,
                                                  value,
                                                  scale=self.scale)
-            out = out.transpose(1, 2)
+            out = out.transpose(1, 2).contiguous()
         return out.view(bsz, q_len, -1)
 
 

From 791643cdb068ba712cf9b547053c08273ee88ccc Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Mon, 13 Jan 2025 12:30:16 +0800
Subject: [PATCH 3/3] use reshape

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 vllm/attention/layer.py                  | 4 ++--
 vllm/model_executor/models/intern_vit.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index eaa5990de4b66..5f15faa5a0bb6 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -222,8 +222,8 @@ def forward(
                                                  key,
                                                  value,
                                                  scale=self.scale)
-            out = out.transpose(1, 2).contiguous()
-        return out.view(bsz, q_len, -1)
+            out = out.transpose(1, 2)
+        return out.reshape(bsz, q_len, -1)
 
 
 def unified_attention(

diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 367c2decb75bc..8ad009d5101e4 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -271,7 +271,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         v = v.transpose(1, 2)
 
         x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
-        x = x.transpose(1, 2).contiguous().view(B, N, -1)
+        x = x.transpose(1, 2).reshape(B, N, -1)
 
         x = self.proj(x)
         return x
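
Note: all three patches address the same PyTorch constraint. Tensor.view() only works on a contiguous tensor, while transpose() returns a non-contiguous view of the same storage, so x.transpose(1, 2).view(...) raises a RuntimeError. Patches 1 and 2 insert .contiguous() before .view(); patch 3 simplifies both call sites to .reshape(), which returns a view when possible and copies otherwise. The snippet below is a minimal standalone sketch of that behavior; the tensor shapes (B, H, N, D) are illustrative assumptions, not values taken from the patched code.

import torch

# Attention output laid out as (batch, heads, seq_len, head_dim); shapes are made up.
B, H, N, D = 2, 4, 8, 16
x = torch.randn(B, H, N, D)

# transpose() permutes strides without moving data, so the result is non-contiguous.
y = x.transpose(1, 2)          # shape (B, N, H, D)
assert not y.is_contiguous()

# view() cannot merge the now non-adjacent H and D dimensions and raises RuntimeError.
try:
    y.view(B, N, -1)
except RuntimeError as err:
    print("view() failed:", err)

# Fix used in patches 1 and 2: materialize a contiguous copy, then view.
out_contig = y.contiguous().view(B, N, -1)

# Fix used in patch 3: reshape() returns a view when it can and copies when it must.
out_reshape = y.reshape(B, N, -1)

assert out_contig.shape == out_reshape.shape == (B, N, H * D)
assert torch.equal(out_contig, out_reshape)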