From 2dd34371a6054966d30971dae89b0c431d7f0f08 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 2 Aug 2024 03:00:28 +0800
Subject: [PATCH] [Bugfix] Fix RMSNorm forward in InternViT attention qk_layernorm (#6992)

---
 vllm/model_executor/models/intern_vit.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 86d0930d..c6c692de 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -113,10 +113,10 @@ class InternAttention(nn.Module):
 
         if self.qk_normalization:
             B_, H_, N_, D_ = q.shape
-            q = self.q_norm(q.transpose(1, 2).flatten(-2, -1)).view(
-                B_, N_, H_, D_).transpose(1, 2)
-            k = self.k_norm(k.transpose(1, 2).flatten(-2, -1)).view(
-                B_, N_, H_, D_).transpose(1, 2)
+            q = self.q_norm.forward_native(q.transpose(1, 2).flatten(
+                -2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
+            k = self.k_norm.forward_native(k.transpose(1, 2).flatten(
+                -2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
 
         x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
         x = x.transpose(1, 2).reshape(B, N, C)
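
Note: for reference, a minimal shape-level sketch of what the patched q/k layernorm path does: each (B, H, N, D) tensor is reshaped to (B, N, H*D) so the normalization runs over all head features of a token, then the original layout is restored. The sketch below uses a plain-PyTorch RMSNorm function as a stand-in for vLLM's RMSNorm.forward_native; the rms_norm_native helper and the example shapes are illustrative assumptions, not vLLM code.

import torch


def rms_norm_native(x: torch.Tensor, weight: torch.Tensor,
                    eps: float = 1e-6) -> torch.Tensor:
    # RMSNorm over the last dimension: x / sqrt(mean(x^2) + eps) * weight
    variance = x.pow(2).mean(dim=-1, keepdim=True)
    return x * torch.rsqrt(variance + eps) * weight


B_, H_, N_, D_ = 2, 4, 16, 8      # batch, heads, tokens, head dim
q = torch.randn(B_, H_, N_, D_)
weight = torch.ones(H_ * D_)      # norm weight spans all H_ * D_ features

# (B, H, N, D) -> (B, N, H, D) -> (B, N, H*D): normalize each token's
# concatenated head features, then restore the (B, H, N, D) layout.
q_normed = rms_norm_native(q.transpose(1, 2).flatten(-2, -1),
                           weight).view(B_, N_, H_, D_).transpose(1, 2)
assert q_normed.shape == q.shape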