From a7b3e33078469943d2a11b1c3d634e220b71bf76 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Wed, 29 Nov 2023 13:01:19 -0800
Subject: [PATCH] [Fix] Fix RoPE in ChatGLM-32K (#1841)

---
 vllm/model_executor/models/chatglm.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py
index db426a94..5d243168 100644
--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -75,12 +75,17 @@ class GLMAttention(nn.Module):
             linear_method=linear_method,
         )
 
+        # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
+        rope_ratio = getattr(config, "rope_ratio", 1.0)
+        max_positions = getattr(config, "seq_length", 8192)
         self.attn = PagedAttentionWithRoPE(
             self.num_heads,
             self.head_dim,
             self.scaling,
             rotary_dim=self.head_dim // 2,
             num_kv_heads=self.num_kv_heads,
+            max_position=max_positions,
+            base=10000 * rope_ratio,
             is_neox_style=False,
         )
 
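For context, here is a minimal sketch (not part of the patch or of vLLM) of what scaling the RoPE base by `rope_ratio` does. The `rope_inv_freq` helper and the example ratio are illustrative assumptions; only the `base = 10000 * rope_ratio` relationship comes from the diff above.

```python
import torch


def rope_inv_freq(rotary_dim: int, base: float) -> torch.Tensor:
    """Inverse frequencies used by rotary position embeddings (RoPE)."""
    # Standard RoPE formula: 1 / base^(2i / d) for i = 0, 1, ..., d/2 - 1.
    return 1.0 / (base ** (torch.arange(0, rotary_dim, 2).float() / rotary_dim))


# Hypothetical values mirroring the patch: rotary_dim = head_dim // 2, and a
# rope_ratio read from the HF config (the value 50 here is purely illustrative).
rotary_dim = 64
default_freqs = rope_inv_freq(rotary_dim, base=10000.0)
scaled_freqs = rope_inv_freq(rotary_dim, base=10000.0 * 50)

# Scaling the base lowers the frequencies, i.e. lengthens the rotary periods,
# which is what lets positions up to the 32K sequence length stay distinguishable.
print(default_freqs[:4])
print(scaled_freqs[:4])
```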