[Bugfix] Fix KV head calculation for MPT models when using GQA (#5142)

2024-06-17 15:26:41 -07:00 · 2024-06-17 15:26:41 -07:00 · a3e8a05d4c
commit a3e8a05d4c
parent e441bad674
1 changed file with 5 additions and 1 deletion
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -302,7 +302,11 @@ class ModelConfig:
            return 1

        # For DBRX and MPT
-        if self.hf_config.model_type in ["dbrx", "mpt"]:
+        if self.hf_config.model_type == "mpt":
+            if "kv_n_heads" in self.hf_config.attn_config:
+                return self.hf_config.attn_config["kv_n_heads"]
+            return self.hf_config.num_attention_heads
+        if self.hf_config.model_type == "dbrx":
            return getattr(self.hf_config.attn_config, "kv_n_heads",
                           self.hf_config.num_attention_heads)