From 621980bdc0d5a41e224febf962a6e0474e2b14ef Mon Sep 17 00:00:00 2001
From: Wen Sun <35923278+HermitSun@users.noreply.github.com>
Date: Sat, 5 Aug 2023 01:35:22 +0800
Subject: [PATCH] fix: incorrect bigcode attention heads num (#676)

---
 vllm/config.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index bd3dd6a2..2e8d5841 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -98,9 +98,11 @@ class ModelConfig:
         # Note: for falcon, when new_decoder_architecture is True, the
         # multi_query flag is ignored and we use n_head_kv for the number of
         # KV heads.
-        if (getattr(self.hf_config, "multi_query", False) and
-            (self.hf_config.model_type == "falcon" and
-             not getattr(self.hf_config, "new_decoder_architecture", False))):
+        new_decoder_arch_falcon = (
+            self.hf_config.model_type == "falcon"
+            and getattr(self.hf_config, "new_decoder_architecture", False))
+        if not new_decoder_arch_falcon and getattr(self.hf_config,
+                                                   "multi_query", False):
             # Multi-query attention, only one KV head.
             return 1
         # For Falcon:
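
For illustration, a minimal sketch of why the old check missed GPT-BigCode-style
models. The SimpleNamespace config and the two helper functions below are
hypothetical stand-ins for this note, not vLLM's actual API: GPT-BigCode sets
multi_query=True but its model_type is not "falcon", so the old condition never
took the single-KV-head branch for it, while the new condition only excludes
Falcon models that use the new decoder architecture.

    from types import SimpleNamespace

    # Toy stand-in for a HuggingFace config; attribute names mirror the diff.
    bigcode_cfg = SimpleNamespace(model_type="gpt_bigcode", multi_query=True)

    def old_is_mqa(hf_config) -> bool:
        # Old condition (removed lines): multi_query is only honored when the
        # model is Falcon without the new decoder architecture.
        return (getattr(hf_config, "multi_query", False) and
                (hf_config.model_type == "falcon" and
                 not getattr(hf_config, "new_decoder_architecture", False)))

    def new_is_mqa(hf_config) -> bool:
        # New condition (added lines): multi_query is honored for any model
        # type, unless it is Falcon with the new decoder architecture.
        new_decoder_arch_falcon = (
            hf_config.model_type == "falcon"
            and getattr(hf_config, "new_decoder_architecture", False))
        return (not new_decoder_arch_falcon
                and getattr(hf_config, "multi_query", False))

    print(old_is_mqa(bigcode_cfg))  # False -> bigcode wrongly kept all KV heads
    print(new_is_mqa(bigcode_cfg))  # True  -> bigcode correctly gets 1 KV head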