From 621980bdc0d5a41e224febf962a6e0474e2b14ef Mon Sep 17 00:00:00 2001
From: Wen Sun <35923278+HermitSun@users.noreply.github.com>
Date: Sat, 5 Aug 2023 01:35:22 +0800
Subject: [PATCH] fix: incorrect bigcode attention heads num (#676)

---
 vllm/config.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index bd3dd6a2..2e8d5841 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -98,9 +98,11 @@ class ModelConfig:
         # Note: for falcon, when new_decoder_architecture is True, the
         # multi_query flag is ignored and we use n_head_kv for the number of
         # KV heads.
-        if (getattr(self.hf_config, "multi_query", False) and
-            (self.hf_config.model_type == "falcon" and
-             not getattr(self.hf_config, "new_decoder_architecture", False))):
+        new_decoder_arch_falcon = (
+            self.hf_config.model_type == "falcon"
+            and getattr(self.hf_config, "new_decoder_architecture", False))
+        if not new_decoder_arch_falcon and getattr(self.hf_config,
+                                                   "multi_query", False):
             # Multi-query attention, only one KV head.
             return 1
         # For Falcon:
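
For illustration, a minimal sketch of why the old check missed GPT-BigCode-style
models. The SimpleNamespace config and the two helper functions below are
hypothetical stand-ins for this note, not vLLM's actual API: GPT-BigCode sets
multi_query=True but its model_type is not "falcon", so the old condition never
took the single-KV-head branch for it, while the new condition only excludes
Falcon models that use the new decoder architecture.

    from types import SimpleNamespace

    # Toy stand-in for a HuggingFace config; attribute names mirror the diff.
    bigcode_cfg = SimpleNamespace(model_type="gpt_bigcode", multi_query=True)

    def old_is_mqa(hf_config) -> bool:
        # Old condition (removed lines): multi_query is only honored when the
        # model is Falcon without the new decoder architecture.
        return (getattr(hf_config, "multi_query", False) and
                (hf_config.model_type == "falcon" and
                 not getattr(hf_config, "new_decoder_architecture", False)))

    def new_is_mqa(hf_config) -> bool:
        # New condition (added lines): multi_query is honored for any model
        # type, unless it is Falcon with the new decoder architecture.
        new_decoder_arch_falcon = (
            hf_config.model_type == "falcon"
            and getattr(hf_config, "new_decoder_architecture", False))
        return (not new_decoder_arch_falcon
                and getattr(hf_config, "multi_query", False))

    print(old_is_mqa(bigcode_cfg))  # False -> bigcode wrongly kept all KV heads
    print(new_is_mqa(bigcode_cfg))  # True  -> bigcode correctly gets 1 KV head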