From 154a8ae880c800a8e6250b38a66fbf24c5d1be39 Mon Sep 17 00:00:00 2001
From: Haoyu Wang <30562758+blueyo0@users.noreply.github.com>
Date: Fri, 18 Oct 2024 12:40:14 +0800
Subject: [PATCH] [Qwen2.5] Support bnb quant for Qwen2.5 (#9467)

---
 vllm/model_executor/models/qwen2.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index eb9a9aa9..cb04cc48 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -364,6 +364,14 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     ]
     embedding_modules = {}
     embedding_padding_modules = []
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name, weight_name, index
+        "q_proj": ("qkv_proj", 0),
+        "k_proj": ("qkv_proj", 1),
+        "v_proj": ("qkv_proj", 2),
+        "gate_proj": ("gate_up_proj", 0),
+        "up_proj": ("gate_up_proj", 1),
+    }
 
     def __init__(
         self,
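
Usage sketch (not part of the patch itself): the added bitsandbytes_stacked_params_mapping tells vLLM's bitsandbytes weight loader how the per-projection shards in the HF checkpoint (q_proj/k_proj/v_proj and gate_proj/up_proj) fold into vLLM's fused qkv_proj and gate_up_proj layers, which is what lets in-flight bnb quantization place each shard correctly for Qwen2.5. A minimal sketch of loading a Qwen2.5 model with in-flight bitsandbytes quantization once this patch is in place; the model id is an example, not taken from the patch:

    from vllm import LLM, SamplingParams

    # In-flight bitsandbytes quantization of an unquantized Qwen2.5 checkpoint.
    # "Qwen/Qwen2.5-7B-Instruct" is an example model id (assumption, not from the patch).
    llm = LLM(
        model="Qwen/Qwen2.5-7B-Instruct",
        quantization="bitsandbytes",
        load_format="bitsandbytes",
    )

    outputs = llm.generate(
        ["Explain NF4 quantization in one sentence."],
        SamplingParams(temperature=0.0, max_tokens=64),
    )
    print(outputs[0].outputs[0].text)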