support bitsandbytes quantization with qwen model (#10549)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
This commit is contained in:
zixuanzhang226 2024-11-22 16:16:14 -08:00 committed by GitHub
parent 97814fbf0f
commit 948c859571
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1028,6 +1028,18 @@ class QWenLLM(QWenBaseModel):
embedding_modules = {}
embedding_padding_modules = []

# Default module-name patterns for bitsandbytes quantization. Each entry is
# dot-delimited on both sides (presumably matched as a substring of the
# parameter name -- the consumer is not visible here; confirm in the loader).
default_bitsandbytes_target_modules = [".c_attn.", ".c_proj.", ".w1.", ".w2."]

# Layout of each entry: shard_name -> (weight_name, index).
# Both "w2" and "w1" map onto the same "gate_up_proj" weight, at
# index 0 and index 1 respectively.
bitsandbytes_stacked_params_mapping = {
    "w2": ("gate_up_proj", 0),
    "w1": ("gate_up_proj", 1),
}
class QWenVL(QWenBaseModel, SupportsMultiModal):
packed_modules_mapping = {