support TP in qwen2 bnb (#9574)

This commit is contained in:
chenqianfzh 2024-10-22 00:13:23 -07:00 committed by GitHub
parent f7db5f0fa9
commit 0d02747f2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -364,6 +364,20 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
]
# No extra embedding modules / padding modules are registered for this model.
embedding_modules = {}
embedding_padding_modules = []
# BitsAndBytes-specific attributes.
# Substrings of linear-layer weight names that are the default targets for
# bitsandbytes quantization (MLP and attention projections).
default_bitsandbytes_target_modules = [
".gate_proj.",
".down_proj.",
".up_proj.",
".q_proj.",
".k_proj.",
".v_proj.",
".o_proj.",
]
# in TP, these weights are partitioned along the column dimension (dim=-1)
# (down_proj / o_proj are the row-parallel linears whose weight matrices are
# sharded along their last dim — presumably used by the BnB loader to pick
# the right shard slice; verify against the weight-loading code)
column_parallel_weights_modules = [".down_proj.", ".o_proj."]
bitsandbytes_stacked_params_mapping = {
# shard_name, weight_name, index
"q_proj": ("qkv_proj", 0),