Feature add lora support for Qwen2 (#3177)
commit c59e120c55 (parent d2339d6840)
@@ -21,6 +21,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
     f(in_T, out_T, W_T, narrow, 2048) \
     f(in_T, out_T, W_T, narrow, 2560) \
     f(in_T, out_T, W_T, narrow, 2752) \
+    f(in_T, out_T, W_T, narrow, 2816) \
     f(in_T, out_T, W_T, narrow, 3072) \
     f(in_T, out_T, W_T, narrow, 3456) \
     f(in_T, out_T, W_T, narrow, 3584) \
@@ -36,6 +37,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
     f(in_T, out_T, W_T, narrow, 10240) \
     f(in_T, out_T, W_T, narrow, 11008) \
     f(in_T, out_T, W_T, narrow, 12288) \
+    f(in_T, out_T, W_T, narrow, 13696) \
     f(in_T, out_T, W_T, narrow, 13824) \
     f(in_T, out_T, W_T, narrow, 14336) \
     f(in_T, out_T, W_T, narrow, 16384) \
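The two hunks above extend the list of feature dimensions for which the Punica BGMV LoRA kernel is instantiated: the macro is expanded once per listed size, so a model whose hidden or intermediate size is missing from the list has no compiled specialization for the batched LoRA path. The new entries, 2816 and 13696, are presumably the Qwen2-family sizes this commit targets. As a rough sanity check (a sketch outside this commit; the Qwen1.5 checkpoint names and the use of transformers.AutoConfig are assumptions, and only the sizes visible in this diff are considered), one can compare a checkpoint's configured sizes against the newly added entries:

from transformers import AutoConfig

# Sizes added by the two hunks above (the full header lists many more).
NEWLY_ADDED = {2816, 13696}

# Example Qwen1.5 checkpoint names; substitute the model you intend to serve.
for name in ("Qwen/Qwen1.5-0.5B", "Qwen/Qwen1.5-14B"):
    cfg = AutoConfig.from_pretrained(name)
    covered = NEWLY_ADDED & {cfg.hidden_size, cfg.intermediate_size}
    print(f"{name}: hidden={cfg.hidden_size}, "
          f"intermediate={cfg.intermediate_size}, newly covered={covered}")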
@@ -46,6 +46,7 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.model_executor.weight_utils import (default_weight_loader,
                                               hf_model_weights_iterator)
 from vllm.sequence import SamplerOutput
+from vllm.config import LoRAConfig

 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -264,12 +265,35 @@ class Qwen2Model(nn.Module):


 class Qwen2ForCausalLM(nn.Module):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    supported_lora_modules = [
+        "qkv_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+    ]
+    embedding_modules = {}
+    embedding_padding_modules = []
+
     def __init__(
         self,
         config: Qwen2Config,
         linear_method: Optional[LinearMethodBase] = None,
+        lora_config: Optional[LoRAConfig] = None,
     ) -> None:
+        del lora_config
         super().__init__()
         self.config = config
         self.linear_method = linear_method
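The new class attributes wire Qwen2ForCausalLM into vLLM's LoRA machinery: supported_lora_modules lists the layers that may carry adapter weights, and packed_modules_mapping records that the fused qkv_proj and gate_up_proj layers correspond to the unfused q_proj/k_proj/v_proj and gate_proj/up_proj names that adapters are typically trained against, so their weights can be packed onto the fused projections. The constructor accepts lora_config only for interface compatibility and discards it (del lora_config), since Qwen2 registers no LoRA-specific embeddings (embedding_modules and embedding_padding_modules are empty). The packing is sound because concatenating the outputs of per-module low-rank updates equals one block-structured update on the fused output dimension; the toy check below illustrates that with plain tensors (shapes are made up, and this is not vLLM's actual packing code).

import torch

torch.manual_seed(0)
hidden, rank, out_w = 16, 4, 16   # toy sizes, not Qwen2's real dimensions

# Per-module LoRA factors, as an adapter trained against the unfused
# q_proj/k_proj/v_proj would store them (delta = B @ A, shape out x in).
factors = {m: (torch.randn(out_w, rank), torch.randn(rank, hidden))
           for m in ("q_proj", "k_proj", "v_proj")}

x = torch.randn(hidden)

# Applying each unfused LoRA delta and concatenating the outputs...
unfused = torch.cat([B @ (A @ x) for B, A in factors.values()])

# ...equals a single delta laid out along the fused qkv output dimension,
# which is what packing q/k/v adapter weights onto "qkv_proj" relies on.
packed_delta = torch.cat([B @ A for B, A in factors.values()], dim=0)
packed = packed_delta @ x

print("unfused == packed:", torch.allclose(unfused, packed, atol=1e-5))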
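With the kernel sizes registered and the model attributes in place, a Qwen2 checkpoint can be served with LoRA adapters like the other LoRA-enabled architectures. A minimal offline sketch follows; the checkpoint name, adapter name/path, and sampling settings are placeholders, and it assumes a vLLM build with the Punica LoRA kernels compiled.

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Placeholder checkpoint; substitute the Qwen2 model you actually serve.
llm = LLM(model="Qwen/Qwen1.5-7B-Chat", enable_lora=True)

outputs = llm.generate(
    ["Summarize LoRA in one sentence."],
    SamplingParams(temperature=0.0, max_tokens=64),
    # Placeholder adapter name and local path; point this at a Qwen2 LoRA
    # adapter directory saved in the usual PEFT layout.
    lora_request=LoRARequest("qwen2-adapter", 1, "/path/to/qwen2-lora"),
)
print(outputs[0].outputs[0].text)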