[Bugfix] Correct adapter usage for cohere and jamba (#8292)

Vladislav Kruglikov 2024-09-09 21:20:46 +03:00 committed by GitHub
parent 58fcc8545a
commit f9b4a2d415
2 changed files with 6 additions and 3 deletions

vllm/model_executor/models/commandr.py

@@ -47,6 +47,8 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.sequence import IntermediateTensors
 
+from .interfaces import SupportsLoRA
+
 
 @torch.compile
 def layer_norm_func(hidden_states, weight, variance_epsilon):
@@ -292,8 +294,7 @@ class CohereModel(nn.Module):
         return hidden_states
 
 
-class CohereForCausalLM(nn.Module):
-
+class CohereForCausalLM(nn.Module, SupportsLoRA):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",

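Why the change matters: vLLM only routes LoRA adapters into models whose class declares the SupportsLoRA marker interface, so before this commit an adapter request for a Cohere model failed the capability check even though the layers themselves were LoRA-ready. A minimal sketch of that gate, assuming the supports_lora() helper from vllm.model_executor.models.interfaces; the assert_lora_capable wrapper is hypothetical, for illustration only:

from vllm.model_executor.models.interfaces import supports_lora

def assert_lora_capable(model) -> None:
    # supports_lora() returns True only for classes that inherit the
    # SupportsLoRA marker, which this commit adds to CohereForCausalLM.
    if not supports_lora(type(model)):
        raise ValueError(
            f"{type(model).__name__} does not declare LoRA support")
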
vllm/model_executor/models/jamba.py

@@ -38,6 +38,8 @@ from vllm.sequence import IntermediateTensors
 from vllm.worker.model_runner import (_BATCH_SIZES_TO_CAPTURE,
                                       _get_graph_batch_size)
 
+from .interfaces import SupportsLoRA
+
 KVCache = Tuple[torch.Tensor, torch.Tensor]
@@ -539,7 +541,7 @@ class JambaModel(nn.Module):
         return hidden_states
 
 
-class JambaForCausalLM(nn.Module, HasInnerState):
+class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",