[Misc] Raise error when using encoder/decoder model with cpu backend (#8355)
This commit is contained in:
parent
1bf2dd9df0
commit
295c4730a8
@ -82,6 +82,9 @@ STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER = ("Prompt adapters are not "
|
|||||||
"currently supported with encoder/"
|
"currently supported with encoder/"
|
||||||
"decoder models.")
|
"decoder models.")
|
||||||
|
|
||||||
|
STR_NOT_IMPL_ENC_DEC_CPU = ("CPU is not currently supported with "
|
||||||
|
"encoder/decoder models.")
|
||||||
|
|
||||||
# Efficiently import all enc/dec error strings
|
# Efficiently import all enc/dec error strings
|
||||||
# rather than having to import all of the above
|
# rather than having to import all of the above
|
||||||
STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
|
STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
|
||||||
@ -97,6 +100,7 @@ STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
|
|||||||
"STR_NOT_IMPL_ENC_DEC_CUDA_GRAPH": STR_NOT_IMPL_ENC_DEC_CUDAGRAPH,
|
"STR_NOT_IMPL_ENC_DEC_CUDA_GRAPH": STR_NOT_IMPL_ENC_DEC_CUDAGRAPH,
|
||||||
"STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND,
|
"STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND,
|
||||||
"STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER": STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER,
|
"STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER": STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER,
|
||||||
|
"STR_NOT_IMPL_ENC_DEC_CPU": STR_NOT_IMPL_ENC_DEC_CPU
|
||||||
}
|
}
|
||||||
|
|
||||||
# Constants related to forcing the attention backend selection
|
# Constants related to forcing the attention backend selection
|
||||||
|
|||||||
@ -15,7 +15,7 @@ from vllm.model_executor.model_loader import get_model
|
|||||||
from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
|
from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
|
||||||
MultiModalInputs)
|
MultiModalInputs)
|
||||||
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
|
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
|
||||||
from vllm.utils import make_tensor_with_pad
|
from vllm.utils import STR_NOT_IMPL_ENC_DEC_ERR_STRS, make_tensor_with_pad
|
||||||
from vllm.worker.model_runner_base import (
|
from vllm.worker.model_runner_base import (
|
||||||
ModelRunnerBase, ModelRunnerInputBase,
|
ModelRunnerBase, ModelRunnerInputBase,
|
||||||
_add_attn_metadata_broadcastable_dict,
|
_add_attn_metadata_broadcastable_dict,
|
||||||
@ -121,6 +121,10 @@ class CPUModelRunner(ModelRunnerBase[CPUModelInput]):
|
|||||||
# Lazy initialization.
|
# Lazy initialization.
|
||||||
self.model: nn.Module # Set after init_Model
|
self.model: nn.Module # Set after init_Model
|
||||||
|
|
||||||
|
if self.model_config.is_encoder_decoder_model:
|
||||||
|
raise NotImplementedError(
|
||||||
|
STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_CPU'])
|
||||||
|
|
||||||
def load_model(self) -> None:
|
def load_model(self) -> None:
|
||||||
self.model = get_model(model_config=self.model_config,
|
self.model = get_model(model_config=self.model_config,
|
||||||
load_config=self.load_config,
|
load_config=self.load_config,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user