From 044793d8df6aeb5326b5992d0e60aa4457760e8a Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Date: Sat, 1 Jun 2024 19:35:41 -0400
Subject: [PATCH] [BugFix] Prevent `LLM.encode` for non-generation Models
 (#5184)

Co-authored-by: mgoin
---
 vllm/entrypoints/llm.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 6e971ae7..beee16d1 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -276,6 +276,11 @@ class LLM:
         considered legacy and may be deprecated in the future. You should
         instead pass them via the ``inputs`` parameter.
         """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
@@ -420,6 +425,11 @@ class LLM:
         considered legacy and may be deprecated in the future. You should
         instead pass them via the ``inputs`` parameter.
         """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
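
A minimal usage sketch of the behavior this patch enforces. The model name and
prompt below are illustrative assumptions, not taken from the diff: with an
embedding-mode checkpoint loaded, LLM.generate() now fails fast with a clear
ValueError, and LLM.encode() behaves symmetrically on causal-LM checkpoints.

    # Sketch only: "intfloat/e5-mistral-7b-instruct" is an assumed embedding
    # checkpoint; any *Model (non-*ForCausalLM) architecture triggers the guard.
    from vllm import LLM

    llm = LLM(model="intfloat/e5-mistral-7b-instruct", enforce_eager=True)

    # Embedding model: encode() is the supported entrypoint.
    outputs = llm.encode(["What is the capital of France?"])

    # generate() on an embedding model now raises instead of misbehaving.
    try:
        llm.generate(["What is the capital of France?"])
    except ValueError as err:
        print(err)  # LLM.generate() is only supported for generation models ...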