[BugFix] Prevent LLM.encode for non-generation Models (#5184)

Co-authored-by: mgoin <michael@neuralmagic.com>
Robert Shaw 2024-06-01 19:35:41 -04:00 committed by GitHub
parent c2d6d2f960
commit 044793d8df

@@ -276,6 +276,11 @@ class LLM:
         considered legacy and may be deprecated in the future. You should
         instead pass them via the ``inputs`` parameter.
         """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
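The guard above makes generate() fail fast when the engine was constructed around an embedding model, instead of failing later inside the scheduler. A minimal sketch of the caller-facing behaviour, assuming an embedding checkpoint such as intfloat/e5-mistral-7b-instruct (the model name here is only an illustration, not part of this commit):

    from vllm import LLM

    # Illustrative only: any checkpoint that vLLM loads in embedding mode
    # would trip the same check.
    llm = LLM(model="intfloat/e5-mistral-7b-instruct")

    try:
        llm.generate("Hello, world!")
    except ValueError as err:
        # "LLM.generate() is only supported for generation models (XForCausalLM)."
        print(err)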
@@ -420,6 +425,11 @@ class LLM:
         considered legacy and may be deprecated in the future. You should
         instead pass them via the ``inputs`` parameter.
         """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
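Conversely, encode() now refuses engines that are not running in embedding mode. A sketch of the expected failure, assuming a standard decoder-only generation checkpoint (again, the model name is just an example):

    from vllm import LLM

    # Illustrative only: any generation model (XForCausalLM) triggers the check.
    llm = LLM(model="facebook/opt-125m")

    try:
        llm.encode("Hello, world!")
    except ValueError as err:
        # "LLM.encode() is only supported for embedding models (XModel)."
        print(err)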