From 5e443b594fab5c4e93b462a0206ddd24b2e40238 Mon Sep 17 00:00:00 2001
From: sasha0552
Date: Thu, 17 Oct 2024 15:06:37 +0000
Subject: [PATCH] [Bugfix] Allow prefill of assistant response when using `mistral_common` (#9446)

---
 vllm/transformers_utils/tokenizers/mistral.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
index aae10d3e..dcb5cf21 100644
--- a/vllm/transformers_utils/tokenizers/mistral.py
+++ b/vllm/transformers_utils/tokenizers/mistral.py
@@ -166,6 +166,10 @@ class MistralTokenizer:
                             tools: Optional[Dict[str, Any]] = None,
                             **kwargs) -> List[int]:
 
+        last_message = messages[-1]
+        if last_message["role"] == "assistant":
+            last_message["prefix"] = True
+
         request = ChatCompletionRequest(messages=messages,
                                         tools=tools)  # type: ignore[type-var]
         encoded = self.mistral.encode_chat_completion(request)