[Bug]: When apply continue_final_message for OpenAI server, the "echo":false is ignored (#10180)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
parent
4d676f0852
commit
da7e702c6f
79
tests/entrypoints/openai/test_chat_echo.py
Normal file
79
tests/entrypoints/openai/test_chat_echo.py
Normal file
@ -0,0 +1,79 @@
|
||||
from typing import NamedTuple
|
||||
|
||||
import openai # use the official client for correctness check
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
# any model with a chat template should work here
|
||||
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
|
||||
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def server():
    """Launch one shared vLLM OpenAI-compatible server for this module.

    Yields a running ``RemoteOpenAIServer``; the server is torn down when the
    module's tests finish.
    """
    cli_args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "float16",
        "--enforce-eager",
        "--max-model-len",
        "4080",
        "--chat-template",
        DUMMY_CHAT_TEMPLATE,
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def client(server):
    """Yield an ``AsyncOpenAI`` client wired to the module-scoped server."""
    async with server.get_async_client() as api_client:
        yield api_client
|
||||
|
||||
|
||||
# Parametrization record: the model under test plus the ``echo`` flag to send.
TestCase = NamedTuple("TestCase", [("model_name", str), ("echo", bool)])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "test_case",
    [
        TestCase(model_name=MODEL_NAME, echo=True),
        TestCase(model_name=MODEL_NAME, echo=False)
    ],
)
async def test_chat_session_with_echo_and_continue_final_message(
        client: openai.AsyncOpenAI, test_case: TestCase):
    """Regression test: ``continue_final_message`` must honor ``echo``.

    The partial assistant message should appear in the response only when
    ``echo`` is True; with ``echo`` False it must be omitted.
    """
    saying: str = "Here is a common saying about apple. An apple a day, keeps"
    conversation = [{
        "role": "user",
        "content": "tell me a common saying"
    }, {
        "role": "assistant",
        "content": saying
    }]
    # test echo with continue_final_message parameter
    chat_completion = await client.chat.completions.create(
        model=test_case.model_name,
        messages=conversation,
        extra_body={
            "echo": test_case.echo,
            "continue_final_message": True,
            "add_generation_prompt": False
        })

    assert chat_completion.id is not None
    assert len(chat_completion.choices) == 1

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "stop"

    message = choice.message
    assert message.content is not None
    # The partial assistant turn is replayed iff echo was requested.
    assert (saying in message.content) == test_case.echo
    assert message.role == "assistant"
|
||||
@ -361,7 +361,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
# Send response to echo the input portion of the
|
||||
# last message
|
||||
if request.echo or request.continue_final_message:
|
||||
if request.echo:
|
||||
last_msg_content: Union[str, List[Dict[str, str]]] = ""
|
||||
if conversation and "content" in conversation[
|
||||
-1] and conversation[-1].get("role") == role:
|
||||
@ -706,7 +706,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
stop_reason=output.stop_reason)
|
||||
choices.append(choice_data)
|
||||
|
||||
if request.echo or request.continue_final_message:
|
||||
if request.echo:
|
||||
last_msg_content: Union[str, List[Dict[str, str]]] = ""
|
||||
if conversation and "content" in conversation[-1] and conversation[
|
||||
-1].get("role") == role:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user