diff --git a/tests/entrypoints/openai/test_chat_echo.py b/tests/entrypoints/openai/test_chat_echo.py
new file mode 100644
index 00000000..223ac5b4
--- /dev/null
+++ b/tests/entrypoints/openai/test_chat_echo.py
@@ -0,0 +1,79 @@
+from typing import NamedTuple
+
+import openai  # use the official client for correctness check
+import pytest
+import pytest_asyncio
+
+from ...utils import RemoteOpenAIServer
+
+# # any model with a chat template should work here
+MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
+DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}"""  # noqa: E501
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "float16",
+        "--enforce-eager",
+        "--max-model-len",
+        "4080",
+        "--chat-template",
+        DUMMY_CHAT_TEMPLATE,
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client() as async_client:
+        yield async_client
+
+
+class TestCase(NamedTuple):
+    model_name: str
+    echo: bool
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        TestCase(model_name=MODEL_NAME, echo=True),
+        TestCase(model_name=MODEL_NAME, echo=False)
+    ],
+)
+async def test_chat_session_with_echo_and_continue_final_message(
+        client: openai.AsyncOpenAI, test_case: TestCase):
+    saying: str = "Here is a common saying about apple. An apple a day, keeps"
+    # test echo with continue_final_message parameter
+    chat_completion = await client.chat.completions.create(
+        model=test_case.model_name,
+        messages=[{
+            "role": "user",
+            "content": "tell me a common saying"
+        }, {
+            "role": "assistant",
+            "content": saying
+        }],
+        extra_body={
+            "echo": test_case.echo,
+            "continue_final_message": True,
+            "add_generation_prompt": False
+        })
+    assert chat_completion.id is not None
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "stop"
+
+    message = choice.message
+    if test_case.echo:
+        assert message.content is not None and saying in message.content
+    else:
+        assert message.content is not None and saying not in message.content
+    assert message.role == "assistant"
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 2eef909e..54ca0463 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -361,7 +361,7 @@ class OpenAIServingChat(OpenAIServing):
 
                     # Send response to echo the input portion of the
                     # last message
-                    if request.echo or request.continue_final_message:
+                    if request.echo:
                         last_msg_content: Union[str, List[Dict[str, str]]] = ""
                         if conversation and "content" in conversation[
                                 -1] and conversation[-1].get("role") == role:
@@ -706,7 +706,7 @@ class OpenAIServingChat(OpenAIServing):
                 stop_reason=output.stop_reason)
             choices.append(choice_data)
 
-        if request.echo or request.continue_final_message:
+        if request.echo:
             last_msg_content: Union[str, List[Dict[str, str]]] = ""
             if conversation and "content" in conversation[-1] and conversation[
                     -1].get("role") == role: