From 1aa13615103c2ea47e36710a9b2e17dfe1909143 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 10 Dec 2023 13:01:21 +0800 Subject: [PATCH] Fix OpenAI server completion_tokens referenced before assignment (#1996) --- vllm/entrypoints/openai/api_server.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 7b94e1b5..b4055edd 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -332,8 +332,7 @@ async def create_chat_completion(request: ChatCompletionRequest, # Send token-by-token response for each request.n delta_text = output.text[len(previous_texts[i]):] previous_texts[i] = output.text - completion_tokens = len(output.token_ids) - previous_num_tokens[i] = completion_tokens + previous_num_tokens[i] = len(output.token_ids) choice_data = ChatCompletionResponseStreamChoice( index=i, delta=DeltaMessage(content=delta_text), @@ -351,8 +350,8 @@ async def create_chat_completion(request: ChatCompletionRequest, prompt_tokens = len(res.prompt_token_ids) final_usage = UsageInfo( prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + completion_tokens=previous_num_tokens[i], + total_tokens=prompt_tokens + previous_num_tokens[i], ) choice_data = ChatCompletionResponseStreamChoice( index=i, delta=[], finish_reason=output.finish_reason)