From 2e8fc0d4c3bf8374f1f55569069e59ef45d4bc98 Mon Sep 17 00:00:00 2001
From: Simon Mo
Date: Sun, 10 Dec 2023 13:20:30 -0800
Subject: [PATCH] Fix completion API echo and logprob combo (#1992)

---
 vllm/entrypoints/openai/api_server.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index b4055edd..0f131ce6 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -567,17 +567,22 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
                 i = output.index
                 delta_text = output.text[len(previous_texts[i]):]
                 token_ids = output.token_ids[previous_num_tokens[i]:]
-                top_logprobs = output.logprobs[previous_num_tokens[i]:]
+                if request.logprobs is not None:
+                    top_logprobs = output.logprobs[previous_num_tokens[i]:]
+                else:
+                    top_logprobs = None
                 offsets = len(previous_texts[i])
                 if request.echo and not has_echoed[i]:
                     if not echo_without_generation:
                         delta_text = res.prompt + delta_text
                         token_ids = res.prompt_token_ids + token_ids
-                        top_logprobs = res.prompt_logprobs + top_logprobs
-                    else:
+                        if top_logprobs:
+                            top_logprobs = res.prompt_logprobs + top_logprobs
+                    else:  # only just return the prompt
                         delta_text = res.prompt
                         token_ids = res.prompt_token_ids
-                        top_logprobs = res.prompt_logprobs
+                        if top_logprobs:
+                            top_logprobs = res.prompt_logprobs
                     has_echoed[i] = True
                 if request.logprobs is not None:
                     logprobs = create_logprobs(
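
For context, the failure this patch guards against is the echo-plus-no-logprobs combination: when a client sets echo=True but omits logprobs, the patch implies output.logprobs is None, so the old unconditional slice (and the later res.prompt_logprobs + top_logprobs concatenation) would raise a TypeError. Below is a minimal, self-contained sketch of the fixed control flow; FakeOutput and the helper functions are illustrative stand-ins, not vLLM's actual RequestOutput or streaming code.

    # Minimal sketch of the post-patch logprob handling (illustrative only).
    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class FakeOutput:
        text: str
        token_ids: List[int]
        logprobs: Optional[List[dict]]  # None when the client did not request logprobs

    def stream_delta_logprobs(requested_logprobs: Optional[int],
                              output: FakeOutput,
                              previous_num_tokens: int) -> Optional[List[dict]]:
        # Pre-patch: output.logprobs[previous_num_tokens:] was evaluated
        # unconditionally, raising
        #   TypeError: 'NoneType' object is not subscriptable
        # when logprobs were not requested.
        # Post-patch: slice only when the client asked for logprobs.
        if requested_logprobs is not None:
            return output.logprobs[previous_num_tokens:]
        return None

    def echoed_logprobs(top_logprobs: Optional[List[dict]],
                        prompt_logprobs: List[dict]) -> Optional[List[dict]]:
        # Post-patch echo handling: prepend prompt logprobs only when
        # generation logprobs exist, mirroring the `if top_logprobs:` guards.
        if top_logprobs:
            return prompt_logprobs + top_logprobs
        return top_logprobs

    # Example: echo=True with logprobs omitted no longer crashes.
    out = FakeOutput(text="hi", token_ids=[1, 2], logprobs=None)
    assert stream_delta_logprobs(None, out, 0) is None
    assert echoed_logprobs(None, [{"token": -0.1}]) is None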