diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md
index 9b29ca66..200663da 100644
--- a/docs/source/serving/openai_compatible_server.md
+++ b/docs/source/serving/openai_compatible_server.md
@@ -215,10 +215,10 @@ The order of priorities is `command line > config file values > defaults`.
 
 ---
 
 ## Tool calling in the chat completion API
-
-vLLM supports named function calling and `auto` tool choice in the chat completion API. The `tool_choice` options `required` is **not yet supported** but on the roadmap.
+vLLM currently supports named function calling, as well as the `auto` and `none` options for the `tool_choice` field in the chat completion API. The `tool_choice` option `required` is **not yet supported**, but it is on the roadmap.
 
 It is the callers responsibility to prompt the model with the tool information, vLLM will not automatically manipulate the prompt.
+Please see below for the recommended configuration and chat templates to use for function calling with the different models.
 
 ### Named Function Calling
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 1335e51b..0e0bb66c 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -454,6 +454,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
 
         if "tool_choice" not in data and data.get("tools"):
             data["tool_choice"] = "auto"
+
+        # if "tool_choice" is "none" -- ignore tools if present
+        if "tool_choice" in data and data["tool_choice"] == "none":
+            # ensure that no tools are present
+            data.pop("tools", None)
+            return data
 
         # if "tool_choice" is specified -- validation
         if "tool_choice" in data:
@@ -467,8 +473,8 @@
             if data["tool_choice"] != "auto" and not isinstance(
                     data["tool_choice"], dict):
                 raise ValueError(
-                    "`tool_choice` must either be a named tool or \"auto\". "
-                    "`tool_choice=\"none\" is not supported.")
+                    "`tool_choice` must be a named tool, \"auto\", "
+                    "or \"none\".")
 
         # ensure that if "tool_choice" is specified as an object,
         # it matches a valid tool
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index e31dc2ce..fa315fa5 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -469,12 +469,19 @@ class OpenAIServing:
 
         mm_data = await mm_data_future
 
-        if tool_parser is not None:
+        # tool parsing is done only if a tool_parser has been set and if
+        # tool_choice is not "none" (if tool_choice is "none" but a tool_parser
+        # is set, we want to prevent parsing a tool_call hallucinated by the LLM)
+        should_parse_tools = tool_parser is not None and (hasattr(
+            request, "tool_choice") and request.tool_choice != "none")
+
+        if should_parse_tools:
             if not isinstance(request, ChatCompletionRequest):
                 msg = "Tool usage is only supported for Chat Completions API"
                 raise NotImplementedError(msg)
 
-            request = tool_parser(tokenizer).adjust_request(request=request)
+            request = tool_parser(tokenizer).adjust_request(  # type: ignore
+                request=request)
 
         if isinstance(request_prompt, str):
             prompt_inputs = self._tokenize_prompt_input(
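
For reference, a minimal sketch of what the new `"none"` option looks like from the client side, using the standard `openai` Python client against a running vLLM server. The server URL, model name, and `get_weather` tool below are illustrative assumptions, not part of this patch:

```python
# Sketch only: base_url, model, and the get_weather tool are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool, for illustration
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

# With tool_choice="none", the request validator pops the tools from the
# request and serving_engine skips tool parsing, so the response comes back
# as plain assistant text rather than a (possibly hallucinated) tool call.
response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=tools,
    tool_choice="none",
)

print(response.choices[0].message.content)     # plain text answer
print(response.choices[0].message.tool_calls)  # expected: None
```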