From d65fac2738f0287a41955b45df76a2d5a919bff6 Mon Sep 17 00:00:00 2001
From: Jason Cox
Date: Sun, 3 Mar 2024 00:00:29 -0500
Subject: [PATCH] Add vLLM version info to logs and openai API server (#3161)

---
 vllm/engine/llm_engine.py             | 3 ++-
 vllm/entrypoints/openai/api_server.py | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index e84fda56..c9bd89a1 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -7,6 +7,7 @@ import importlib
 from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple,
                     Union)
 
+import vllm
 from vllm.lora.request import LoRARequest
 from vllm.config import (CacheConfig, DeviceConfig, ModelConfig,
                          ParallelConfig, SchedulerConfig, LoRAConfig)
@@ -85,7 +86,7 @@ class LLMEngine:
         log_stats: bool,
     ) -> None:
         logger.info(
-            "Initializing an LLM engine with config: "
+            f"Initializing an LLM engine (v{vllm.__version__}) with config: "
             f"model={model_config.model!r}, "
             f"tokenizer={model_config.tokenizer!r}, "
             f"tokenizer_mode={model_config.tokenizer_mode}, "
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 3777e0f3..993a834e 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -15,6 +15,7 @@ from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse, Response
 
+import vllm
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest, ErrorResponse
@@ -168,6 +169,12 @@ async def show_available_models():
     return JSONResponse(content=models.model_dump())
 
 
+@app.get("/version")
+async def show_version():
+    ver = {"version": vllm.__version__}
+    return JSONResponse(content=ver)
+
+
 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
@@ -231,6 +238,7 @@ if __name__ == "__main__":
                 f"Invalid middleware {middleware}. Must be a function or a class."
            )
 
+    logger.info(f"vLLM API server version {vllm.__version__}")
    logger.info(f"args: {args}")
 
     if args.served_model_name is not None:
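
For reference, a minimal sketch of how the new GET /version endpoint added by
this patch can be exercised against a running server. The host/port
(localhost:8000) and the use of the third-party "requests" package are
assumptions for illustration, not part of the patch; the response body shape
{"version": vllm.__version__} is what the handler above returns.

    # Hypothetical usage sketch: query the /version endpoint of a running
    # vLLM OpenAI-compatible API server (assumes the default localhost:8000).
    import requests

    resp = requests.get("http://localhost:8000/version")
    resp.raise_for_status()
    # Prints the server's vLLM version, e.g. {"version": "<vllm.__version__>"}
    print(resp.json())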