From 7e90a2d11785b4cba5172f13178adb6d194a867f Mon Sep 17 00:00:00 2001
From: Fluder-Paradyne <121793617+Fluder-Paradyne@users.noreply.github.com>
Date: Wed, 1 Nov 2023 22:59:44 +0530
Subject: [PATCH] Add `/health` Endpoint for both Servers (#1540)

---
 vllm/entrypoints/api_server.py        | 6 ++++++
 vllm/entrypoints/openai/api_server.py | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py
index 4b22ba71..fb29837d 100644
--- a/vllm/entrypoints/api_server.py
+++ b/vllm/entrypoints/api_server.py
@@ -17,6 +17,12 @@ app = FastAPI()
 engine = None
 
 
+@app.get("/health")
+async def health() -> Response:
+    """Health check."""
+    return Response(status_code=200)
+
+
 @app.post("/generate")
 async def generate(request: Request) -> Response:
     """Generate completion for the request.
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index a0adf4d0..f336b465 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -13,7 +13,7 @@ import uvicorn
 from fastapi import Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.responses import JSONResponse, StreamingResponse, Response
 from packaging import version
 
 from vllm.engine.arg_utils import AsyncEngineArgs
@@ -145,6 +145,12 @@ async def check_length(
     return input_ids, None
 
 
+@app.get("/health")
+async def health() -> Response:
+    """Health check."""
+    return Response(status_code=200)
+
+
 @app.get("/v1/models")
 async def show_available_models():
     """Show available models. Right now we only have one model."""
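
Note: a minimal sketch of probing the new endpoint once either server is running. The host and port here are assumptions (vLLM's entrypoints default to port 8000 unless overridden with --host/--port); only the Python standard library is used so the snippet is self-contained.

    import urllib.request

    # Request the new /health route; an HTTP 200 with an empty body
    # means the server process is up and serving requests.
    # localhost:8000 is an assumed default -- adjust to match your
    # --host/--port flags.
    with urllib.request.urlopen("http://localhost:8000/health") as resp:
        assert resp.status == 200

Because the handler returns a bare Response with status 200 and no body, the check above is all a liveness probe (e.g., a Kubernetes livenessProbe) needs.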