diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 99f1b2d6..731f4f4a 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -101,7 +101,7 @@ async def main(args):
     engine_args = AsyncEngineArgs.from_cli_args(args)
 
     engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_BATCH_RUNNER)
 
     # When using single vLLM without engine_use_ray
     model_config = await engine.get_model_config()
diff --git a/vllm/usage/usage_lib.py b/vllm/usage/usage_lib.py
index 9029a5b1..40a954a2 100644
--- a/vllm/usage/usage_lib.py
+++ b/vllm/usage/usage_lib.py
@@ -90,6 +90,7 @@ class UsageContext(str, Enum):
     LLM_CLASS = "LLM_CLASS"
     API_SERVER = "API_SERVER"
     OPENAI_API_SERVER = "OPENAI_API_SERVER"
+    OPENAI_BATCH_RUNNER = "OPENAI_BATCH_RUNNER"
     ENGINE_CONTEXT = "ENGINE_CONTEXT"
 
 