[Frontend] Separate OpenAI Batch Runner usage from API Server (#4851)

This commit is contained in:
Alex Wu 2024-05-16 11:42:41 -04:00 committed by GitHub
parent dbc0754ddf
commit 5e0391c040
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2 additions and 1 deletion

View File

@@ -101,7 +101,7 @@ async def main(args):
engine_args = AsyncEngineArgs.from_cli_args(args)
engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_BATCH_RUNNER)
# When using single vLLM without engine_use_ray
model_config = await engine.get_model_config()

View File

@@ -90,6 +90,7 @@ class UsageContext(str, Enum):
LLM_CLASS = "LLM_CLASS"
API_SERVER = "API_SERVER"
OPENAI_API_SERVER = "OPENAI_API_SERVER"
+    OPENAI_BATCH_RUNNER = "OPENAI_BATCH_RUNNER"
ENGINE_CONTEXT = "ENGINE_CONTEXT"