[Frontend] Separate OpenAI Batch Runner usage from API Server (#4851)
commit 5e0391c040
parent dbc0754ddf
vllm/entrypoints/openai/run_batch.py
@@ -101,7 +101,7 @@ async def main(args):
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_BATCH_RUNNER)
 
     # When using single vLLM without engine_use_ray
     model_config = await engine.get_model_config()
vllm/usage/usage_lib.py
@@ -90,6 +90,7 @@ class UsageContext(str, Enum):
     LLM_CLASS = "LLM_CLASS"
     API_SERVER = "API_SERVER"
     OPENAI_API_SERVER = "OPENAI_API_SERVER"
+    OPENAI_BATCH_RUNNER = "OPENAI_BATCH_RUNNER"
     ENGINE_CONTEXT = "ENGINE_CONTEXT"
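For context, usage_context only tags vLLM's anonymous usage telemetry (collected in vllm/usage/usage_lib.py); it does not change engine behavior. Below is a minimal sketch of how the batch entrypoint constructs the engine after this change; the model name is illustrative, not part of the commit.

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.usage.usage_lib import UsageContext

# Illustrative engine arguments; any model vLLM can serve works here.
engine_args = AsyncEngineArgs(model="facebook/opt-125m")

# With this commit, batch jobs report OPENAI_BATCH_RUNNER rather than
# OPENAI_API_SERVER, so batch and interactive serving are no longer
# conflated in the collected usage statistics.
engine = AsyncLLMEngine.from_engine_args(
    engine_args, usage_context=UsageContext.OPENAI_BATCH_RUNNER)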