[Frontend][Misc] Enforce Pixel Values as Input Type for VLMs in API Server (#5374)

This commit is contained in:
Roger Wang 2024-06-10 02:13:39 -07:00 committed by GitHub
parent 5884c2b454
commit 68bc81703e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -183,6 +183,16 @@ if __name__ == "__main__":
served_model_names = [args.model]
# Derive the engine arguments from the CLI arguments.
engine_args = AsyncEngineArgs.from_cli_args(args)

# The API server only serves vision language models with pixel values as
# the image input type. An unset value (None) is left alone; any other
# explicit value is rejected here, before the engine is constructed.
# The comparison is case-insensitive.
if engine_args.image_input_type is not None:
    if engine_args.image_input_type.upper() != "PIXEL_VALUES":
        raise ValueError(
            f"Invalid image_input_type: {engine_args.image_input_type}. "
            "Only --image-input-type 'pixel_values' is supported for serving "
            "vision language models with the vLLM API server.")

engine = AsyncLLMEngine.from_engine_args(
    engine_args,
    usage_context=UsageContext.OPENAI_API_SERVER,
)