[Frontend][OpenAI] Support for returning max_model_len on /v1/models response (#4643)
This commit is contained in:
parent
ed59a7ed23
commit
f790ad3c50
@@ -82,6 +82,7 @@ class ModelCard(OpenAIBaseModel):
|
|||||||
owned_by: str = "vllm"
|
owned_by: str = "vllm"
|
||||||
root: Optional[str] = None
|
root: Optional[str] = None
|
||||||
parent: Optional[str] = None
|
parent: Optional[str] = None
|
||||||
|
max_model_len: Optional[int] = None
|
||||||
permission: List[ModelPermission] = Field(default_factory=list)
|
permission: List[ModelPermission] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ class OpenAIServing:
|
|||||||
"""Show available models. Right now we only have one model."""
|
"""Show available models. Right now we only have one model."""
|
||||||
model_cards = [
|
model_cards = [
|
||||||
ModelCard(id=served_model_name,
|
ModelCard(id=served_model_name,
|
||||||
|
max_model_len=self.max_model_len,
|
||||||
root=self.served_model_names[0],
|
root=self.served_model_names[0],
|
||||||
permission=[ModelPermission()])
|
permission=[ModelPermission()])
|
||||||
for served_model_name in self.served_model_names
|
for served_model_name in self.served_model_names
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user