[Model] Molmo vLLM Integration (#9016)
Co-authored-by: sanghol <sanghol@allenai.org> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
parent
16b24e7dcd
commit
dfe43a2071
@ -399,6 +399,12 @@ Text Generation
|
|||||||
- :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
|
- :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
|
||||||
-
|
-
|
||||||
-
|
-
|
||||||
|
* - :code:`MolmoForCausalLM`
|
||||||
|
- Molmo
|
||||||
|
- Image
|
||||||
|
- :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
|
||||||
|
-
|
||||||
|
- ✅︎
|
||||||
* - :code:`NVLM_D_Model`
|
* - :code:`NVLM_D_Model`
|
||||||
- NVLM-D 1.0
|
- NVLM-D 1.0
|
||||||
- Image\ :sup:`E+`
|
- Image\ :sup:`E+`
|
||||||
|
|||||||
@ -300,6 +300,23 @@ def run_mllama(question: str, modality: str):
|
|||||||
return llm, prompt, stop_token_ids
|
return llm, prompt, stop_token_ids
|
||||||
|
|
||||||
|
|
||||||
|
# Molmo
|
||||||
|
def run_molmo(question, modality):
|
||||||
|
assert modality == "image"
|
||||||
|
|
||||||
|
model_name = "allenai/Molmo-7B-D-0924"
|
||||||
|
|
||||||
|
llm = LLM(
|
||||||
|
model=model_name,
|
||||||
|
trust_remote_code=True,
|
||||||
|
dtype="bfloat16",
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt = question
|
||||||
|
stop_token_ids = None
|
||||||
|
return llm, prompt, stop_token_ids
|
||||||
|
|
||||||
|
|
||||||
# GLM-4v
|
# GLM-4v
|
||||||
def run_glm4v(question: str, modality: str):
|
def run_glm4v(question: str, modality: str):
|
||||||
assert modality == "image"
|
assert modality == "image"
|
||||||
@ -331,6 +348,7 @@ model_example_map = {
|
|||||||
"qwen_vl": run_qwen_vl,
|
"qwen_vl": run_qwen_vl,
|
||||||
"qwen2_vl": run_qwen2_vl,
|
"qwen2_vl": run_qwen2_vl,
|
||||||
"mllama": run_mllama,
|
"mllama": run_mllama,
|
||||||
|
"molmo": run_molmo,
|
||||||
"glm4v": run_glm4v,
|
"glm4v": run_glm4v,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -163,6 +163,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
|
|||||||
return "<|image|>"
|
return "<|image|>"
|
||||||
if model_type == "qwen2_vl":
|
if model_type == "qwen2_vl":
|
||||||
return "<|vision_start|><|image_pad|><|vision_end|>"
|
return "<|vision_start|><|image_pad|><|vision_end|>"
|
||||||
|
if model_type == "molmo":
|
||||||
|
return ""
|
||||||
|
|
||||||
raise TypeError(f"Unknown model type: {model_type}")
|
raise TypeError(f"Unknown model type: {model_type}")
|
||||||
elif modality == "audio":
|
elif modality == "audio":
|
||||||
|
|||||||
@ -20,4 +20,4 @@ __all__ = [
|
|||||||
"supports_multimodal",
|
"supports_multimodal",
|
||||||
"SupportsPP",
|
"SupportsPP",
|
||||||
"supports_pp",
|
"supports_pp",
|
||||||
]
|
]
|
||||||
1290
vllm/model_executor/models/molmo.py
Normal file
1290
vllm/model_executor/models/molmo.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -1167,8 +1167,7 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
continue
|
continue
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
print(params_dict.keys())
|
raise ValueError(f"Unexpected weight: {name}") from None
|
||||||
raise
|
|
||||||
|
|
||||||
weight_loader = getattr(param, "weight_loader",
|
weight_loader = getattr(param, "weight_loader",
|
||||||
default_weight_loader)
|
default_weight_loader)
|
||||||
|
|||||||
@ -104,6 +104,7 @@ _MULTIMODAL_MODELS = {
|
|||||||
"LlavaNextVideoForConditionalGeneration": ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), # noqa: E501
|
"LlavaNextVideoForConditionalGeneration": ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), # noqa: E501
|
||||||
"LlavaOnevisionForConditionalGeneration": ("llava_onevision", "LlavaOnevisionForConditionalGeneration"), # noqa: E501
|
"LlavaOnevisionForConditionalGeneration": ("llava_onevision", "LlavaOnevisionForConditionalGeneration"), # noqa: E501
|
||||||
"MiniCPMV": ("minicpmv", "MiniCPMV"),
|
"MiniCPMV": ("minicpmv", "MiniCPMV"),
|
||||||
|
"MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
|
||||||
"NVLM_D": ("nvlm_d", "NVLM_D_Model"),
|
"NVLM_D": ("nvlm_d", "NVLM_D_Model"),
|
||||||
"PaliGemmaForConditionalGeneration": ("paligemma", "PaliGemmaForConditionalGeneration"), # noqa: E501
|
"PaliGemmaForConditionalGeneration": ("paligemma", "PaliGemmaForConditionalGeneration"), # noqa: E501
|
||||||
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user