From 4cfe5d2bcafe1f47d1df046e6788ebbe038eaf3f Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sat, 23 Nov 2024 13:25:46 +0800 Subject: [PATCH] [Bugfix] `multi_modal_kwargs` broadcast for CPU tensor parallel (#10541) Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/worker/cpu_enc_dec_model_runner.py | 1 + vllm/worker/cpu_model_runner.py | 1 + 2 files changed, 2 insertions(+) diff --git a/vllm/worker/cpu_enc_dec_model_runner.py b/vllm/worker/cpu_enc_dec_model_runner.py index 1f8e2d2d..cc24cfe0 100644 --- a/vllm/worker/cpu_enc_dec_model_runner.py +++ b/vllm/worker/cpu_enc_dec_model_runner.py @@ -35,6 +35,7 @@ class EncoderDecoderModelInputForCPU(ModelInputForCPUWithSamplingMetadata): "input_positions": self.input_positions, "encoder_input_tokens": self.encoder_input_tokens, "encoder_input_positions": self.encoder_input_positions, + "multi_modal_kwargs": self.multi_modal_kwargs, } _add_attn_metadata_broadcastable_dict(tensor_dict, self.attn_metadata) _add_sampling_metadata_broadcastable_dict(tensor_dict, diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py index 2cf57362..7cab476d 100644 --- a/vllm/worker/cpu_model_runner.py +++ b/vllm/worker/cpu_model_runner.py @@ -83,6 +83,7 @@ class ModelInputForCPUWithSamplingMetadata(ModelInputForCPU): tensor_dict = { "input_tokens": self.input_tokens, "input_positions": self.input_positions, + "multi_modal_kwargs": self.multi_modal_kwargs, } _add_attn_metadata_broadcastable_dict(tensor_dict, self.attn_metadata) _add_sampling_metadata_broadcastable_dict(tensor_dict,