diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
index b7002e75..d7d7d656 100644
--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -456,6 +456,8 @@ class CPUModelRunner(ModelRunnerBase[ModelInputForCPU]):
             model_input.attn_metadata,
             **MultiModalInputs.as_kwargs(model_input.multi_modal_kwargs or {},
                                          device=self.device),
+            "intermediate_tensors":
+            intermediate_tensors,
         }
 
         hidden_states = model_executable(**execute_model_kwargs)