From d3a245138acb358c7e1e5c5dcf4dcb3c2b48c8ff Mon Sep 17 00:00:00 2001 From: Baoyuan Qi Date: Wed, 10 Jul 2024 07:43:24 +0800 Subject: [PATCH] [Bugfix] Fix needs_scalar_to_array logic check (#6238) Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> --- vllm/model_executor/layers/linear.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 3cc25783..1dda5d37 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -387,7 +387,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear): if loaded_shard_id is None: # Loaded weight is already fused on disk (qkv/mlp). if output_dim is None: - if needs_scalar_to_array is not None: + if needs_scalar_to_array: param_data, loaded_weight = adjust_scalar_to_fused_array( param_data, loaded_weight, 0) @@ -549,7 +549,7 @@ class QKVParallelLinear(ColumnParallelLinear): if loaded_shard_id is None: # Loaded weight is already fused on disk (qkv/mlp). if output_dim is None: - if needs_scalar_to_array is not None: + if needs_scalar_to_array: param_data, loaded_weight = adjust_scalar_to_fused_array( param_data, loaded_weight, 0)