[ Bugfix ] Fix AutoFP8 fp8 marlin (#6609)

2024-07-20 19:25:56 -04:00 · 2024-07-20 19:25:56 -04:00 · 082ecd80d5
commit 082ecd80d5
parent f952bbc8ff
1 changed file with 2 additions and 1 deletion
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
@@ -76,7 +76,8 @@ def prepare_fp8_layer_for_marlin(layer: torch.nn.Module) -> None:
    # WEIGHT SCALES
    # Currently Marlin doesn't support per-tensor scales, so we
    # expand it to channelwise
-    is_channelwise = layer.weight_scale.shape[0] == part_size_n
+    is_channelwise = (len(layer.weight_scale.shape) > 0
+                      and layer.weight_scale.shape[0] == part_size_n)
    if is_channelwise:
        scales = layer.weight_scale
    else: