From 21906a6f50ee0edf49ede856a82e8840bab41471 Mon Sep 17 00:00:00 2001
From: Ahmad Fahadh Ilyas <37577369+fahadh4ilyas@users.noreply.github.com>
Date: Wed, 9 Oct 2024 05:10:44 -0700
Subject: [PATCH] [Bugfix] Fix lora loading for Compressed Tensors in #9120 (#9179)

---
 vllm/lora/layers.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index b9ac498b..6254c675 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -39,6 +39,9 @@ def _get_lora_device(base_layer: nn.Module) -> torch.device:
     # unquantizedLinear
     if hasattr(base_layer, "weight"):
         return base_layer.weight.device
+    # Compressed Tensor
+    elif hasattr(base_layer, "weight_packed"):
+        return base_layer.weight_packed.device
     # GPTQ/AWQ
     elif hasattr(base_layer, "qweight"):
         return base_layer.qweight.device