From 8334c39f373b787a20eff8b7655363dd764dfe57 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 9 Aug 2024 04:42:44 +0800
Subject: [PATCH] [Bugfix] Fix new Llama3.1 GGUF model loading (#7269)

---
 .../model_loader/weight_utils.py              | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py
index 25056165..a9a04b42 100644
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -435,23 +435,25 @@ def gguf_quant_weights_iterator(
     reader = gguf.GGUFReader(gguf_file)
 
     for tensor in reader.tensors:
-        weight_type = tensor.tensor_type
-        name = gguf_to_hf_name_map[tensor.name]
+        if tensor.name in gguf_to_hf_name_map:
+            weight_type = tensor.tensor_type
+            name = gguf_to_hf_name_map[tensor.name]
 
-        if weight_type.name != "F32":
-            weight_type_name = name.replace("weight", "qweight_type")
-            weight_type = torch.tensor(weight_type)
-            yield weight_type_name, weight_type
+            if weight_type.name != "F32":
+                weight_type_name = name.replace("weight", "qweight_type")
+                weight_type = torch.tensor(weight_type)
+                yield weight_type_name, weight_type
 
     for tensor in reader.tensors:
-        weight = tensor.data
-        weight_type = tensor.tensor_type
-        name = gguf_to_hf_name_map[tensor.name]
+        if tensor.name in gguf_to_hf_name_map:
+            weight = tensor.data
+            weight_type = tensor.tensor_type
+            name = gguf_to_hf_name_map[tensor.name]
 
-        if weight_type.name != "F32":
-            name = name.replace("weight", "qweight")
-        param = torch.tensor(weight)
-        yield name, param
+            if weight_type.name != "F32":
+                name = name.replace("weight", "qweight")
+            param = torch.tensor(weight)
+            yield name, param
 
 
 def kv_cache_scales_loader(