[Misc] Skip loading extra bias for Qwen2-MOE GPTQ models (#8329)

2024-09-11 08:59:19 +08:00 · 2024-09-11 08:59:19 +08:00 · e497b8aeff
commit e497b8aeff
parent 94144e726c
1 changed file with 8 additions and 2 deletions
--- a/vllm/model_executor/models/qwen2_moe.py
+++ b/vllm/model_executor/models/qwen2_moe.py
@@ -469,7 +469,8 @@ class Qwen2MoeForCausalLM(nn.Module):
                    continue
                name = name.replace(weight_name, param_name)
                # Skip loading extra bias for GPTQ models.
-                if name.endswith(".bias") and name not in params_dict:
+                if ((name.endswith(".bias") or name.endswith("_bias"))
+                        and name not in params_dict):
                    continue
                # Skip layers on other devices.
                if is_pp_missing_parameter(name, self):
@@ -490,6 +491,10 @@ class Qwen2MoeForCausalLM(nn.Module):
                    # Skip layers on other devices.
                    if is_pp_missing_parameter(name, self):
                        continue
+                    # Skip loading extra bias for GPTQ models.
+                    if ((name.endswith(".bias") or name.endswith("_bias"))
+                            and name not in params_dict):
+                        continue
                    param = params_dict[name]
                    weight_loader = param.weight_loader
                    weight_loader(param,
@@ -500,7 +505,8 @@ class Qwen2MoeForCausalLM(nn.Module):
                    break
                else:
                    # Skip loading extra bias for GPTQ models.
-                    if name.endswith(".bias") and name not in params_dict:
+                    if ((name.endswith(".bias") or name.endswith("_bias"))
+                            and name not in params_dict):
                        continue
                    # Skip layers on other devices.
                    if is_pp_missing_parameter(name, self):