From 48d5985a088c6e13e9ad9b0c7a0ce846e30b529f Mon Sep 17 00:00:00 2001 From: eigenLiu <33959526+eigen2017@users.noreply.github.com> Date: Sat, 18 May 2024 00:43:19 +0800 Subject: [PATCH] Sync huggingface modifications of qwen Moe model (#4774) --- vllm/model_executor/models/qwen2_moe.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py index 2a3b0173..a0d3b040 100644 --- a/vllm/model_executor/models/qwen2_moe.py +++ b/vllm/model_executor/models/qwen2_moe.py @@ -283,8 +283,9 @@ class Qwen2MoeDecoderLayer(nn.Module): cache_config=cache_config, quant_config=quant_config, ) - if (config.num_experts is not None - and (layer_idx + 1) % config.decoder_sparse_step == 0): + if (layer_idx not in config.mlp_only_layers) and ( + config.num_experts > 0 and + (layer_idx + 1) % config.decoder_sparse_step == 0): self.mlp = Qwen2MoeSparseMoeBlock(config=config, quant_config=quant_config) else: @@ -439,6 +440,9 @@ class Qwen2MoeForCausalLM(nn.Module): if (("mlp.experts." in name or "mlp.shared_expert." in name) and name not in params_dict): continue + if name not in params_dict: + continue + param = params_dict[name] weight_loader = param.weight_loader weight_loader(param, loaded_weight, shard_id) @@ -451,6 +455,9 @@ class Qwen2MoeForCausalLM(nn.Module): if (("mlp.experts." in name or "mlp.shared_expert." in name) and name not in params_dict): continue + if name not in params_dict: + continue + param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader)