From 33d3914b1e6d85a855da1a69193030c1915cb6f9 Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Mon, 13 May 2024 16:00:27 -0700
Subject: [PATCH] [Bugfix] Fix dynamic FP8 quantization for Mixtral (#4793)

---
 vllm/model_executor/models/mixtral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py
index 113abbaa..e3ac33e0 100644
--- a/vllm/model_executor/models/mixtral.py
+++ b/vllm/model_executor/models/mixtral.py
@@ -95,7 +95,7 @@ class MixtralMoE(nn.Module):
                                      params_dtype=self.params_dtype,
                                      quant_config=None)
 
-        if self.use_fp8:
+        if self.use_fp8 and self.quant_config.is_checkpoint_fp8_serialized:
             params_dtype = torch.float8_e4m3fn
 
         self.w13_weight = nn.Parameter(
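
For context, below is a minimal sketch of the logic this one-line change touches. The class is heavily abridged and the constructor arguments are simplified (the real code reads the flag from self.quant_config.is_checkpoint_fp8_serialized; here it is passed as a plain boolean, and the w13_weight shape is assumed from the surrounding hunk). Only the guarded dtype switch mirrors the patch: with dynamic FP8 quantization the checkpoint holds fp16/bf16 weights that are quantized after loading, so the parameters must not be allocated as float8_e4m3fn up front.

import torch
import torch.nn as nn


class MixtralMoE(nn.Module):
    """Abridged sketch; only the params_dtype guard reflects the patch."""

    def __init__(self, hidden_size: int, intermediate_size: int,
                 num_total_experts: int, params_dtype: torch.dtype,
                 use_fp8: bool, checkpoint_fp8_serialized: bool):
        super().__init__()
        self.use_fp8 = use_fp8

        # Before the fix, any FP8 config switched params_dtype to
        # float8_e4m3fn. That breaks *dynamic* FP8 quantization, where the
        # checkpoint is serialized in fp16/bf16 and only quantized at runtime:
        # loading fp16 weights into fp8-allocated parameters fails. Only an
        # FP8-serialized checkpoint should be allocated in FP8 directly.
        if self.use_fp8 and checkpoint_fp8_serialized:
            params_dtype = torch.float8_e4m3fn

        # Expert weights are created in params_dtype; for dynamic FP8 this
        # remains the original dtype so the checkpoint can be loaded as-is
        # and quantized to FP8 in a later weight-processing step.
        self.w13_weight = nn.Parameter(
            torch.empty(num_total_experts,
                        2 * intermediate_size,
                        hidden_size,
                        dtype=params_dtype),
            requires_grad=False)

With the fix, use_fp8=True plus checkpoint_fp8_serialized=False (the dynamic case) leaves the weights in the checkpoint's dtype, while use_fp8=True plus checkpoint_fp8_serialized=True still allocates them directly in torch.float8_e4m3fn.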