diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py
index 7a039a78..718967a0 100644
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -119,7 +119,12 @@ class BitsAndBytesConfig(QuantizationConfig):
 
 
 def is_layer_skipped_bnb(prefix: str, llm_int8_skip_modules: List[str]):
-    return any(module_name in prefix for module_name in llm_int8_skip_modules)
+    # Split the prefix into its dot-separated components
+    components = prefix.split('.')
+
+    # Check if any of the skip modules exactly matches any component
+    return any(module_name in components
+               for module_name in llm_int8_skip_modules)
 
 
 class BitsAndBytesLinearMethod(LinearMethodBase):
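
For context, a minimal, self-contained sketch of why the change matters. The skip list and layer prefix below are hypothetical examples, not values from any real model config, and the _old/_new function names exist only for side-by-side comparison with the diff above:

from typing import List


def is_layer_skipped_bnb_old(prefix: str, llm_int8_skip_modules: List[str]):
    # Previous behavior: substring match against the full dotted prefix.
    return any(module_name in prefix
               for module_name in llm_int8_skip_modules)


def is_layer_skipped_bnb_new(prefix: str, llm_int8_skip_modules: List[str]):
    # New behavior: exact match against each dot-separated component.
    components = prefix.split('.')
    return any(module_name in components
               for module_name in llm_int8_skip_modules)


if __name__ == "__main__":
    skip = ["visual"]                      # hypothetical skip list
    prefix = "model.layers.0.visual_proj"  # hypothetical layer prefix

    # Substring matching skips this layer because "visual" happens to be a
    # fragment of the component name "visual_proj", a false positive.
    print(is_layer_skipped_bnb_old(prefix, skip))  # True

    # Component matching only skips layers whose dotted path contains
    # "visual" as a whole component, e.g. "model.visual.patch_embed".
    print(is_layer_skipped_bnb_new(prefix, skip))  # False

The exact-component check keeps quantization enabled for layers whose names merely contain a skip-module name as a substring, while still skipping the modules the skip list actually names.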