[Bugfix] Fix awq_marlin and gptq_marlin flags (#6745)
This commit is contained in:
parent
309aaef825
commit
0310029a2f
@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig):
|
|||||||
def __init__(self, weight_bits: int, group_size: int, has_zp: bool,
|
def __init__(self, weight_bits: int, group_size: int, has_zp: bool,
|
||||||
lm_head_quantized: bool) -> None:
|
lm_head_quantized: bool) -> None:
|
||||||
self.weight_bits = weight_bits
|
self.weight_bits = weight_bits
|
||||||
self.pack_factor = 32 // self.weight_bits # packed into int32
|
self.pack_factor = 32 // self.weight_bits # packed into 32bits
|
||||||
self.group_size = group_size
|
self.group_size = group_size
|
||||||
self.has_zp = has_zp
|
self.has_zp = has_zp
|
||||||
self.lm_head_quantized = lm_head_quantized
|
self.lm_head_quantized = lm_head_quantized
|
||||||
@ -69,7 +69,8 @@ class AWQMarlinConfig(QuantizationConfig):
|
|||||||
def override_quantization_method(cls, hf_quant_cfg,
|
def override_quantization_method(cls, hf_quant_cfg,
|
||||||
user_quant) -> Optional[str]:
|
user_quant) -> Optional[str]:
|
||||||
can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
|
can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
|
||||||
is_valid_user_quant = (user_quant is None or user_quant == "marlin")
|
is_valid_user_quant = (user_quant is None or user_quant == "marlin"
|
||||||
|
or user_quant == "awq_marlin")
|
||||||
|
|
||||||
if can_convert and is_valid_user_quant:
|
if can_convert and is_valid_user_quant:
|
||||||
msg = ("The model is convertible to {} during runtime."
|
msg = ("The model is convertible to {} during runtime."
|
||||||
|
|||||||
@ -79,7 +79,8 @@ class GPTQMarlinConfig(QuantizationConfig):
|
|||||||
user_quant) -> Optional[str]:
|
user_quant) -> Optional[str]:
|
||||||
can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
|
can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
|
||||||
|
|
||||||
is_valid_user_quant = (user_quant is None or user_quant == "marlin")
|
is_valid_user_quant = (user_quant is None or user_quant == "marlin"
|
||||||
|
or user_quant == "gptq_marlin")
|
||||||
|
|
||||||
if can_convert and is_valid_user_quant:
|
if can_convert and is_valid_user_quant:
|
||||||
msg = ("The model is convertible to {} during runtime."
|
msg = ("The model is convertible to {} during runtime."
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user