[Kernel] Fix CUTLASS 3.x custom broadcast load epilogue (#5516)
This commit is contained in:
parent
d47af2bc02
commit
703475f6c2
@ -153,7 +153,7 @@ struct Sm90RowOrScalarBroadcast {
|
|||||||
|
|
||||||
CUTLASS_DEVICE void
|
CUTLASS_DEVICE void
|
||||||
begin(uint64_t* full_mbarrier_ptr, int load_iteration, bool issue_tma_load) {
|
begin(uint64_t* full_mbarrier_ptr, int load_iteration, bool issue_tma_load) {
|
||||||
if (params.ptr_row == nullptr) {
|
if (!params.row_broadcast) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -257,9 +257,7 @@ class Fp8LinearMethod(LinearMethodBase):
|
|||||||
# If dynamic, layer.input_scale is None and x_scale computed from x.
|
# If dynamic, layer.input_scale is None and x_scale computed from x.
|
||||||
# If static, layer.input_scale is scalar and x_scale is input_scale.
|
# If static, layer.input_scale is scalar and x_scale is input_scale.
|
||||||
|
|
||||||
# Temporarily disable CUTLASS kernels due to an illegal memory access
|
if bias is None and self.cutlass_fp8_supported:
|
||||||
#if bias is None and self.cutlass_fp8_supported:
|
|
||||||
if False:
|
|
||||||
qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
|
qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
|
||||||
|
|
||||||
# Fused GEMM_DQ
|
# Fused GEMM_DQ
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user