diff --git a/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h b/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h index aba6e7fd..36f47c66 100644 --- a/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h +++ b/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h @@ -1552,14 +1552,17 @@ struct GemmStreamkWithFusedEpilogue // Initialize the block mapping structure block_mapping = ThreadblockSwizzle( - typename ThreadblockSwizzle::template KernelTraits(), args.mode, args.problem_size, {ThreadblockShape::kM, ThreadblockShape::kN, ThreadblockShape::kK}, args.batch_count, sm_occupancy, device_sms, - avail_sms); + avail_sms, + sizeof(ElementA), + sizeof(ElementB), + sizeof(ElementC), + Epilogue::kAccumulatorFragments); } /// Returns the workspace size (in bytes) needed for these parameters