From f4a021660162510572f90ea715b018cff9c0f12f Mon Sep 17 00:00:00 2001 From: Ali Hassani <68103095+alihassanijr@users.noreply.github.com> Date: Thu, 7 Dec 2023 11:12:02 -0500 Subject: [PATCH] Fix bug in single source GEMM with residual + streamk (#1249) Follow-up to #1224. A change to the stream-k threadblock swizzle constructor since 3.3 breaks single-source GEMM with fused epilogue and stream-k. The multi-source kernel was already corrected. Co-authored-by: Ali Hassani --- .../cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h b/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h index aba6e7fd..36f47c66 100644 --- a/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h +++ b/include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h @@ -1552,14 +1552,17 @@ struct GemmStreamkWithFusedEpilogue // Initialize the block mapping structure block_mapping = ThreadblockSwizzle( - typename ThreadblockSwizzle::template KernelTraits(), args.mode, args.problem_size, {ThreadblockShape::kM, ThreadblockShape::kN, ThreadblockShape::kK}, args.batch_count, sm_occupancy, device_sms, - avail_sms); + avail_sms, + sizeof(ElementA), + sizeof(ElementB), + sizeof(ElementC), + Epilogue::kAccumulatorFragments); } /// Returns the workspace size (in bytes) needed for these parameters