Fix bug in single source GEMM with residual + streamk (#1249)
Follow-up to #1224. A change in the stream-k threadblock swizzle constructor since 3.3 breaks single-source GEMM with fused epilogue and stream-k. Multi-source was already corrected. Co-authored-by: Ali Hassani <ahassanijr@gmail.com>
This commit is contained in:
parent
f188f9b709
commit
f4a0216601
@ -1552,14 +1552,17 @@ struct GemmStreamkWithFusedEpilogue<Mma_, Epilogue_, ThreadblockSwizzle_, true>
|
||||
|
||||
// Initialize the block mapping structure
|
||||
block_mapping = ThreadblockSwizzle(
|
||||
typename ThreadblockSwizzle::template KernelTraits<GemmStreamkWithFusedEpilogue>(),
|
||||
args.mode,
|
||||
args.problem_size,
|
||||
{ThreadblockShape::kM, ThreadblockShape::kN, ThreadblockShape::kK},
|
||||
args.batch_count,
|
||||
sm_occupancy,
|
||||
device_sms,
|
||||
avail_sms);
|
||||
avail_sms,
|
||||
sizeof(ElementA),
|
||||
sizeof(ElementB),
|
||||
sizeof(ElementC),
|
||||
Epilogue::kAccumulatorFragments);
|
||||
}
|
||||
|
||||
/// Returns the workspace size (in bytes) needed for these parameters
|
||||
|
Loading…
Reference in New Issue
Block a user