Added missing synchronization to avoid write-after-read (WAR) hazards between tiles. (#386)

This commit is contained in:
Andrew Kerr 2021-12-20 11:34:08 -05:00 committed by GitHub
parent 0dc3ba60b3
commit 288af365db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -546,6 +546,9 @@ public:
// Compute threadblock-scoped matrix multiply-add
int gemm_k_iterations = (problem_size.k() + Mma::Shape::kK - 1) / Mma::Shape::kK;
// Wait for all threads to finish their epilogue phases from the previous tile.
__syncthreads();
// Compute threadblock-scoped matrix multiply-add
mma(
gemm_k_iterations,