Added missing synchronization to avoid write-after-read (WAR) hazards between tiles. (#386)
This commit is contained in:
parent
0dc3ba60b3
commit
288af365db
@ -546,6 +546,9 @@ public:
|
||||
// Compute the number of threadblock-scoped iterations along K (ceiling division of the problem's K extent by Mma::Shape::kK)
|
||||
int gemm_k_iterations = (problem_size.k() + Mma::Shape::kK - 1) / Mma::Shape::kK;
|
||||
|
||||
// Wait for all threads to finish their epilogue phases from the previous tile.
|
||||
__syncthreads();
|
||||
|
||||
// Compute threadblock-scoped matrix multiply-add
|
||||
mma(
|
||||
gemm_k_iterations,
|
||||
|
Loading…
Reference in New Issue
Block a user