Added missing synchronization to avoid write-after-read (WAR) hazards between tiles. (#386)
This commit is contained in:
parent
0dc3ba60b3
commit
288af365db
@ -546,6 +546,9 @@ public:
|
|||||||
// Compute threadblock-scoped matrix multiply-add
|
// Compute threadblock-scoped matrix multiply-add
|
||||||
int gemm_k_iterations = (problem_size.k() + Mma::Shape::kK - 1) / Mma::Shape::kK;
|
int gemm_k_iterations = (problem_size.k() + Mma::Shape::kK - 1) / Mma::Shape::kK;
|
||||||
|
|
||||||
|
// Wait for all threads to finish their epilogue phases from the previous tile.
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
// Compute threadblock-scoped matrix multiply-add
|
// Compute threadblock-scoped matrix multiply-add
|
||||||
mma(
|
mma(
|
||||||
gemm_k_iterations,
|
gemm_k_iterations,
|
||||||
|
Loading…
Reference in New Issue
Block a user