Fix the race condition in mixed-input GEMM when writing the registers (#1931)
* Move the two warpgroup_wait calls
* Merge main
---------
Co-authored-by: Siyuan Fu <siyuanf@nvidia.com>
This commit is contained in:
parent
d656afbd2a
commit
8aa95dbb88
@ -724,4 +724,4 @@ int main(int argc, char const **args) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -1024,8 +1024,8 @@ public:
|
|||||||
tiled_mma.accumulate_ = GMMA::ScaleOut::One;
|
tiled_mma.accumulate_ = GMMA::ScaleOut::One;
|
||||||
warpgroup_commit_batch();
|
warpgroup_commit_batch();
|
||||||
|
|
||||||
|
warpgroup_wait<K_BLOCK_MAX - 1>(); // We have K_BLOCK_MAX - 1 GMMA instructions pending for this stage, so we can release prior barrier
|
||||||
if (k_block == K_BLOCK_MAX - 1) {
|
if (k_block == K_BLOCK_MAX - 1) {
|
||||||
warpgroup_wait<K_BLOCK_MAX - 1>(); // We have K_BLOCK_MAX - 1 GMMA instructions pending for this stage, so we can release prior barrier
|
|
||||||
pipeline.consumer_release(smem_pipe_release); // UNLOCK smem_pipe_release, done _computing_ on it
|
pipeline.consumer_release(smem_pipe_release); // UNLOCK smem_pipe_release, done _computing_ on it
|
||||||
++smem_pipe_release;
|
++smem_pipe_release;
|
||||||
}
|
}
|
||||||
@ -1076,8 +1076,9 @@ public:
|
|||||||
cute::gemm(tiled_mma, tCrA_mma(_,_,k_block), tCrB(_,_,k_block,read_stage), accum);
|
cute::gemm(tiled_mma, tCrA_mma(_,_,k_block), tCrB(_,_,k_block,read_stage), accum);
|
||||||
tiled_mma.accumulate_ = GMMA::ScaleOut::One;
|
tiled_mma.accumulate_ = GMMA::ScaleOut::One;
|
||||||
warpgroup_commit_batch();
|
warpgroup_commit_batch();
|
||||||
|
|
||||||
|
warpgroup_wait<K_BLOCK_MAX - 1>();
|
||||||
if (k_block == K_BLOCK_MAX - 1) { // release prior barrier
|
if (k_block == K_BLOCK_MAX - 1) { // release prior barrier
|
||||||
warpgroup_wait<K_BLOCK_MAX - 1>();
|
|
||||||
pipeline.consumer_release(smem_pipe_release); // UNLOCK smem_pipe_release, done _computing_ on it
|
pipeline.consumer_release(smem_pipe_release); // UNLOCK smem_pipe_release, done _computing_ on it
|
||||||
++smem_pipe_release;
|
++smem_pipe_release;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user