Fix for public issue #211

- Add a slice-K tile size to the profiler
- Fix the warp count calculation in the implicit GEMM header to include the K dimension
This commit is contained in:
Manikandan Ananth 2021-04-01 14:42:00 -07:00
parent 8a3e4b8d02
commit 75a4737cfe
2 changed files with 3 additions and 1 deletion

View File

@ -74,7 +74,8 @@ public:
static int const kWarpCount =
(ThreadblockShape::kM / WarpShape::kM) *
(ThreadblockShape::kN / WarpShape::kN);
(ThreadblockShape::kN / WarpShape::kN) *
(ThreadblockShape::kK / WarpShape::kK);
/// Argument structure
using Arguments = typename ImplicitGemmKernel::Arguments;

View File

@ -701,6 +701,7 @@ def GenerateSM75_TensorOp_1688(manifest, args):
TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 128, 64], 2, [1, 2, 2], math_inst, min_cc, max_cc),
]
data_type = [