Fix for public issue #211

- Add a slice-K tile size to the profiler
- Fix the warp count calculation in the implicit GEMM header
This commit is contained in:
Manikandan Ananth 2021-04-01 14:42:00 -07:00
parent 8a3e4b8d02
commit 75a4737cfe
2 changed files with 3 additions and 1 deletion

View File

@ -74,7 +74,8 @@ public:
static int const kWarpCount = static int const kWarpCount =
(ThreadblockShape::kM / WarpShape::kM) * (ThreadblockShape::kM / WarpShape::kM) *
(ThreadblockShape::kN / WarpShape::kN); (ThreadblockShape::kN / WarpShape::kN) *
(ThreadblockShape::kK / WarpShape::kK);
/// Argument structure /// Argument structure
using Arguments = typename ImplicitGemmKernel::Arguments; using Arguments = typename ImplicitGemmKernel::Arguments;

View File

@ -701,6 +701,7 @@ def GenerateSM75_TensorOp_1688(manifest, args):
TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 128, 64], 2, [1, 2, 2], math_inst, min_cc, max_cc),
] ]
data_type = [ data_type = [