From 75a4737cfee53dc5fedb5463489d06edbdb98b60 Mon Sep 17 00:00:00 2001
From: Manikandan Ananth
Date: Thu, 1 Apr 2021 14:42:00 -0700
Subject: [PATCH] Fix for public issue #211

- Add a slice-K tile size to the profiler
- fix num warps calculations in implicit gemm header
---
 include/cutlass/conv/device/implicit_gemm_convolution.h | 3 ++-
 tools/library/scripts/generator.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/cutlass/conv/device/implicit_gemm_convolution.h b/include/cutlass/conv/device/implicit_gemm_convolution.h
index dff737ff..5535b09a 100644
--- a/include/cutlass/conv/device/implicit_gemm_convolution.h
+++ b/include/cutlass/conv/device/implicit_gemm_convolution.h
@@ -74,7 +74,8 @@ public:
 
   static int const kWarpCount =
     (ThreadblockShape::kM / WarpShape::kM) *
-    (ThreadblockShape::kN / WarpShape::kN);
+    (ThreadblockShape::kN / WarpShape::kN) *
+    (ThreadblockShape::kK / WarpShape::kK);
 
   /// Argument structure
   using Arguments = typename ImplicitGemmKernel::Arguments;
diff --git a/tools/library/scripts/generator.py b/tools/library/scripts/generator.py
index 4d49f52e..5316c5b9 100644
--- a/tools/library/scripts/generator.py
+++ b/tools/library/scripts/generator.py
@@ -701,6 +701,7 @@ def GenerateSM75_TensorOp_1688(manifest, args):
       TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
       TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
       TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
+      TileDescription([ 64, 128, 64], 2, [1, 2, 2], math_inst, min_cc, max_cc),
     ]
 
     data_type = [