From e2d439ee7e5c31451b64c6f1900e150239e25fe9 Mon Sep 17 00:00:00 2001 From: Shuai Shao Date: Thu, 6 Apr 2023 07:00:52 -0700 Subject: [PATCH] Add tile_n=32 and tile_k=32 kernels in generator.py (#858) --- tools/library/scripts/generator.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/library/scripts/generator.py b/tools/library/scripts/generator.py index 8f64de9c..77a5138b 100644 --- a/tools/library/scripts/generator.py +++ b/tools/library/scripts/generator.py @@ -1443,6 +1443,20 @@ def GenerateSM75_TensorOp_8816_TN(manifest, cuda_version): TileDescription([ 64, 128, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([128, 64, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([ 64, 64, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc), + TileDescription([ 256, 32, 64], 2, [4, 1, 1], math_inst, min_cc, max_cc), + TileDescription([ 128, 32, 64], 2, [2, 1, 1], math_inst, min_cc, max_cc), + TileDescription([ 64, 32, 64], 2, [2, 1, 1], math_inst, min_cc, max_cc), + + TileDescription([256, 128, 32], 2, [4, 2, 1], math_inst, min_cc, max_cc), + TileDescription([128, 256, 32], 2, [2, 4, 1], math_inst, min_cc, max_cc), + TileDescription([128, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), + TileDescription([ 64, 256, 32], 2, [1, 4, 1], math_inst, min_cc, max_cc), + TileDescription([256, 64, 32], 2, [4, 1, 1], math_inst, min_cc, max_cc), + TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), + TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), + TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), + TileDescription([ 128, 32, 32], 2, [2, 1, 1], math_inst, min_cc, max_cc), + TileDescription([ 64, 32, 32], 2, [2, 1, 1], math_inst, min_cc, max_cc), ] data_type = [