Add tile_n=32 and tile_k=32 kernels in generator.py (#858)

This commit is contained in:
Shuai Shao 2023-04-06 07:00:52 -07:00 committed by GitHub
parent 0435979f59
commit e2d439ee7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1443,6 +1443,20 @@ def GenerateSM75_TensorOp_8816_TN(manifest, cuda_version):
TileDescription([ 64, 128, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([128, 64, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 64, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 256, 32, 64], 2, [4, 1, 1], math_inst, min_cc, max_cc),
TileDescription([ 128, 32, 64], 2, [2, 1, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 32, 64], 2, [2, 1, 1], math_inst, min_cc, max_cc),
TileDescription([256, 128, 32], 2, [4, 2, 1], math_inst, min_cc, max_cc),
TileDescription([128, 256, 32], 2, [2, 4, 1], math_inst, min_cc, max_cc),
TileDescription([128, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 256, 32], 2, [1, 4, 1], math_inst, min_cc, max_cc),
TileDescription([256, 64, 32], 2, [4, 1, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
TileDescription([ 128, 32, 32], 2, [2, 1, 1], math_inst, min_cc, max_cc),
TileDescription([ 64, 32, 32], 2, [2, 1, 1], math_inst, min_cc, max_cc),
]
data_type = [