Add the missing 3xTF32 (fast FP32) pieces (#392)

Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
Haicheng Wu 2021-12-25 07:29:54 -05:00 committed by GitHub
parent dceabd4c5a
commit f78994bb40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 8 additions and 0 deletions

View File

@@ -41,6 +41,7 @@ class GemmOperation:
complex_operators = [
MathOperation.multiply_add_complex,
MathOperation.multiply_add_complex_gaussian,
MathOperation.multiply_add_complex_fast_f32
]
return self.tile_description.math_instruction.math_operation in complex_operators

View File

@@ -2641,6 +2641,9 @@ def GenerateSM80(manifest, args):
GenerateSM80_TensorOp_1688_fast_math(manifest, args)
GenerateSM80_SparseTensorOp_16816_fast_math(manifest, args)
GenerateSM80_TensorOp_1688_complex(manifest, args)
# 3xTF32
GenerateSM80_TensorOp_1688_fast_fp32_math(manifest, args)
GenerateSM80_TensorOp_1688_fast_fp32_math_complex(manifest, args)
GenerateSM80_TensorOp_884(manifest, args)
GenerateSM80_TensorOp_884_complex(manifest, args)
GenerateSM80_TensorOp_884_complex_gaussian(manifest, args)

View File

@@ -240,6 +240,8 @@ class MathOperation(enum.Enum):
xor_popc = enum_auto()
multiply_add_fast_bf16 = enum_auto()
multiply_add_fast_f16 = enum_auto()
multiply_add_fast_f32 = enum_auto()
multiply_add_complex_fast_f32 = enum_auto()
multiply_add_complex = enum_auto()
multiply_add_complex_gaussian = enum_auto()
@@ -250,6 +252,8 @@ MathOperationTag = {
MathOperation.xor_popc: 'cutlass::arch::OpXorPopc',
MathOperation.multiply_add_fast_bf16: 'cutlass::arch::OpMultiplyAddFastBF16',
MathOperation.multiply_add_fast_f16: 'cutlass::arch::OpMultiplyAddFastF16',
MathOperation.multiply_add_fast_f32: 'cutlass::arch::OpMultiplyAddFastF32',
MathOperation.multiply_add_complex_fast_f32: 'cutlass::arch::OpMultiplyAddComplexFastF32',
MathOperation.multiply_add_complex: 'cutlass::arch::OpMultiplyAddComplex',
MathOperation.multiply_add_complex_gaussian: 'cutlass::arch::OpMultiplyAddGaussianComplex',
}