Add the missing pieces for complex 3xTF32 (fast F32) GEMM generation (#392)
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
parent
dceabd4c5a
commit
f78994bb40
@@ -41,6 +41,7 @@ class GemmOperation:
|
||||
complex_operators = [
|
||||
MathOperation.multiply_add_complex,
|
||||
MathOperation.multiply_add_complex_gaussian,
|
||||
MathOperation.multiply_add_complex_fast_f32
|
||||
]
|
||||
return self.tile_description.math_instruction.math_operation in complex_operators
|
||||
|
||||
|
@@ -2641,6 +2641,9 @@ def GenerateSM80(manifest, args):
|
||||
GenerateSM80_TensorOp_1688_fast_math(manifest, args)
|
||||
GenerateSM80_SparseTensorOp_16816_fast_math(manifest, args)
|
||||
GenerateSM80_TensorOp_1688_complex(manifest, args)
|
||||
# 3xTF32
|
||||
GenerateSM80_TensorOp_1688_fast_fp32_math(manifest, args)
|
||||
GenerateSM80_TensorOp_1688_fast_fp32_math_complex(manifest, args)
|
||||
GenerateSM80_TensorOp_884(manifest, args)
|
||||
GenerateSM80_TensorOp_884_complex(manifest, args)
|
||||
GenerateSM80_TensorOp_884_complex_gaussian(manifest, args)
|
||||
|
@@ -240,6 +240,8 @@ class MathOperation(enum.Enum):
|
||||
xor_popc = enum_auto()
|
||||
multiply_add_fast_bf16 = enum_auto()
|
||||
multiply_add_fast_f16 = enum_auto()
|
||||
multiply_add_fast_f32 = enum_auto()
|
||||
multiply_add_complex_fast_f32 = enum_auto()
|
||||
multiply_add_complex = enum_auto()
|
||||
multiply_add_complex_gaussian = enum_auto()
|
||||
|
||||
@@ -250,6 +252,8 @@ MathOperationTag = {
|
||||
MathOperation.xor_popc: 'cutlass::arch::OpXorPopc',
|
||||
MathOperation.multiply_add_fast_bf16: 'cutlass::arch::OpMultiplyAddFastBF16',
|
||||
MathOperation.multiply_add_fast_f16: 'cutlass::arch::OpMultiplyAddFastF16',
|
||||
MathOperation.multiply_add_fast_f32: 'cutlass::arch::OpMultiplyAddFastF32',
|
||||
MathOperation.multiply_add_complex_fast_f32: 'cutlass::arch::OpMultiplyAddComplexFastF32',
|
||||
MathOperation.multiply_add_complex: 'cutlass::arch::OpMultiplyAddComplex',
|
||||
MathOperation.multiply_add_complex_gaussian: 'cutlass::arch::OpMultiplyAddGaussianComplex',
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user