add the missing pieces (#392)
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
parent
dceabd4c5a
commit
f78994bb40
@ -41,6 +41,7 @@ class GemmOperation:
|
|||||||
complex_operators = [
|
complex_operators = [
|
||||||
MathOperation.multiply_add_complex,
|
MathOperation.multiply_add_complex,
|
||||||
MathOperation.multiply_add_complex_gaussian,
|
MathOperation.multiply_add_complex_gaussian,
|
||||||
|
MathOperation.multiply_add_complex_fast_f32
|
||||||
]
|
]
|
||||||
return self.tile_description.math_instruction.math_operation in complex_operators
|
return self.tile_description.math_instruction.math_operation in complex_operators
|
||||||
|
|
||||||
|
@ -2641,6 +2641,9 @@ def GenerateSM80(manifest, args):
|
|||||||
GenerateSM80_TensorOp_1688_fast_math(manifest, args)
|
GenerateSM80_TensorOp_1688_fast_math(manifest, args)
|
||||||
GenerateSM80_SparseTensorOp_16816_fast_math(manifest, args)
|
GenerateSM80_SparseTensorOp_16816_fast_math(manifest, args)
|
||||||
GenerateSM80_TensorOp_1688_complex(manifest, args)
|
GenerateSM80_TensorOp_1688_complex(manifest, args)
|
||||||
|
# 3xTF32
|
||||||
|
GenerateSM80_TensorOp_1688_fast_fp32_math(manifest, args)
|
||||||
|
GenerateSM80_TensorOp_1688_fast_fp32_math_complex(manifest, args)
|
||||||
GenerateSM80_TensorOp_884(manifest, args)
|
GenerateSM80_TensorOp_884(manifest, args)
|
||||||
GenerateSM80_TensorOp_884_complex(manifest, args)
|
GenerateSM80_TensorOp_884_complex(manifest, args)
|
||||||
GenerateSM80_TensorOp_884_complex_gaussian(manifest, args)
|
GenerateSM80_TensorOp_884_complex_gaussian(manifest, args)
|
||||||
|
@ -240,6 +240,8 @@ class MathOperation(enum.Enum):
|
|||||||
xor_popc = enum_auto()
|
xor_popc = enum_auto()
|
||||||
multiply_add_fast_bf16 = enum_auto()
|
multiply_add_fast_bf16 = enum_auto()
|
||||||
multiply_add_fast_f16 = enum_auto()
|
multiply_add_fast_f16 = enum_auto()
|
||||||
|
multiply_add_fast_f32 = enum_auto()
|
||||||
|
multiply_add_complex_fast_f32 = enum_auto()
|
||||||
multiply_add_complex = enum_auto()
|
multiply_add_complex = enum_auto()
|
||||||
multiply_add_complex_gaussian = enum_auto()
|
multiply_add_complex_gaussian = enum_auto()
|
||||||
|
|
||||||
@ -250,6 +252,8 @@ MathOperationTag = {
|
|||||||
MathOperation.xor_popc: 'cutlass::arch::OpXorPopc',
|
MathOperation.xor_popc: 'cutlass::arch::OpXorPopc',
|
||||||
MathOperation.multiply_add_fast_bf16: 'cutlass::arch::OpMultiplyAddFastBF16',
|
MathOperation.multiply_add_fast_bf16: 'cutlass::arch::OpMultiplyAddFastBF16',
|
||||||
MathOperation.multiply_add_fast_f16: 'cutlass::arch::OpMultiplyAddFastF16',
|
MathOperation.multiply_add_fast_f16: 'cutlass::arch::OpMultiplyAddFastF16',
|
||||||
|
MathOperation.multiply_add_fast_f32: 'cutlass::arch::OpMultiplyAddFastF32',
|
||||||
|
MathOperation.multiply_add_complex_fast_f32: 'cutlass::arch::OpMultiplyAddComplexFastF32',
|
||||||
MathOperation.multiply_add_complex: 'cutlass::arch::OpMultiplyAddComplex',
|
MathOperation.multiply_add_complex: 'cutlass::arch::OpMultiplyAddComplex',
|
||||||
MathOperation.multiply_add_complex_gaussian: 'cutlass::arch::OpMultiplyAddGaussianComplex',
|
MathOperation.multiply_add_complex_gaussian: 'cutlass::arch::OpMultiplyAddGaussianComplex',
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user