Add missing schedules argument in SM90 fp16 op generation (#920)
This commit is contained in:
parent
180c5629bf
commit
df02482f1d
@@ -4160,7 +4160,7 @@ def GenerateSM90_TensorOp_16b_WGMMA_gemm(manifest, cuda_version):
|
|||||||
elif data_type_mixed["c_type"] in [DataType.f16, DataType.bf16]:
|
elif data_type_mixed["c_type"] in [DataType.f16, DataType.bf16]:
|
||||||
layout[2][1] = 8
|
layout[2][1] = 8
|
||||||
|
|
||||||
CreateGemmUniversal3xOperator(manifest, layouts, tile_descriptions, data_type_mixed)
|
CreateGemmUniversal3xOperator(manifest, layouts, tile_descriptions, data_type_mixed, schedules)
|
||||||
# persistent kernels with TMA epilogues
|
# persistent kernels with TMA epilogues
|
||||||
if data_type_mixed["c_type"] in [DataType.f16, DataType.bf16] and CudaToolkitVersionSatisfies(cuda_version, 12, 1):
|
if data_type_mixed["c_type"] in [DataType.f16, DataType.bf16] and CudaToolkitVersionSatisfies(cuda_version, 12, 1):
|
||||||
CreateGemmUniversal3xOperator(manifest, layouts, tile_descriptions, data_type_mixed,
|
CreateGemmUniversal3xOperator(manifest, layouts, tile_descriptions, data_type_mixed,
|
||||||
|
Loading…
Reference in New Issue
Block a user