2023-09-27 05:24:26 +08:00
|
|
|
#################################################################################################
|
|
|
|
#
|
|
|
|
# Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
|
|
# SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
#
|
|
|
|
# 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
|
# list of conditions and the following disclaimer.
|
|
|
|
#
|
|
|
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
|
|
# and/or other materials provided with the distribution.
|
|
|
|
#
|
|
|
|
# 3. Neither the name of the copyright holder nor the names of its
|
|
|
|
# contributors may be used to endorse or promote products derived from
|
|
|
|
# this software without specific prior written permission.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
|
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
|
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#
|
|
|
|
#################################################################################################
|
|
|
|
|
|
|
|
"""
|
|
|
|
Utilities for filtering CUTLASS library kernels and emitting library intitialization
|
|
|
|
and building code
|
|
|
|
"""
|
|
|
|
|
|
|
|
import enum
|
2023-11-02 23:09:05 +08:00
|
|
|
import logging
|
2023-09-27 05:24:26 +08:00
|
|
|
import os.path
|
|
|
|
import shutil
|
|
|
|
|
2023-11-02 23:09:05 +08:00
|
|
|
try:
|
|
|
|
import builtins
|
|
|
|
if hasattr(builtins, "CUTLASS_IGNORE_PACKAGE") and CUTLASS_IGNORE_PACKAGE == True:
|
|
|
|
raise ImportError("Disabling attempt to import cutlass_library")
|
|
|
|
from cutlass_library.library import *
|
|
|
|
from cutlass_library.gemm_operation import *
|
|
|
|
from cutlass_library.rank_k_operation import *
|
|
|
|
from cutlass_library.rank_2k_operation import *
|
|
|
|
from cutlass_library.trmm_operation import *
|
|
|
|
from cutlass_library.symm_operation import *
|
|
|
|
from cutlass_library.conv2d_operation import *
|
|
|
|
from cutlass_library.conv3d_operation import *
|
|
|
|
except ImportError:
|
|
|
|
from library import *
|
|
|
|
from gemm_operation import *
|
|
|
|
from rank_k_operation import *
|
|
|
|
from rank_2k_operation import *
|
|
|
|
from trmm_operation import *
|
|
|
|
from symm_operation import *
|
|
|
|
from conv2d_operation import *
|
|
|
|
from conv3d_operation import *
|
2023-09-27 05:24:26 +08:00
|
|
|
|
|
|
|
###################################################################################################
|
|
|
|
_LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class EmitOperationKindAll:
|
|
|
|
def __init__(self, generated_path, kind, args):
|
|
|
|
self.generated_path = generated_path
|
|
|
|
self.kind = kind
|
|
|
|
self.args = args
|
|
|
|
|
|
|
|
self.header_template ="""
|
|
|
|
/*
|
|
|
|
Generated by manifest.py - Do not edit.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "cutlass/cutlass.h"
|
|
|
|
#include "cutlass/library/library.h"
|
|
|
|
#include "cutlass/library/manifest.h"
|
|
|
|
|
|
|
|
namespace cutlass {
|
|
|
|
namespace library {
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
self.entry_template = """
|
|
|
|
|
|
|
|
//
|
|
|
|
// Entry point to construct operations
|
|
|
|
//
|
|
|
|
void initialize_all_${operation_name}_operations(Manifest &manifest) {
|
|
|
|
"""
|
|
|
|
self.configuration_prototype_template = "void initialize_${configuration_name}(Manifest &manifest);\n"
|
|
|
|
self.configuration_template =" initialize_${configuration_name}(manifest);\n"
|
|
|
|
|
|
|
|
self.epilogue_template ="""}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
} // namespace library
|
|
|
|
} // namespace cutlass
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
#
|
|
|
|
def __enter__(self):
|
|
|
|
self.operation_path = os.path.join(self.generated_path, OperationKindNames[self.kind])
|
|
|
|
os.makedirs(self.operation_path, exist_ok=True)
|
|
|
|
|
|
|
|
self.top_level_path = os.path.join(self.operation_path, f"all_{OperationKindNames[self.kind]}_operations.cu")
|
|
|
|
|
|
|
|
self.top_level_file = open(self.top_level_path, "w")
|
|
|
|
self.top_level_file.write(self.header_template)
|
|
|
|
|
|
|
|
self.source_files = [self.top_level_path,]
|
|
|
|
|
|
|
|
self.configurations = []
|
|
|
|
|
|
|
|
return self
|
|
|
|
|
|
|
|
#
|
|
|
|
def emit(self, operations):
|
|
|
|
for min_cc, configurations in sorted(operations.items()):
|
|
|
|
for configuration_name, _ in configurations.items():
|
|
|
|
self.configurations.append(configuration_name)
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.configuration_prototype_template, {'configuration_name': configuration_name} ))
|
|
|
|
|
|
|
|
#
|
|
|
|
def __exit__(self, exception_type, exception_value, traceback):
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.entry_template, {'operation_name': OperationKindNames[self.kind]}))
|
|
|
|
|
|
|
|
for configuration_name in self.configurations:
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.configuration_template, {'configuration_name': configuration_name}))
|
|
|
|
|
|
|
|
self.top_level_file.write(self.epilogue_template)
|
|
|
|
self.top_level_file.close()
|
|
|
|
|
|
|
|
|
|
|
|
class EmitOperationKindLibrary:
|
|
|
|
def __init__(self, generated_path, min_cc, kind, args):
|
|
|
|
self.generated_path = generated_path
|
|
|
|
self.min_cc = min_cc
|
|
|
|
self.kind = kind
|
|
|
|
self.args = args
|
|
|
|
self.emitters = {
|
|
|
|
OperationKind.Gemm: EmitGemmConfigurationLibrary,
|
|
|
|
OperationKind.Conv2d: EmitConv2dConfigurationLibrary,
|
|
|
|
OperationKind.Conv3d: EmitConv3dConfigurationLibrary,
|
|
|
|
OperationKind.RankK: EmitRankKConfigurationLibrary,
|
|
|
|
OperationKind.Rank2K: EmitRank2KConfigurationLibrary,
|
|
|
|
OperationKind.Trmm: EmitTrmmConfigurationLibrary,
|
|
|
|
OperationKind.Symm: EmitSymmConfigurationLibrary
|
|
|
|
}
|
|
|
|
|
|
|
|
self.header_template ="""
|
|
|
|
/*
|
|
|
|
Generated by manifest.py - Do not edit.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "cutlass/cutlass.h"
|
|
|
|
#include "cutlass/library/library.h"
|
|
|
|
#include "cutlass/library/manifest.h"
|
|
|
|
|
|
|
|
namespace cutlass {
|
|
|
|
namespace library {
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
"""
|
|
|
|
self.entry_template = """
|
|
|
|
|
|
|
|
//
|
|
|
|
// Entry point to construct operations
|
|
|
|
//
|
|
|
|
void initialize_all_sm${min_cc}_${subclass_name}_${operation_name}_operations(Manifest &manifest) {
|
|
|
|
"""
|
|
|
|
self.configuration_prototype_template = "void initialize_${configuration_name}(Manifest &manifest);\n"
|
|
|
|
self.configuration_template = " initialize_${configuration_name}(manifest);\n"
|
|
|
|
self.subclass_call_template = " initialize_all_sm${min_cc}_${subclass_name}_${operation_name}_operations(manifest);\n"
|
2023-12-05 22:50:49 +08:00
|
|
|
self.subclass_prototype_template = "void initialize_all_sm${min_cc}_${subclass_name}_${operation_name}_operations(Manifest &manifest);\n"
|
2023-09-27 05:24:26 +08:00
|
|
|
self.epilogue_template ="""}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
} // namespace library
|
|
|
|
} // namespace cutlass
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
#
|
|
|
|
def __enter__(self):
|
|
|
|
self.operation_path = os.path.join(self.generated_path, OperationKindNames[self.kind], str(self.min_cc))
|
|
|
|
os.makedirs(self.operation_path)
|
|
|
|
|
|
|
|
self.top_level_path = os.path.join(self.operation_path, f"all_sm{self.min_cc}_{OperationKindNames[self.kind]}_operations.cu")
|
|
|
|
|
|
|
|
self.top_level_file = open(self.top_level_path, "w")
|
|
|
|
self.top_level_file.write(self.header_template)
|
|
|
|
|
|
|
|
self.source_files = {}
|
|
|
|
|
|
|
|
# Each {operation_kind x cc} combination is further decomposed by the instruction
|
|
|
|
# types used. This dictionary used to track the file handles for the top-level
|
|
|
|
# files of each subclass
|
|
|
|
self.subclass_files = {}
|
|
|
|
|
|
|
|
# Configurations in each sub class
|
|
|
|
self.subclass_configurations = {}
|
|
|
|
|
|
|
|
return self
|
|
|
|
|
|
|
|
#
|
|
|
|
def emit(self, configuration_name, operations):
|
|
|
|
assert len(operations) > 0
|
|
|
|
|
|
|
|
# The extended name for all operations of a given configuration_name is guaranteed
|
|
|
|
# to be the same because extended_name() is used in defining configuration_name. Thus,
|
|
|
|
# we can safely use the extended_name() of the first operation.
|
|
|
|
extended_name = operations[0].extended_name()
|
|
|
|
|
|
|
|
# Create a directory for operations with this subclass if it does not exist
|
|
|
|
if extended_name not in self.subclass_files:
|
|
|
|
subclass_path = os.path.join(self.operation_path, extended_name)
|
|
|
|
os.mkdir(subclass_path)
|
|
|
|
|
|
|
|
self.subclass_configurations[extended_name] = []
|
|
|
|
|
|
|
|
# Open a new top-level file for this sub class
|
|
|
|
subclass_top_level_path = os.path.join(
|
|
|
|
subclass_path, f"all_sm{self.min_cc}_{extended_name}_{OperationKindNames[self.kind]}_operations.cu")
|
|
|
|
self.subclass_files[extended_name] = open(subclass_top_level_path, "w")
|
|
|
|
self.subclass_files[extended_name].write(self.header_template)
|
|
|
|
|
|
|
|
self.source_files[extended_name] = [subclass_top_level_path]
|
|
|
|
|
|
|
|
subclass_dir = os.path.dirname(self.subclass_files[extended_name].name)
|
|
|
|
with self.emitters[self.kind](subclass_dir, configuration_name) as configuration_emitter:
|
|
|
|
for operation in operations:
|
|
|
|
configuration_emitter.emit(operation)
|
|
|
|
|
|
|
|
self.source_files[extended_name].append(configuration_emitter.configuration_path)
|
|
|
|
|
|
|
|
self.subclass_configurations[extended_name].append(configuration_name)
|
|
|
|
self.subclass_files[extended_name].write(SubstituteTemplate(self.configuration_prototype_template, {'configuration_name': configuration_name} ))
|
|
|
|
|
|
|
|
#
|
|
|
|
def __exit__(self, exception_type, exception_value, traceback):
|
2023-11-30 13:25:40 +08:00
|
|
|
for subclass_name, subclass_file in sorted(self.subclass_files.items()):
|
|
|
|
subclass_cfg = {
|
|
|
|
'min_cc': str(self.min_cc),
|
|
|
|
'subclass_name': subclass_name,
|
|
|
|
'operation_name': OperationKindNames[self.kind]
|
|
|
|
}
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.subclass_prototype_template, subclass_cfg))
|
|
|
|
|
2023-09-27 05:24:26 +08:00
|
|
|
self.top_level_file.write(
|
|
|
|
SubstituteTemplate(self.entry_template, {
|
|
|
|
'min_cc': str(self.min_cc),
|
|
|
|
'subclass_name': '',
|
|
|
|
'operation_name': OperationKindNames[self.kind]
|
|
|
|
}))
|
|
|
|
|
|
|
|
# Finish and close all subclass files
|
|
|
|
for subclass_name, subclass_file in sorted(self.subclass_files.items()):
|
|
|
|
subclass_cfg = {
|
|
|
|
'min_cc': str(self.min_cc),
|
|
|
|
'subclass_name': subclass_name,
|
|
|
|
'operation_name': OperationKindNames[self.kind]
|
|
|
|
}
|
|
|
|
subclass_file.write(SubstituteTemplate(self.entry_template, subclass_cfg))
|
|
|
|
|
|
|
|
for configuration in self.subclass_configurations[subclass_name]:
|
|
|
|
subclass_file.write(
|
|
|
|
SubstituteTemplate(self.configuration_template, {
|
|
|
|
'configuration_name': configuration
|
|
|
|
}))
|
|
|
|
|
|
|
|
subclass_file.write(self.epilogue_template)
|
|
|
|
subclass_file.close()
|
|
|
|
|
|
|
|
# Write the call to initialize_all for this subclass to the top-level file
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.subclass_call_template, subclass_cfg))
|
|
|
|
|
|
|
|
self.top_level_file.write(self.epilogue_template)
|
|
|
|
self.top_level_file.close()
|
|
|
|
|
|
|
|
class EmitInterfaceLibrary:
|
|
|
|
def __init__(self, generated_path, operation_count, args):
|
|
|
|
self.generated_path = generated_path
|
|
|
|
self.args = args
|
|
|
|
|
|
|
|
self.prototypes = []
|
|
|
|
self.fn_calls = []
|
|
|
|
self.operation_count = str(operation_count)
|
|
|
|
|
|
|
|
self.top_level_hdr_template = '''
|
|
|
|
/*
|
|
|
|
Generated by manifest.py - Do not edit.
|
|
|
|
*/
|
|
|
|
'''
|
|
|
|
self.top_level_prologue = '''
|
|
|
|
|
|
|
|
#include "cutlass/library/library.h"
|
|
|
|
#include "cutlass/library/manifest.h"
|
|
|
|
|
|
|
|
namespace cutlass {
|
|
|
|
\tnamespace library {
|
|
|
|
|
|
|
|
${prototypes}
|
|
|
|
'''
|
|
|
|
|
|
|
|
self.top_level_initialize_kind = '''
|
|
|
|
\t\tvoid initialize_all_${kind}_operations(Manifest &manifest) {
|
|
|
|
${fn_calls}
|
|
|
|
\t\t}
|
|
|
|
'''
|
|
|
|
|
|
|
|
self.top_level_initialize = '''
|
|
|
|
\t\tvoid initialize_all(Manifest &manifest) {
|
|
|
|
\t\t\tmanifest.reserve(${operation_count});\n
|
|
|
|
${fn_calls}
|
|
|
|
\t\t}
|
|
|
|
'''
|
|
|
|
|
|
|
|
self.top_level_suffix = '''
|
|
|
|
\t} // namespace library
|
|
|
|
} // namespace cutlass
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
|
|
|
#
|
|
|
|
def __enter__(self):
|
|
|
|
self.top_level_path = os.path.join(self.generated_path, 'initialize_all.cpp')
|
|
|
|
|
|
|
|
self.top_level_file = open(self.top_level_path, "w")
|
|
|
|
self.top_level_file.write(self.top_level_hdr_template)
|
|
|
|
|
|
|
|
self.source_files = [self.top_level_path,]
|
|
|
|
|
|
|
|
return self
|
|
|
|
|
|
|
|
#
|
|
|
|
def emit(self, operation_name):
|
|
|
|
self.prototypes.append(SubstituteTemplate(
|
|
|
|
"\t\tvoid initialize_all_${operation_kind}_operations(Manifest &manifest);",
|
|
|
|
{'operation_kind': operation_name}))
|
|
|
|
|
|
|
|
self.fn_calls.append(SubstituteTemplate(
|
|
|
|
"\t\t\tinitialize_all_${operation_kind}_operations(manifest);",
|
|
|
|
{'operation_kind': operation_name}))
|
|
|
|
|
|
|
|
#
|
|
|
|
def __exit__(self, exception_type, exception_value, traceback):
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.top_level_prologue, {'prototypes':"\n".join(self.prototypes)}))
|
|
|
|
|
|
|
|
# Write out initialize_all method
|
|
|
|
self.top_level_file.write(SubstituteTemplate(self.top_level_initialize,
|
|
|
|
{'operation_count': self.operation_count, 'fn_calls':"\n".join(self.fn_calls)}))
|
|
|
|
|
|
|
|
self.top_level_file.write(self.top_level_suffix)
|
|
|
|
self.top_level_file.close()
|
|
|
|
|
|
|
|
###################################################################################################
|
|
|
|
###################################################################################################
|
|
|
|
|
|
|
|
class Options:
|
|
|
|
def __init__(self):
|
|
|
|
pass
|
|
|
|
|
|
|
|
###################################################################################################
|
|
|
|
|
|
|
|
#
|
|
|
|
class Manifest:
|
|
|
|
|
|
|
|
#
|
|
|
|
def __init__(self, args = None):
|
|
|
|
self.operations = {}
|
|
|
|
self.args = args
|
|
|
|
self.operation_count = 0
|
|
|
|
self.operations_by_name = {}
|
|
|
|
|
|
|
|
self.kernel_filter = ''
|
|
|
|
self.kernel_filter_list = []
|
|
|
|
self.kernel_names = []
|
|
|
|
self.operations_enabled = []
|
|
|
|
self.selected_kernels = []
|
|
|
|
self.ignore_kernel_names = []
|
|
|
|
self.compute_capabilities = [50,]
|
|
|
|
self.curr_build_dir = '.'
|
|
|
|
self.filter_by_cc = True
|
|
|
|
|
|
|
|
if self.args:
|
|
|
|
self.kernel_filter = self.args.kernels
|
|
|
|
self.curr_build_dir = args.curr_build_dir
|
|
|
|
|
|
|
|
architectures = args.architectures.split(';') if len(args.architectures) else ['50',]
|
|
|
|
architectures = [x if x != '90a' else '90' for x in architectures]
|
|
|
|
self.compute_capabilities = [int(x) for x in architectures]
|
|
|
|
|
|
|
|
if args.filter_by_cc in ['false', 'False', '0']:
|
|
|
|
self.filter_by_cc = False
|
|
|
|
|
|
|
|
if args.operations == 'all':
|
|
|
|
self.operations_enabled = []
|
|
|
|
else:
|
|
|
|
operations_list = [
|
|
|
|
OperationKind.Gemm
|
|
|
|
, OperationKind.Conv2d
|
|
|
|
, OperationKind.Conv3d
|
|
|
|
, OperationKind.RankK
|
|
|
|
, OperationKind.Trmm
|
|
|
|
, OperationKind.Symm
|
|
|
|
]
|
|
|
|
self.operations_enabled = [x for x in operations_list if OperationKindNames[x] in args.operations.split(',')]
|
|
|
|
|
|
|
|
if args.kernels == 'all':
|
|
|
|
self.kernel_names = []
|
|
|
|
else:
|
|
|
|
self.kernel_names = [x for x in args.kernels.split(',') if x != '']
|
|
|
|
|
|
|
|
self.ignore_kernel_names = [x for x in args.ignore_kernels.split(',') if x != '']
|
|
|
|
|
|
|
|
if args.kernel_filter_file is None:
|
|
|
|
self.kernel_filter_list = []
|
|
|
|
else:
|
|
|
|
self.kernel_filter_list = self.get_kernel_filters(args.kernel_filter_file)
|
2023-12-30 04:21:31 +08:00
|
|
|
_LOGGER.debug("Using {filter_count} kernel filters from {filter_file}".format(
|
2023-09-27 05:24:26 +08:00
|
|
|
filter_count = len(self.kernel_filter_list),
|
|
|
|
filter_file = args.kernel_filter_file))
|
|
|
|
|
|
|
|
self.operation_count = 0
|
|
|
|
self.operations_by_name = {}
|
|
|
|
self.disable_full_archs_compilation = args.disable_full_archs_compilation
|
|
|
|
|
|
|
|
|
|
|
|
def get_kernel_filters (self, kernelListFile):
|
|
|
|
if os.path.isfile(kernelListFile):
|
|
|
|
with open(kernelListFile, 'r') as fileReader:
|
|
|
|
lines = [line.rstrip() for line in fileReader if not line.startswith("#")]
|
|
|
|
|
|
|
|
lines = [re.compile(line) for line in lines if line]
|
|
|
|
return lines
|
|
|
|
else:
|
|
|
|
return []
|
|
|
|
|
|
|
|
#
|
|
|
|
def filter_out_kernels(self, kernel_name, kernel_filter_list):
|
|
|
|
|
|
|
|
for kernel_filter_re in kernel_filter_list:
|
|
|
|
if kernel_filter_re.search(kernel_name) is not None:
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
def _filter_string_matches(self, filter_string, haystack):
|
|
|
|
''' Returns true if all substrings appear in the haystack in order'''
|
|
|
|
substrings = filter_string.split('*')
|
|
|
|
for sub in substrings:
|
|
|
|
idx = haystack.find(sub)
|
|
|
|
if idx < 0:
|
|
|
|
return False
|
|
|
|
haystack = haystack[idx + len(sub):]
|
|
|
|
return True
|
|
|
|
|
|
|
|
#
|
|
|
|
def filter(self, operation):
|
|
|
|
''' Filtering operations based on various criteria'''
|
|
|
|
|
|
|
|
# filter based on compute capability
|
|
|
|
enabled = not (self.filter_by_cc)
|
|
|
|
|
|
|
|
for cc in self.compute_capabilities:
|
|
|
|
if cc >= operation.tile_description.minimum_compute_capability and \
|
|
|
|
cc <= operation.tile_description.maximum_compute_capability and \
|
|
|
|
(cc not in SharedMemPerCC or SharedMemPerCC[cc] >= CalculateSmemUsage(operation)):
|
|
|
|
|
|
|
|
enabled = True
|
|
|
|
break
|
|
|
|
|
|
|
|
if not enabled:
|
|
|
|
return False
|
|
|
|
|
|
|
|
if len(self.operations_enabled) and not operation.operation_kind in self.operations_enabled:
|
|
|
|
return False
|
|
|
|
|
|
|
|
# eliminate duplicates
|
|
|
|
if operation.procedural_name() in self.operations_by_name.keys():
|
|
|
|
return False
|
|
|
|
|
|
|
|
# Filter based on list of valid substrings
|
|
|
|
if len(self.kernel_names):
|
|
|
|
name = operation.procedural_name()
|
|
|
|
enabled = False
|
|
|
|
|
|
|
|
# compare against the include list
|
|
|
|
for name_substr in self.kernel_names:
|
|
|
|
if self._filter_string_matches(name_substr, name):
|
|
|
|
_LOGGER.debug("Kernel {kernel} included due to filter string '{filt}'.".format(
|
|
|
|
kernel = operation.procedural_name(),
|
|
|
|
filt = name_substr))
|
|
|
|
enabled = True
|
|
|
|
break
|
|
|
|
|
|
|
|
# compare against the exclude list
|
|
|
|
for name_substr in self.ignore_kernel_names:
|
|
|
|
if self._filter_string_matches(name_substr, name):
|
|
|
|
_LOGGER.debug("Kernel {kernel} ignored due to filter string '{filt}'.".format(
|
|
|
|
kernel = operation.procedural_name(),
|
|
|
|
filt = name_substr))
|
|
|
|
enabled = False
|
|
|
|
break
|
|
|
|
|
|
|
|
if len(self.kernel_filter_list) > 0:
|
|
|
|
if self.filter_out_kernels(operation.procedural_name(), self.kernel_filter_list):
|
|
|
|
_LOGGER.debug("Kernel {kernel} matched via kernel filter file.".format(kernel = operation.procedural_name()))
|
|
|
|
enabled = True
|
|
|
|
else:
|
|
|
|
_LOGGER.debug("Kernel {kernel} culled due to no match in kernel filter file.".format(kernel = operation.procedural_name()))
|
|
|
|
enabled = False
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: filter based on compute data type
|
|
|
|
return enabled
|
|
|
|
#
|
|
|
|
|
|
|
|
#
|
|
|
|
def append(self, operation):
|
|
|
|
'''
|
|
|
|
Inserts the operation.
|
|
|
|
|
|
|
|
operation_kind -> configuration_name -> []
|
|
|
|
'''
|
|
|
|
|
|
|
|
if self.filter(operation):
|
|
|
|
|
|
|
|
self.selected_kernels.append(operation.procedural_name())
|
|
|
|
|
|
|
|
self.operations_by_name[operation.procedural_name()] = operation
|
|
|
|
|
|
|
|
# add the configuration
|
|
|
|
configuration_name = operation.configuration_name()
|
|
|
|
|
|
|
|
# Split operations by minimum CC
|
|
|
|
min_cc = operation.arch
|
|
|
|
|
|
|
|
if operation.operation_kind not in self.operations.keys():
|
|
|
|
self.operations[operation.operation_kind] = {}
|
|
|
|
|
|
|
|
if min_cc not in self.operations[operation.operation_kind]:
|
|
|
|
self.operations[operation.operation_kind][min_cc] = {}
|
|
|
|
|
|
|
|
if configuration_name not in self.operations[operation.operation_kind][min_cc].keys():
|
|
|
|
self.operations[operation.operation_kind][min_cc][configuration_name] = []
|
|
|
|
|
|
|
|
self.operations[operation.operation_kind][min_cc][configuration_name].append(operation)
|
|
|
|
self.operation_count += 1
|
|
|
|
else:
|
|
|
|
_LOGGER.debug("Culled {} from manifest".format(operation.procedural_name()))
|
|
|
|
#
|
|
|
|
|
|
|
|
def emit_manifest_cmake(self, manifest_path, top_level_path, source_files):
|
|
|
|
with open(manifest_path, "w") as manifest_file:
|
|
|
|
|
|
|
|
target_text = SubstituteTemplate("""cutlass_target_sources(cutlass_library_objs PRIVATE
|
|
|
|
""", { })
|
|
|
|
manifest_file.write(target_text + '\n\n')
|
|
|
|
manifest_file.write(" %s\n" % str(top_level_path.replace('\\', '/')))
|
|
|
|
generated_path = os.path.join(self.curr_build_dir, 'generated')
|
|
|
|
for kind in self.operations.keys():
|
|
|
|
kind_str = OperationKindNames[kind]
|
|
|
|
all_kind_file = os.path.join(generated_path, kind_str, f"all_{kind_str}_operations.cu").replace('\\', '/')
|
|
|
|
manifest_file.write(f" {all_kind_file}\n")
|
|
|
|
manifest_file.write(')\n\n')
|
|
|
|
|
|
|
|
for kind in self.operations.keys():
|
|
|
|
for min_cc in sorted(self.operations[kind].keys()):
|
|
|
|
for subclass in sorted(source_files[kind][min_cc].keys()):
|
|
|
|
target_text = SubstituteTemplate("""cutlass_add_cutlass_library(
|
|
|
|
SUFFIX ${kind}_sm${min_cc}_${subclass}
|
|
|
|
""", { 'min_cc': str(min_cc), 'kind': OperationKindNames[kind], 'subclass': subclass })
|
|
|
|
manifest_file.write(target_text + '\n\n')
|
|
|
|
|
|
|
|
for source_file in source_files[kind][min_cc][subclass]:
|
|
|
|
manifest_file.write(" %s\n" % str(source_file.replace('\\', '/')))
|
|
|
|
|
|
|
|
manifest_file.write(")\n")
|
|
|
|
|
|
|
|
if self.disable_full_archs_compilation:
|
|
|
|
self.emit_disable_full_archs_compilation(manifest_file, source_files)
|
|
|
|
|
|
|
|
def emit_disable_full_archs_compilation(manifest_file, source_files):
|
|
|
|
def for_hopper(name):
|
|
|
|
pass
|
|
|
|
|
|
|
|
def for_ampere(name):
|
|
|
|
return "16816" in name or \
|
|
|
|
"16832" in name or \
|
|
|
|
"16864" in name or \
|
|
|
|
("1688" in name and "tf32" in name)
|
|
|
|
|
|
|
|
def for_turing(name):
|
|
|
|
return ("1688" in name and "tf32" not in name) or \
|
|
|
|
"8816" in name
|
|
|
|
|
|
|
|
def for_volta(name):
|
|
|
|
return "884" in name
|
|
|
|
|
|
|
|
def is_cpp(name):
|
|
|
|
return name.endswith(".cpp")
|
|
|
|
|
|
|
|
def get_src_archs_str_given_requested_cuda_archs(archs, source_file):
|
|
|
|
intersected_archs = archs & set(self.compute_capabilities)
|
|
|
|
if intersected_archs == set():
|
|
|
|
raise RuntimeError(
|
|
|
|
"""
|
|
|
|
Empty archs set for file {} after taking
|
|
|
|
the intersection of {} (global requested archs) and
|
|
|
|
{} (per file requested archs)
|
|
|
|
""".format(source_file, set(self.compute_capabilities), archs))
|
|
|
|
else:
|
|
|
|
return " ".join(map(str, intersected_archs))
|
|
|
|
|
|
|
|
for min_cc in sorted(source_files.keys()):
|
|
|
|
for source_file in source_files[min_cc]:
|
|
|
|
if is_cpp(source_file):
|
|
|
|
continue # skip because source is cpp
|
|
|
|
elif for_ampere(source_file):
|
|
|
|
archs_str = get_src_archs_str_given_requested_cuda_archs({80, 87, 90}, source_file)
|
|
|
|
elif for_turing(source_file):
|
|
|
|
archs_str = get_src_archs_str_given_requested_cuda_archs({75}, source_file)
|
|
|
|
elif for_volta(source_file):
|
|
|
|
archs_str = get_src_archs_str_given_requested_cuda_archs({70, 72}, source_file)
|
|
|
|
else:
|
|
|
|
raise RuntimeError("Per file archs are not set {}, as there is no rule specified for this file pattern".format(source_file))
|
|
|
|
|
|
|
|
manifest_file.write("cutlass_apply_cuda_gencode_flags({} SM_ARCHS {})\n".format(str(source_file.replace('\\', '/')), archs_str))
|
|
|
|
|
|
|
|
#
|
|
|
|
def emit(self, target = GeneratorTarget.Library):
|
|
|
|
|
|
|
|
operation_emitters = {
|
|
|
|
GeneratorTarget.Library: EmitOperationKindLibrary
|
|
|
|
}
|
|
|
|
|
|
|
|
# Emitters for all operations that fall under a particular kind (e.g., GEMM, Conv2d)
|
|
|
|
kind_emitters = {
|
|
|
|
GeneratorTarget.Library: EmitOperationKindAll
|
|
|
|
}
|
|
|
|
|
|
|
|
interface_emitters = {
|
|
|
|
GeneratorTarget.Library: EmitInterfaceLibrary
|
|
|
|
}
|
|
|
|
|
|
|
|
generated_path = os.path.join(self.curr_build_dir, 'generated')
|
|
|
|
|
|
|
|
# create generated/
|
|
|
|
if os.path.exists(generated_path):
|
|
|
|
shutil.rmtree(generated_path)
|
|
|
|
|
|
|
|
os.mkdir(generated_path)
|
|
|
|
|
|
|
|
with interface_emitters[target](generated_path, self.operation_count, self.args) as iface_emitter:
|
|
|
|
top_level_path = iface_emitter.top_level_path
|
|
|
|
for operation_kind in self.operations.keys():
|
|
|
|
iface_emitter.emit(OperationKindNames[operation_kind])
|
|
|
|
|
|
|
|
source_files = {}
|
|
|
|
for kind in self.operations.keys():
|
|
|
|
source_files[kind] = {}
|
|
|
|
for min_cc in self.operations[kind].keys():
|
|
|
|
source_files[kind][min_cc] = {}
|
|
|
|
|
|
|
|
for operation_kind, ops in self.operations.items():
|
|
|
|
for min_cc, configurations in sorted(ops.items()):
|
|
|
|
with operation_emitters[target](generated_path, min_cc, operation_kind, self.args) as operation_kind_emitter:
|
|
|
|
for configuration_name, operations in configurations.items():
|
|
|
|
_LOGGER.info("Emitting {config} with {num_ops} operations.".format(
|
|
|
|
config = configuration_name, num_ops = len(operations)))
|
|
|
|
operation_kind_emitter.emit(configuration_name, operations)
|
|
|
|
|
|
|
|
for subclass, files in operation_kind_emitter.source_files.items():
|
|
|
|
if subclass not in source_files[operation_kind][min_cc]:
|
|
|
|
source_files[operation_kind][min_cc][subclass] = []
|
|
|
|
source_files[operation_kind][min_cc][subclass].extend(operation_kind_emitter.source_files[subclass])
|
|
|
|
|
|
|
|
# Emit top level all_{gemm, conv2d, ...}_operations.cu files
|
|
|
|
with kind_emitters[target](generated_path, operation_kind, self.args) as operation_kind_emitter:
|
|
|
|
operation_kind_emitter.emit(ops)
|
|
|
|
|
|
|
|
# write the manifest.cmake file containing paths from all targets
|
|
|
|
manifest_path = os.path.join(generated_path, "manifest.cmake")
|
|
|
|
|
|
|
|
self.emit_manifest_cmake(manifest_path, top_level_path, source_files)
|
|
|
|
|
|
|
|
###################################################################################################
|