
CUTLASS 2.0

Substantially refactored for:
- Better performance, particularly for native Turing Tensor Cores
- Robust and durable templates spanning the design space
- Encapsulated functionality embodying modern C++11 programming techniques
- Optimized containers and data types for efficient, generic, portable device code

Updates to:
- Quick start guide
- Documentation
- Utilities
- CUTLASS Profiler

Native Turing Tensor Cores:
- Efficient GEMM kernels targeting Turing Tensor Cores
- Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands

Coverage of existing CUTLASS functionality:
- GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
- Volta Tensor Cores through native mma.sync and through WMMA API
- Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
- Batched GEMM operations
- Complex-valued GEMMs

Note: this commit and all that follow require a host compiler supporting C++11 or greater.
273 lines
7.8 KiB
Python
273 lines
7.8 KiB
Python
#
|
|
# \file generator.py
|
|
#
|
|
# \brief Generates the CUTLASS Library's instances
|
|
#
|
|
|
|
import enum
|
|
import os.path
|
|
import shutil
|
|
|
|
from library import *
|
|
from gemm_operation import *
|
|
###################################################################################################
|
|
|
|
class EmitOperationKindLibrary:
    """Context manager that emits the generated sources for one operation kind.

    On entry it creates ``<generated_path>/<kind name>/`` and opens the
    top-level ``all_<kind name>_operations.cu`` file. Each call to ``emit``
    writes one configuration through the emitter registered for the kind and
    records a prototype for its initializer. On exit the entry-point function
    that calls every recorded configuration initializer is written and the
    top-level file is closed.

    Attributes set in ``__enter__``:
        operation_path: directory holding the sources for this kind.
        top_level_path: path of the top-level ``.cu`` file.
        top_level_file: open handle on ``top_level_path``.
        source_files: paths of every source file emitted so far.
    """

    def __init__(self, generated_path, kind, args):
        """Store the output location and set up the text templates.

        Args:
            generated_path: directory under which the per-kind directory is
                created.
            kind: operation kind; used as the key into ``self.emitters`` and
                ``OperationKindNames``.
            args: stored on the instance unchanged; not read by this class.
        """
        self.generated_path = generated_path
        self.kind = kind
        self.args = args

        # Emitter class used for each operation kind's configurations.
        self.emitters = {
            OperationKind.Gemm: EmitGemmConfigurationLibrary
        }

        # Configuration names in the order they were emitted.
        self.configurations = []

        self.header_template = """
/*
Generated by manifest.py - Do not edit.
*/

#include "cutlass/cutlass.h"
#include "cutlass/library/library.h"
#include "cutlass/library/manifest.h"

namespace cutlass {
namespace library {

///////////////////////////////////////////////////////////////////////////////////////////////////

"""
        self.entry_template = """

//
// Entry point to construct operations
//
void initialize_all_${operation_name}_operations(Manifest &manifest) {
"""
        self.configuration_prototype_template = "void initialize_${configuration_name}(Manifest &manifest);\n"
        self.configuration_template = " initialize_${configuration_name}(manifest);\n"

        self.epilogue_template = """

}

///////////////////////////////////////////////////////////////////////////////////////////////////

} // namespace library
} // namespace cutlass

"""

    #
    def __enter__(self):
        """Create the per-kind directory and start the top-level source file.

        Returns:
            This emitter.

        Raises:
            OSError: if the directory cannot be created (``os.mkdir`` also
                fails when it already exists) or the file cannot be opened.
        """
        kind_name = OperationKindNames[self.kind]

        self.operation_path = os.path.join(self.generated_path, kind_name)
        os.mkdir(self.operation_path)

        self.top_level_path = os.path.join(
            self.operation_path, "all_%s_operations.cu" % kind_name)

        self.top_level_file = open(self.top_level_path, "w")
        try:
            self.top_level_file.write(self.header_template)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so the handle
            # has to be released here.
            self.top_level_file.close()
            raise

        self.source_files = [self.top_level_path]

        return self

    #
    def emit(self, configuration_name, operations):
        """Emit one configuration and declare its initializer.

        Args:
            configuration_name: name substituted into the initializer
                prototype and passed to the configuration emitter.
            operations: iterable of operations handed one by one to the
                configuration emitter's ``emit``.
        """
        emitter_type = self.emitters[self.kind]

        with emitter_type(self.operation_path, configuration_name) as configuration_emitter:
            for operation in operations:
                configuration_emitter.emit(operation)

            self.source_files.append(configuration_emitter.configuration_path)

        self.configurations.append(configuration_name)
        self.top_level_file.write(SubstituteTemplate(
            self.configuration_prototype_template,
            {'configuration_name': configuration_name}))

    #
    def __exit__(self, exception_type, exception_value, traceback):
        """Write the entry-point function and close the top-level file.

        The file is closed even if writing the trailer fails. Nothing truthy
        is returned, so an exception raised inside the ``with`` body
        propagates to the caller.
        """
        try:
            self.top_level_file.write(SubstituteTemplate(
                self.entry_template,
                {'operation_name': OperationKindNames[self.kind]}))

            for configuration_name in self.configurations:
                self.top_level_file.write(SubstituteTemplate(
                    self.configuration_template,
                    {'configuration_name': configuration_name}))

            self.top_level_file.write(self.epilogue_template)
        finally:
            self.top_level_file.close()
|
|
|
|
###################################################################################################
|
|
###################################################################################################
|
|
|
|
class Options:
    """Empty placeholder object; it defines no attributes of its own."""

    def __init__(self):
        """Create an instance without setting any state."""
|
|
|
|
###################################################################################################
|
|
|
|
#
|
|
class Manifest:
    """Collects operations and emits the sources that register them.

    Operations are added with ``append`` (subject to ``filter``) and stored
    as ``operation_kind -> configuration_name -> [operations]``. ``emit``
    then writes the generated directory tree and a ``manifest.cmake`` file
    listing every generated source.
    """

    #
    def __init__(self, args):
        """Parse the selection options and set up the text templates.

        Args:
            args: object providing ``architectures`` (';'-separated integer
                compute capabilities), ``kernels`` ('all' or a ','-separated
                list of name substrings) and ``curr_build_dir`` (read later
                by ``emit``).

        Raises:
            ValueError: if a non-empty architecture token is not an integer.
        """
        self.operations = {}
        self.args = args

        # Skip empty tokens so a trailing or doubled ';' does not reach
        # int(), which would raise ValueError.
        self.compute_capabilities = [
            int(x) for x in args.architectures.split(';') if x.strip()
        ]

        if args.kernels == 'all':
            self.kernel_names = []
        else:
            # Drop empty tokens: '' is a substring of every name, so a stray
            # ',' would otherwise enable every kernel.
            self.kernel_names = [x for x in args.kernels.split(',') if x != '']

        self.operation_count = 0
        self.operations_by_name = {}
        self.top_level_prologue = '''

#include "cutlass/library/library.h"
#include "cutlass/library/manifest.h"

namespace cutlass {
namespace library {

${prototypes}

void initialize_all(Manifest &manifest) {

'''
        self.top_level_reserve = ' manifest.reserve(${operation_count});\n\n'
        self.top_level_epilogue = '''
}

} // namespace library
} // namespace cutlass

'''

    #
    def filter(self, operation):
        """Return True if the operation should be added to the manifest.

        An operation is accepted when all of the following hold:
          * at least one requested compute capability lies within the
            operation's tile description's minimum/maximum range;
          * no operation with the same procedural name was already added;
          * if kernel name substrings were given, at least one of them occurs
            in the procedural name.
        """
        # filter based on compute capability
        tile = operation.tile_description
        supported = any(
            tile.minimum_compute_capability <= cc <= tile.maximum_compute_capability
            for cc in self.compute_capabilities)
        if not supported:
            return False

        # eliminate duplicates
        name = operation.procedural_name()
        if name in self.operations_by_name:
            return False

        # Filter based on list of valid substrings
        if self.kernel_names:
            return any(name_substr in name for name_substr in self.kernel_names)

        # todo: filter based on operation kind
        # todo: filter based on compute data type
        return True

    #
    def append(self, operation):
        """Insert the operation if it passes ``filter``.

        Accepted operations are indexed by procedural name and stored as
        ``operation_kind -> configuration_name -> []``; ``operation_count``
        is incremented for each one.
        """
        if not self.filter(operation):
            return

        self.operations_by_name[operation.procedural_name()] = operation

        # add the configuration
        configuration_name = operation.configuration_name()

        configurations = self.operations.setdefault(operation.operation_kind, {})
        configurations.setdefault(configuration_name, []).append(operation)

        self.operation_count += 1

    #
    def emit(self, target=GeneratorTarget.Library):
        """Write all generated sources and the ``manifest.cmake`` file.

        The directory ``<args.curr_build_dir>/generated`` is deleted if it
        exists and then recreated. ``initialize_all.cpp`` is written there,
        followed by one sub-tree per operation kind produced by the emitter
        registered for ``target``. Finally ``manifest.cmake`` lists every
        source file, with backslashes in paths replaced by forward slashes.

        Args:
            target: key selecting the per-operation-kind emitter class.
        """
        operation_emitters = {
            GeneratorTarget.Library: EmitOperationKindLibrary
        }

        generated_path = os.path.join(self.args.curr_build_dir, 'generated')

        # create generated/
        if os.path.exists(generated_path):
            shutil.rmtree(generated_path)

        os.mkdir(generated_path)

        source_files = []

        top_level_path = os.path.join(generated_path, 'initialize_all.cpp')
        with open(top_level_path, 'w') as top_level_file:

            if target == GeneratorTarget.Library:
                source_files.append(top_level_path)

            # One forward declaration per operation kind.
            prototypes = [
                SubstituteTemplate(
                    "void initialize_all_${operation_kind}_operations(Manifest &manifest);",
                    {'operation_kind': OperationKindNames[operation_kind]})
                for operation_kind in self.operations
            ]

            top_level_file.write(SubstituteTemplate(
                self.top_level_prologue,
                {'prototypes': "\n".join(prototypes)}))

            top_level_file.write(SubstituteTemplate(
                self.top_level_reserve,
                {'operation_count': str(self.operation_count)}))

            # for each operation kind, emit initializer for all configurations
            for operation_kind, configurations in self.operations.items():
                with operation_emitters[target](generated_path, operation_kind, self.args) as operation_kind_emitter:
                    for configuration_name, operations in configurations.items():
                        operation_kind_emitter.emit(configuration_name, operations)

                    source_files += operation_kind_emitter.source_files

                top_level_file.write(SubstituteTemplate(
                    " initialize_all_${operation_kind}_operations(manifest);\n",
                    {'operation_kind': OperationKindNames[operation_kind]}))

            top_level_file.write(self.top_level_epilogue)

        # write the manifest.cmake file containing paths from all targets
        manifest_path = os.path.join(generated_path, "manifest.cmake")
        with open(manifest_path, "w") as manifest_file:

            target_name = 'cutlass_lib'

            target_text = SubstituteTemplate("""cutlass_target_sources(
${target_name}
PRIVATE
""", {'target_name': target_name})

            manifest_file.write(target_text)

            for source_file in source_files:
                manifest_file.write(" %s\n" % str(source_file.replace('\\', '/')))
            manifest_file.write(")")
|
|
#
|
|
|
|
###################################################################################################
|