
CUTLASS 2.0 Substantially refactored for - Better performance, particularly for native Turing Tensor Cores - Robust and durable templates spanning the design space - Encapsulated functionality embodying modern C++11 programming techniques - Optimized containers and data types for efficient, generic, portable device code Updates to: - Quick start guide - Documentation - Utilities - CUTLASS Profiler Native Turing Tensor Cores - Efficient GEMM kernels targeting Turing Tensor Cores - Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands Coverage of existing CUTLASS functionality: - GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs - Volta Tensor Cores through native mma.sync and through WMMA API - Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions - Batched GEMM operations - Complex-valued GEMMs Note: this commit and all that follow require a host compiler supporting C++11 or greater.
16 lines
4.8 KiB
JavaScript
16 lines
4.8 KiB
JavaScript
var searchData=
|
|
[
|
|
['a',['A',['../structcutlass_1_1library_1_1GemmDescription.html#aa821b15ab4f4c51c4890c7cac685fba4',1,'cutlass::library::GemmDescription::A()'],['../structcutlass_1_1library_1_1GemmArguments.html#ab4c9ecf49885b1e2ff88ff9c7a4ffad9',1,'cutlass::library::GemmArguments::A()'],['../structcutlass_1_1library_1_1GemmArrayArguments.html#afcce27cdc91247a9e4e8e610c3a38e63',1,'cutlass::library::GemmArrayArguments::A()']]],
|
|
['a_5ftile',['A_tile',['../structcutlass_1_1reference_1_1device_1_1thread_1_1Gemm.html#a2d63fe67429aa6441e6e247563db1a11',1,'cutlass::reference::device::thread::Gemm']]],
|
|
['accum',['accum',['../structcutlass_1_1reference_1_1device_1_1thread_1_1Gemm.html#a304c308d4cf13915cf1ba796c506dda6',1,'cutlass::reference::device::thread::Gemm']]],
|
|
['advance_5fcluster',['advance_cluster',['../structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Params.html#a38a6dcfddaf9078334107eb8a38595fb',1,'cutlass::epilogue::threadblock::PredicatedTileIterator::Params']]],
|
|
['advance_5fcolumn',['advance_column',['../structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator_1_1Params.html#a5593ba3ee47ba8bb6dc2b0b001b38824',1,'cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator::Params']]],
|
|
['advance_5fgroup',['advance_group',['../structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Params.html#a010385e2b1e39fb0a42ce65c68e07e8e',1,'cutlass::epilogue::threadblock::PredicatedTileIterator::Params']]],
|
|
['advance_5frow',['advance_row',['../structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Params.html#a4ceaade8da07a3951a30c6d24b79f557',1,'cutlass::epilogue::threadblock::PredicatedTileIterator::Params::advance_row()'],['../structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator_1_1Params.html#a31c6472b5e890a37657d573646114156',1,'cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator::Params::advance_row()']]],
|
|
['advance_5ftile',['advance_tile',['../structcutlass_1_1epilogue_1_1threadblock_1_1PredicatedTileIterator_1_1Params.html#a93f34cf9a98ab9bf6b2f7156848c9efd',1,'cutlass::epilogue::threadblock::PredicatedTileIterator::Params']]],
|
|
['alignment',['alignment',['../structcutlass_1_1library_1_1TensorDescription.html#a9fed369aad059bda36c528e72f8bb8fd',1,'cutlass::library::TensorDescription']]],
|
|
['alpha',['alpha',['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombination_1_1Params.html#aebf79db277fc69c1fafae975538f8c57',1,'cutlass::epilogue::thread::LinearCombination::Params::alpha()'],['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationClamp_1_1Params.html#a92b42f2498efb637ba4e2eb2bc7710c8',1,'cutlass::epilogue::thread::LinearCombinationClamp::Params::alpha()'],['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_1_1Params.html#a48d23248d58b6f7b5aa6aae7cd66afa0',1,'cutlass::epilogue::thread::LinearCombinationRelu::Params::alpha()'],['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_3_01ElementOutput___00_01Count_00_00274a94522c46cd041d0b10d484e2ef3.html#a4a225cb1f8a09c643a8016024ccbf6cb',1,'cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::alpha()'],['../structcutlass_1_1gemm_1_1kernel_1_1detail_1_1GemvBatchedStridedEpilogueScaling.html#afd6c2151dbdd056d214d5ccee685c676',1,'cutlass::gemm::kernel::detail::GemvBatchedStridedEpilogueScaling::alpha()'],['../structcutlass_1_1reduction_1_1BatchedReductionTraits_1_1Params.html#afada1cbad87636228fb58d8577bb8470',1,'cutlass::reduction::BatchedReductionTraits::Params::alpha()'],['../structcutlass_1_1library_1_1GemmArguments.html#a94a7760d5bbee524c2381adf7a2fd683',1,'cutlass::library::GemmArguments::alpha()'],['../structcutlass_1_1library_1_1GemmArrayArguments.html#a2de0ca18e82590a98ac2ffea6a741840',1,'cutlass::library::GemmArrayArguments::alpha()']]],
|
|
['alpha_5fptr',['alpha_ptr',['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombination_1_1Params.html#aabd9260f2a8a1b809864fe20dc3c7553',1,'cutlass::epilogue::thread::LinearCombination::Params::alpha_ptr()'],['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationClamp_1_1Params.html#a0ddbb24a81370a342d05509bb78f122c',1,'cutlass::epilogue::thread::LinearCombinationClamp::Params::alpha_ptr()'],['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_1_1Params.html#a6c266e54081c08d25f4d6832ca24e673',1,'cutlass::epilogue::thread::LinearCombinationRelu::Params::alpha_ptr()'],['../structcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_3_01ElementOutput___00_01Count_00_00274a94522c46cd041d0b10d484e2ef3.html#aa4d227d5db2999f4738a207183fc4fc3',1,'cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::alpha_ptr()']]],
|
|
['args',['args',['../structcutlass_1_1CommandLine.html#a6a338671a8d323882f9d9463863eb1c1',1,'cutlass::CommandLine']]]
|
|
];
|