
CUTLASS 2.0 Substantially refactored for - Better performance, particularly for native Turing Tensor Cores - Robust and durable templates spanning the design space - Encapsulated functionality embodying modern C++11 programming techniques - Optimized containers and data types for efficient, generic, portable device code Updates to: - Quick start guide - Documentation - Utilities - CUTLASS Profiler Native Turing Tensor Cores - Efficient GEMM kernels targeting Turing Tensor Cores - Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands Coverage of existing CUTLASS functionality: - GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs - Volta Tensor Cores through native mma.sync and through WMMA API - Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions - Batched GEMM operations - Complex-valued GEMMs Note: this commit and all that follow require a host compiler supporting C++11 or greater.
20 lines
6.8 KiB
JavaScript
20 lines
6.8 KiB
JavaScript
var searchData=
|
|
[
|
|
['data',['data',['../structcutlass_1_1AlignedBuffer.html#a8ed8b9d3469621fc82d0041846c59da2',1,'cutlass::AlignedBuffer::data()'],['../structcutlass_1_1AlignedBuffer.html#acbfc684b16c9c717df5712bcb729acf3',1,'cutlass::AlignedBuffer::data() const '],['../classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#af47ab51582aa1e4c811a9e111b594556',1,'cutlass::Array< T, N, true >::data()'],['../classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a3d3d2637b7051145a2048cff1b55c0bf',1,'cutlass::Array< T, N, true >::data() const '],['../classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a1949c8a8c81dc2743328a56ff19fc933',1,'cutlass::Array< T, N, false >::data()'],['../classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ab617ed6c9cc6336baf1030713d6dfbbb',1,'cutlass::Array< T, N, false >::data() const '],['../structcutlass_1_1epilogue_1_1threadblock_1_1EpilogueBase_1_1SharedStorage.html#a2d57be4f0bdad670c7eb67e64dd1a9f5',1,'cutlass::epilogue::threadblock::EpilogueBase::SharedStorage::data()'],['../classcutlass_1_1TensorRef.html#ac7db3ca62ab1dfe0d3ea08bcadbc9352',1,'cutlass::TensorRef::data() const '],['../classcutlass_1_1TensorRef.html#a965e8b3b7f92dc51d4d3821ea6a25012',1,'cutlass::TensorRef::data(LongIndex idx) const ']]],
|
|
['debug_5fprint',['debug_print',['../structcutlass_1_1epilogue_1_1threadblock_1_1EpilogueBase_1_1SharedStorage.html#afd521c2dc754bb30024e8767bfc51e49',1,'cutlass::epilogue::threadblock::EpilogueBase::SharedStorage']]],
|
|
['debugtypefunc',['DebugTypeFunc',['../tools_2util_2include_2cutlass_2util_2debug_8h.html#ab7e23b523490567225b20e2c72649f20',1,'debug.h']]],
|
|
['defaultblockswizzle',['DefaultBlockSwizzle',['../structcutlass_1_1reduction_1_1DefaultBlockSwizzle.html#a1ad8edda7b73d23fb5592a531f5736cc',1,'cutlass::reduction::DefaultBlockSwizzle']]],
|
|
['denorm_5fmin',['denorm_min',['../structstd_1_1numeric__limits_3_01cutlass_1_1half__t_01_4.html#a2c05c19022c183e8734ada65c8970af5',1,'std::numeric_limits< cutlass::half_t >']]],
|
|
['description',['description',['../classcutlass_1_1library_1_1Operation.html#a62b9fbee4b72857214ca6c01874a27ce',1,'cutlass::library::Operation']]],
|
|
['device_5fbacked',['device_backed',['../classcutlass_1_1HostTensor.html#a73430856f79bedb64f9cf6b2044f38e3',1,'cutlass::HostTensor']]],
|
|
['device_5fdata',['device_data',['../classcutlass_1_1HostTensor.html#aca2b28a16fc92d29102d00f154a1dfd1',1,'cutlass::HostTensor::device_data()'],['../classcutlass_1_1HostTensor.html#abecb0dce978ea2c542d7d87a35f7997a',1,'cutlass::HostTensor::device_data() const ']]],
|
|
['device_5fdata_5fptr_5foffset',['device_data_ptr_offset',['../classcutlass_1_1HostTensor.html#a81043b0539c8d18c40957411dd149e28',1,'cutlass::HostTensor']]],
|
|
['device_5fref',['device_ref',['../classcutlass_1_1HostTensor.html#a55a73e5ff7c7404c0bdee5f2b578b876',1,'cutlass::HostTensor::device_ref(LongIndex ptr_element_offset=0)'],['../classcutlass_1_1HostTensor.html#a4bf91f711ef17492809c09d53364cb35',1,'cutlass::HostTensor::device_ref(LongIndex ptr_element_offset=0) const ']]],
|
|
['device_5fview',['device_view',['../classcutlass_1_1HostTensor.html#a075b666917a43c9bc168bfff6db27203',1,'cutlass::HostTensor::device_view(LongIndex ptr_element_offset=0)'],['../classcutlass_1_1HostTensor.html#a6d1c49888cf678d3d5469eba4e911337',1,'cutlass::HostTensor::device_view(LongIndex ptr_element_offset=0) const ']]],
|
|
['directepiloguetensorop',['DirectEpilogueTensorOp',['../classcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp.html#a7a64b4523780869f4b7dde2225572b2f',1,'cutlass::epilogue::threadblock::DirectEpilogueTensorOp']]],
|
|
['distribution',['Distribution',['../structcutlass_1_1Distribution.html#a40f0b9d0f92199f8a49c931d34dd8c8a',1,'cutlass::Distribution']]],
|
|
['dot',['dot',['../structcutlass_1_1Coord.html#a057a417a4d4a6e2f69e0b55a6f7ee902',1,'cutlass::Coord::dot()'],['../namespacecutlass_1_1arch.html#aa36dc224381add086ca4e0f96a04a964',1,'cutlass::arch::dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)'],['../namespacecutlass_1_1arch.html#a47b07bdb36714f93b31ad14bec925274',1,'cutlass::arch::dot(Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, half_t accum)'],['../namespacecutlass_1_1arch.html#acea872f9068fc5e07ce359984fe793c3',1,'cutlass::arch::dot(Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, float accum)'],['../namespacecutlass_1_1arch.html#af6adcb969a1e4acfed289a7839013695',1,'cutlass::arch::dot(Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#a027d23864f8145417feecf3f019f9ef4',1,'cutlass::arch::dot(Array< uint8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#a40582b9a769301d83e532fc5215a5259',1,'cutlass::arch::dot(Array< int8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#a262f27261d801dfd9a9d1cde280321ac',1,'cutlass::arch::dot(Array< uint8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#a2c67269e7497315437d5ade0ab313ec8',1,'cutlass::arch::dot(Array< int16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#a9dc4e9c5eddc624e2aecd15ef4b55f35',1,'cutlass::arch::dot(Array< uint16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#aeedc20bfc0ea4dde354a9eee802bdea8',1,'cutlass::arch::dot(Array< int16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#ad4b65852d862718f9917ea2019752abb',1,'cutlass::arch::dot(Array< uint16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#ab3255aee11ed0bc172e248673576c37a',1,'cutlass::arch::dot(Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#a4af405b474cc766adcaec63d46cbbc49',1,'cutlass::arch::dot(Array< uint16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#ab42df2f28bc1b03350884df1048f060c',1,'cutlass::arch::dot(Array< int16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum)'],['../namespacecutlass_1_1arch.html#ae01862e2b75604eaca84e3b95bf110bf',1,'cutlass::arch::dot(Array< uint16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum)']]],
|
|
['dump_5ffragment',['dump_fragment',['../namespacecutlass_1_1debug.html#a59e178f3c9b305571b12d80e5604b2c0',1,'cutlass::debug']]],
|
|
['dump_5fshmem',['dump_shmem',['../namespacecutlass_1_1debug.html#a6c7e23e12761423f54ccc4518b1f5fed',1,'cutlass::debug']]]
|
|
];
|