
CUTLASS 2.0 Substantially refactored for - Better performance, particularly for native Turing Tensor Cores - Robust and durable templates spanning the design space - Encapsulated functionality embodying modern C++11 programming techniques - Optimized containers and data types for efficient, generic, portable device code Updates to: - Quick start guide - Documentation - Utilities - CUTLASS Profiler Native Turing Tensor Cores - Efficient GEMM kernels targeting Turing Tensor Cores - Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands Coverage of existing CUTLASS functionality: - GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs - Volta Tensor Cores through native mma.sync and through WMMA API - Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions - Batched GEMM operations - Complex-valued GEMMs Note: this commit and all that follow require a host compiler supporting C++11 or greater.
11 lines
2.7 KiB
JavaScript
11 lines
2.7 KiB
JavaScript
var searchData=
|
|
[
|
|
['h',['h',['../structcutlass_1_1Tensor4DCoord.html#a71dda571a04037e564f238bb9a76f213',1,'cutlass::Tensor4DCoord::h() const '],['../structcutlass_1_1Tensor4DCoord.html#ae399c4159fb4e799c42bd882df2ccce7',1,'cutlass::Tensor4DCoord::h()']]],
|
|
['half_5ft',['half_t',['../structcutlass_1_1half__t.html#af51056b4916dfea5b3621cdeddab67e9',1,'cutlass::half_t::half_t()'],['../structcutlass_1_1half__t.html#a25f60fd3977cd11e26819cc1d3b0dc07',1,'cutlass::half_t::half_t(half const &x)'],['../structcutlass_1_1half__t.html#a81af113694141b181d96d81f562bb325',1,'cutlass::half_t::half_t(float x)'],['../structcutlass_1_1half__t.html#a4a4ece050c5bb376d419299c228b270e',1,'cutlass::half_t::half_t(double x)'],['../structcutlass_1_1half__t.html#ab6e3d4d165819edabf8b4757d42ccf3c',1,'cutlass::half_t::half_t(int x)'],['../structcutlass_1_1half__t.html#a338d628504f733a23e69f2a60204c1b8',1,'cutlass::half_t::half_t(unsigned x)']]],
|
|
['host_5fdata',['host_data',['../classcutlass_1_1HostTensor.html#a7d34307e09d4ea09d68abbf31fe33788',1,'cutlass::HostTensor::host_data()'],['../classcutlass_1_1HostTensor.html#adafe2bee53260d47bc60479c50953f57',1,'cutlass::HostTensor::host_data(LongIndex idx)'],['../classcutlass_1_1HostTensor.html#a3dd62ca461666b3ac7e690e2befecaae',1,'cutlass::HostTensor::host_data() const '],['../classcutlass_1_1HostTensor.html#a7741d51bf7d241d4821e52e47b704c69',1,'cutlass::HostTensor::host_data(LongIndex idx) const ']]],
|
|
['host_5fdata_5fptr_5foffset',['host_data_ptr_offset',['../classcutlass_1_1HostTensor.html#a252ce9281a40863f32be25d3b40d6373',1,'cutlass::HostTensor']]],
|
|
['host_5fref',['host_ref',['../classcutlass_1_1HostTensor.html#ac921be812016052c690d2dc808d415f1',1,'cutlass::HostTensor::host_ref(LongIndex ptr_element_offset=0)'],['../classcutlass_1_1HostTensor.html#a3c5b60678dfa105b10c87dfaab4cf395',1,'cutlass::HostTensor::host_ref(LongIndex ptr_element_offset=0) const ']]],
|
|
['host_5fview',['host_view',['../classcutlass_1_1HostTensor.html#a75ce60d02c9d8fdcbcaceecc6b3ec7fa',1,'cutlass::HostTensor::host_view(LongIndex ptr_element_offset=0)'],['../classcutlass_1_1HostTensor.html#a232e640ebb4b8cf21b44653d7800c5a7',1,'cutlass::HostTensor::host_view(LongIndex ptr_element_offset=0) const ']]],
|
|
['hosttensor',['HostTensor',['../classcutlass_1_1HostTensor.html#af50ce8b091c106ac88b75e15ab028868',1,'cutlass::HostTensor::HostTensor()'],['../classcutlass_1_1HostTensor.html#abf381d496b50aad3df4bb72e3d891bac',1,'cutlass::HostTensor::HostTensor(TensorCoord const &extent, bool device_backed=true)'],['../classcutlass_1_1HostTensor.html#a9d71a824b0e1a4f1e8a88abecdff10b0',1,'cutlass::HostTensor::HostTensor(TensorCoord const &extent, Layout const &layout, bool device_backed=true)']]]
|
|
];
|