
CUTLASS 2.0 Substantially refactored for - Better performance, particularly for native Turing Tensor Cores - Robust and durable templates spanning the design space - Encapsulated functionality embodying modern C++11 programming techniques - Optimized containers and data types for efficient, generic, portable device code Updates to: - Quick start guide - Documentation - Utilities - CUTLASS Profiler Native Turing Tensor Cores - Efficient GEMM kernels targeting Turing Tensor Cores - Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands Coverage of existing CUTLASS functionality: - GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs - Volta Tensor Cores through native mma.sync and through WMMA API - Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions - Batched GEMM operations - Complex-valued GEMMs Note: this commit and all that follow require a host compiler supporting C++11 or greater.
10 lines
3.9 KiB
JavaScript
10 lines
3.9 KiB
JavaScript
var searchData=
|
|
[
|
|
['c',['C',['../structcutlass_1_1library_1_1GemmDescription.html#adb43e15d8d870f6a4fca038a401125e7',1,'cutlass::library::GemmDescription::C()'],['../structcutlass_1_1library_1_1GemmArguments.html#a55d2e1d991fa6cfee6c8a4f3b71cbdd9',1,'cutlass::library::GemmArguments::C()'],['../structcutlass_1_1library_1_1GemmArrayArguments.html#a49928c947bec63af706d31f51fb10fd9',1,'cutlass::library::GemmArrayArguments::C()']]],
|
|
['capacity',['capacity',['../structcutlass_1_1device__memory_1_1allocation.html#a81d1c8ae7ffc695ae1e6a190ebfe8bb6',1,'cutlass::device_memory::allocation']]],
|
|
['contains',['contains',['../structcutlass_1_1reference_1_1host_1_1detail_1_1TensorContainsFunc.html#a36f817e5b6e993ac3c9aaf78186a1ffb',1,'cutlass::reference::host::detail::TensorContainsFunc']]],
|
|
['convert',['convert',['../structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html#aabbedd03888a6090f049f53f53bf4e45',1,'cutlass::gemm::device::GemmSplitKParallel::Arguments::convert()'],['../structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_3_01ElementA___00_01LayoutA___00_01Elementafcb1aeaf2035a7ac769d7acc233423b.html#a48ced96adaf371f03c1c9a50db9f50f2',1,'cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::Arguments::convert()'],['../structcutlass_1_1reference_1_1host_1_1detail_1_1TensorCopyIf.html#ad9e5902883076ca684487d276a79c47e',1,'cutlass::reference::host::detail::TensorCopyIf::convert()']]],
|
|
['convert_5fop',['convert_op',['../structcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp_1_1Params.html#a97f1044e7b7cec1ddd1f120b8b7539f2',1,'cutlass::epilogue::threadblock::DirectEpilogueTensorOp::Params']]],
|
|
['cublas_5ftype',['cublas_type',['../structcutlass_1_1TypeTraits_3_01int8__t_01_4.html#ac801fb97ec8a1a8cce0dbab46a614eff',1,'cutlass::TypeTraits< int8_t >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01uint8__t_01_4.html#ae5edc866e5de8527b6ddf06c3844684b',1,'cutlass::TypeTraits< uint8_t >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01int_01_4.html#abe5b201de5b1ef7a4e23f5ab6ed06f4a',1,'cutlass::TypeTraits< int >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01unsigned_01_4.html#aeafbc657f1a9020e36bbe523a33990b5',1,'cutlass::TypeTraits< unsigned >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01int64__t_01_4.html#a24cf2f6d484f30a1b329c3f8c1fb573d',1,'cutlass::TypeTraits< int64_t >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01uint64__t_01_4.html#a9ef28cd1f430f25cdda594f060f4e718',1,'cutlass::TypeTraits< uint64_t >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01half__t_01_4.html#a0491882d302a1038f1bb3c3d09374bb4',1,'cutlass::TypeTraits< half_t >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01float_01_4.html#aa835af229fbe3c00ccc6ea164bb1eb62',1,'cutlass::TypeTraits< float >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01double_01_4.html#ae0e23f7459fa1586160ae47e151428ae',1,'cutlass::TypeTraits< double >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01complex_3_01half_01_4_01_4.html#afe1a23ad5e158fc64fac88bd6095602e',1,'cutlass::TypeTraits< complex< half > >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01complex_3_01half__t_01_4_01_4.html#a5ca73eeea32d33e33e8a98890a78593d',1,'cutlass::TypeTraits< complex< half_t > >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01complex_3_01float_01_4_01_4.html#a6885f2871ac12091946d8f9a833efc0e',1,'cutlass::TypeTraits< complex< float > >::cublas_type()'],['../structcutlass_1_1TypeTraits_3_01complex_3_01double_01_4_01_4.html#a474db90f9990e15f86a822e6a226eeb7',1,'cutlass::TypeTraits< complex< double > >::cublas_type()']]]
|
|
];
|