cutlass/docs/search/functions_15.js
Andrew Kerr fb335f6a5f
CUTLASS 2.0 (#62)
CUTLASS 2.0

Substantially refactored for

- Better performance, particularly for native Turing Tensor Cores
- Robust and durable templates spanning the design space
- Encapsulated functionality embodying modern C++11 programming techniques
- Optimized containers and data types for efficient, generic, portable device code

Updates to:
- Quick start guide
- Documentation
- Utilities
- CUTLASS Profiler

Native Turing Tensor Cores
- Efficient GEMM kernels targeting Turing Tensor Cores
- Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands

Coverage of existing CUTLASS functionality:
- GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
- Volta Tensor Cores through native mma.sync and through WMMA API
- Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
- Batched GEMM operations
- Complex-valued GEMMs

Note: this commit and all that follow require a host compiler supporting C++11 or greater.
2019-11-19 16:55:34 -08:00

9 lines
4.4 KiB
JavaScript

var searchData=
[
['valid',['valid',['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen784a0e9da3f55064c47e5613791f51f7.html#ae95bba63dc3b336a5ebfe6bac4d59723',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen89c687c583745a73cb485041911a4c4e.html#a9135a1ca1ddc279c85df584d23fa7003',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen9838736ad62fae54213fbaf722a989ab.html#a0d3a78ab6cc435681f257465bd452f79',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemenab63a1e105bf37f6371516cb9e2c5a7a.html#ad55ebaa9c0e4fa51489ee0bfbe122d46',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator_3_01Shape___00_01Elemen809793e785fb4211888c6b4e5dcfcb39.html#a2b45d4be2bf0e228ca658771f5948b2d',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::RowMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__1790abaa54a01f277d75766d5882fec8.html#a2e81b47fa12aacff752bb8a2a3e817fa',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__da632779aba661c0f4cfaaa78126b771.html#a1b603c7b4f0e3dcfc9c617b156050de3',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessType_ >::valid()'],['../classcutlass_1_1transform_1_1threadblock_1_1PredicatedTileAccessIterator2dThreadTile_3_01Shape__7327fa15996bcb8502cdfcc192350fe1.html#adb5537726f507544df0167a6fcd0fcd6',1,'cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, AccessType_ >::valid()']]],
['view',['view',['../classcutlass_1_1thread_1_1Matrix.html#a7c727fa4d536a9e82957b5a3f4ae1c92',1,'cutlass::thread::Matrix']]],
['voltatensoropmultiplicandbcongruous',['VoltaTensorOpMultiplicandBCongruous',['../structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandBCongruous.html#ac31322c93fd5973b3652c8127c0b8f3a',1,'cutlass::layout::VoltaTensorOpMultiplicandBCongruous::VoltaTensorOpMultiplicandBCongruous(Index ldm=0)'],['../structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandBCongruous.html#af096631a3d336c4d3c6d3c9706a0766c',1,'cutlass::layout::VoltaTensorOpMultiplicandBCongruous::VoltaTensorOpMultiplicandBCongruous(Stride stride)']]],
['voltatensoropmultiplicandcongruous',['VoltaTensorOpMultiplicandCongruous',['../structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html#ace186f69fd389edff3909fe39598e93d',1,'cutlass::layout::VoltaTensorOpMultiplicandCongruous::VoltaTensorOpMultiplicandCongruous(Index ldm=0)'],['../structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCongruous.html#ab804ea82f631e3950d476e12a92a7189',1,'cutlass::layout::VoltaTensorOpMultiplicandCongruous::VoltaTensorOpMultiplicandCongruous(Stride stride)']]],
['voltatensoropmultiplicandcrosswise',['VoltaTensorOpMultiplicandCrosswise',['../structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCrosswise.html#aa68d825db94190611ce9bd54e25b0d48',1,'cutlass::layout::VoltaTensorOpMultiplicandCrosswise::VoltaTensorOpMultiplicandCrosswise(Index ldm=0)'],['../structcutlass_1_1layout_1_1VoltaTensorOpMultiplicandCrosswise.html#af7cedde7974be824cef4ed7248903320',1,'cutlass::layout::VoltaTensorOpMultiplicandCrosswise::VoltaTensorOpMultiplicandCrosswise(Stride stride)']]]
];