
CUTLASS 2.0 Substantially refactored for - Better performance, particularly for native Turing Tensor Cores - Robust and durable templates spanning the design space - Encapsulated functionality embodying modern C++11 programming techniques - Optimized containers and data types for efficient, generic, portable device code Updates to: - Quick start guide - Documentation - Utilities - CUTLASS Profiler Native Turing Tensor Cores - Efficient GEMM kernels targeting Turing Tensor Cores - Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands Coverage of existing CUTLASS functionality: - GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs - Volta Tensor Cores through native mma.sync and through WMMA API - Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions - Batched GEMM operations - Complex-valued GEMMs Note: this commit and all that follow require a host compiler supporting C++11 or greater.
6 lines
2.0 KiB
JavaScript
6 lines
2.0 KiB
JavaScript
var searchData=
|
|
[
|
|
['_5f_5falign_5f_5f',['__align__',['../namespacecutlass_1_1platform.html#ac9068e2d027ffdf5cd564deecc2cb9e8',1,'cutlass::platform::__align__(1) aligned_chunk< 1 >'],['../namespacecutlass_1_1platform.html#a0bcb016704ec57f9499e662ba6156f98',1,'cutlass::platform::__align__(2) aligned_chunk< 2 >'],['../namespacecutlass_1_1platform.html#a71be5af25eeffa4077777f919e67d8da',1,'cutlass::platform::__align__(4) aligned_chunk< 4 >'],['../namespacecutlass_1_1platform.html#a42440254a16d4b6b95b95cc3360ee372',1,'cutlass::platform::__align__(8) aligned_chunk< 8 >'],['../namespacecutlass_1_1platform.html#a91d5e970d6ebe619914f40a9510bdb1e',1,'cutlass::platform::__align__(16) aligned_chunk< 16 >'],['../namespacecutlass_1_1platform.html#a210f4d360b1f9c3d074e71129fe4c0d9',1,'cutlass::platform::__align__(32) aligned_chunk< 32 >'],['../namespacecutlass_1_1platform.html#ae792b1c7ada1a33e306cd552f583bdce',1,'cutlass::platform::__align__(64) aligned_chunk< 64 >'],['../namespacecutlass_1_1platform.html#a5712ec4fed335a9b7f863fb3abe3c5eb',1,'cutlass::platform::__align__(128) aligned_chunk< 128 >'],['../namespacecutlass_1_1platform.html#a595cc98db29fb4d59772d2e2f52e347a',1,'cutlass::platform::__align__(256) aligned_chunk< 256 >'],['../namespacecutlass_1_1platform.html#ae70bb5d14a66500b47d2e3f83063d4a5',1,'cutlass::platform::__align__(512) aligned_chunk< 512 >'],['../namespacecutlass_1_1platform.html#a181e44e9c66f704175590727aaa9e5a1',1,'cutlass::platform::__align__(1024) aligned_chunk< 1024 >'],['../namespacecutlass_1_1platform.html#ae72c8fa997bb251d4140dceb03147154',1,'cutlass::platform::__align__(2048) aligned_chunk< 2048 >'],['../namespacecutlass_1_1platform.html#ada29683f1b408ae7b73cc8fbe2108628',1,'cutlass::platform::__align__(4096) aligned_chunk< 4096 >']]],
|
|
['_5f_5flaunch_5fbounds_5f_5f',['__launch_bounds__',['../namespacecutlass_1_1reduction.html#a9665e8f438a7b290d6e2eb640d93045f',1,'cutlass::reduction']]]
|
|
];
|