cutlass/docs/search/classes_14.js
Andrew Kerr fb335f6a5f
CUTLASS 2.0 (#62)
CUTLASS 2.0

Substantially refactored for

- Better performance, particularly for native Turing Tensor Cores
- Robust and durable templates spanning the design space
- Encapsulated functionality embodying modern C++11 programming techniques
- Optimized containers and data types for efficient, generic, portable device code

Updates to:
- Quick start guide
- Documentation
- Utilities
- CUTLASS Profiler

Native Turing Tensor Cores
- Efficient GEMM kernels targeting Turing Tensor Cores
- Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands

Coverage of existing CUTLASS functionality:
- GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
- Volta Tensor Cores through native mma.sync and through WMMA API
- Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
- Batched GEMM operations
- Complex-valued GEMMs

Note: this commit and all that follow require a host compiler supporting C++11 or greater.
2019-11-19 16:55:34 -08:00

10 lines
2.4 KiB
JavaScript

/*
 * Search index table for class/struct names in this bucket (keys here all
 * start with "w").
 *
 * Each entry has the shape:
 *   [ searchKey, [ displayName, [ relativeUrl, 1, enclosingScope ] ] ]
 *
 * - searchKey is the lowercased display name with punctuation written as
 *   "_" + two hex digits (compare the keys with their display names below:
 *   "_3c" -> "<", "_20" -> " ", "_5f" -> "_", "_2c" -> ",", "_3a" -> ":",
 *   "_3e" -> ">").
 * - relativeUrl points at the generated HTML page for the symbol.
 * - NOTE(review): the meaning of the numeric field (always 1 here) is not
 *   visible from this file; confirm against the script that consumes it.
 *
 * `var` is kept deliberately: this looks like generated data that is read as
 * a global by a separate search script, so the binding kind must not change.
 */
var searchData = [
  [
    'warpsize',
    [
      'WarpSize',
      [
        '../structcutlass_1_1gemm_1_1warp_1_1WarpSize.html',
        1,
        'cutlass::gemm::warp'
      ]
    ]
  ],
  [
    'wmma_3c_20shape_5f_2c_20cutlass_3a_3ahalf_5ft_2c_20layouta_5f_2c_20cutlass_3a_3ahalf_5ft_2c_20layoutb_5f_2c_20elementc_5f_2c_20layoutc_5f_2c_20cutlass_3a_3aarch_3a_3aopmultiplyadd_20_3e',
    [
      'Wmma< Shape_, cutlass::half_t, LayoutA_, cutlass::half_t, LayoutB_, ElementC_, LayoutC_, cutlass::arch::OpMultiplyAdd >',
      [
        '../structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1half__t_00_01LayoutA___00_01cutlass_1_84e30c8cc93eeb7ca02f651bd16d4c38.html',
        1,
        'cutlass::arch'
      ]
    ]
  ],
  [
    'wmma_3c_20shape_5f_2c_20cutlass_3a_3aint4b_5ft_2c_20layouta_5f_2c_20cutlass_3a_3aint4b_5ft_2c_20layoutb_5f_2c_20int32_5ft_2c_20layoutc_5f_2c_20cutlass_3a_3aarch_3a_3aopmultiplyadd_20_3e',
    [
      'Wmma< Shape_, cutlass::int4b_t, LayoutA_, cutlass::int4b_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd >',
      [
        '../structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1int4b__t_00_01LayoutA___00_01cutlass_16fd808a90b3cf9d7cfc99f30888ca3fe.html',
        1,
        'cutlass::arch'
      ]
    ]
  ],
  [
    'wmma_3c_20shape_5f_2c_20cutlass_3a_3auint1b_5ft_2c_20layouta_5f_2c_20cutlass_3a_3auint1b_5ft_2c_20layoutb_5f_2c_20int32_5ft_2c_20layoutc_5f_2c_20cutlass_3a_3aarch_3a_3aopxorpopc_20_3e',
    [
      'Wmma< Shape_, cutlass::uint1b_t, LayoutA_, cutlass::uint1b_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpXorPopc >',
      [
        '../structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1uint1b__t_00_01LayoutA___00_01cutlass_c80a7ea4d219cd9b13b560b493338028.html',
        1,
        'cutlass::arch'
      ]
    ]
  ],
  [
    'wmma_3c_20shape_5f_2c_20int8_5ft_2c_20layouta_5f_2c_20int8_5ft_2c_20layoutb_5f_2c_20int32_5ft_2c_20layoutc_5f_2c_20cutlass_3a_3aarch_3a_3aopmultiplyadd_20_3e',
    [
      'Wmma< Shape_, int8_t, LayoutA_, int8_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd >',
      [
        '../structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01int8__t_00_01LayoutA___00_01int8__t_00_01LayoutB_505c57bb6818a941dc16f00cf35a9ec0.html',
        1,
        'cutlass::arch'
      ]
    ]
  ],
  [
    'wmma_3c_20shape_5f_2c_20uint8_5ft_2c_20layouta_5f_2c_20uint8_5ft_2c_20layoutb_5f_2c_20int32_5ft_2c_20layoutc_5f_2c_20cutlass_3a_3aarch_3a_3aopmultiplyadd_20_3e',
    [
      'Wmma< Shape_, uint8_t, LayoutA_, uint8_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd >',
      [
        '../structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01uint8__t_00_01LayoutA___00_01uint8__t_00_01Layout219a464a1248ebfc37aa29bcb10cb1b0.html',
        1,
        'cutlass::arch'
      ]
    ]
  ]
];