cutlass/docs/search/variables_3.js
Andrew Kerr fb335f6a5f
CUTLASS 2.0 (#62)
CUTLASS 2.0

Substantially refactored for

- Better performance, particularly for native Turing Tensor Cores
- Robust and durable templates spanning the design space
- Encapsulated functionality embodying modern C++11 programming techniques
- Optimized containers and data types for efficient, generic, portable device code

Updates to:
- Quick start guide
- Documentation
- Utilities
- CUTLASS Profiler

Native Turing Tensor Cores
- Efficient GEMM kernels targeting Turing Tensor Cores
- Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands

Coverage of existing CUTLASS functionality:
- GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
- Volta Tensor Cores through native mma.sync and through WMMA API
- Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
- Batched GEMM operations
- Complex-valued GEMMs

Note: this commit and all that follow require a host compiler supporting C++11 or greater.
2019-11-19 16:55:34 -08:00

15 lines
2.5 KiB
JavaScript

// Search-index table for variable/member names; every key in this table
// begins with the letter 'd'.
//
// Layout of each entry, as visible in the literal below:
//   [ key, [ displayName, target, target, ... ] ]
// where each target is
//   [ relativeUrlWithAnchor, number, qualifiedScopeOrSymbol ]
//
// - key: the lower-cased displayName with '_' written as '_5f'
//   (e.g. 'D' -> 'd', 'd_a' -> 'd_5fa', 'dynamic_smem' -> 'dynamic_5fsmem').
// - displayName: the identifier as spelled in the source ('D', 'd_a', ...).
// - A name declared in several scopes carries one target per scope
//   (see 'd' with two targets and 'diag' with three); in those entries the
//   third field is the fully qualified member followed by '()', otherwise it
//   is only the enclosing scope.
// - The second field is 1 in every target here.
//   NOTE(review): its meaning is not shown in this file -- confirm against the
//   search script that consumes searchData before relying on it.
//
// NOTE(review): the path (docs/search/) and the URL naming scheme suggest this
// file is emitted by the documentation generator (presumably Doxygen) --
// confirm; if so, regenerate the docs instead of editing entries by hand.
var searchData=
[
['d',['D',['../structcutlass_1_1library_1_1GemmArguments.html#a2f4c0652e6632aebe6d9159c425ecc3f',1,'cutlass::library::GemmArguments::D()'],['../structcutlass_1_1library_1_1GemmArrayArguments.html#ae6ccc3b91e9a77ad170d276f70fe2c30',1,'cutlass::library::GemmArrayArguments::D()']]],
['d_5fa',['d_a',['../structcutlass_1_1reduction_1_1BatchedReductionTraits_1_1Params.html#af1b12ba220602692e84616b420b00f1c',1,'cutlass::reduction::BatchedReductionTraits::Params']]],
['d_5fc',['d_c',['../structcutlass_1_1reduction_1_1BatchedReductionTraits_1_1Params.html#ac79830eaf080ea0ffddd2100db6cf3e1',1,'cutlass::reduction::BatchedReductionTraits::Params']]],
['d_5fd',['d_d',['../structcutlass_1_1reduction_1_1BatchedReductionTraits_1_1Params.html#abf9744373a72f3819a616b5a5b3bff22',1,'cutlass::reduction::BatchedReductionTraits::Params']]],
['delta',['delta',['../structcutlass_1_1Distribution.html#a77613df810c3f8f68b595599802cedb4',1,'cutlass::Distribution']]],
['destination',['destination',['../structcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK_1_1Params.html#a08089218798599f5f47184f8c94723cb',1,'cutlass::reduction::kernel::ReduceSplitK::Params']]],
['destination_5fref',['destination_ref',['../structcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp_1_1Params.html#a89d406e705b7817243e3aa9d4253bb14',1,'cutlass::epilogue::threadblock::DirectEpilogueTensorOp::Params']]],
['diag',['diag',['../structcutlass_1_1reference_1_1device_1_1detail_1_1TensorFillDiagonalFunc_1_1Params.html#abcbca40684cd478a53c0cc80c8e418e1',1,'cutlass::reference::device::detail::TensorFillDiagonalFunc::Params::diag()'],['../structcutlass_1_1reference_1_1device_1_1detail_1_1TensorUpdateDiagonalFunc_1_1Params.html#adc562519d503d235a49b11a8f2fc2bf6',1,'cutlass::reference::device::detail::TensorUpdateDiagonalFunc::Params::diag()'],['../structcutlass_1_1reference_1_1host_1_1detail_1_1TensorFillDiagonalFunc.html#a027d9ae77e068454e8df798018276c18',1,'cutlass::reference::host::detail::TensorFillDiagonalFunc::diag()']]],
['digits',['digits',['../structstd_1_1numeric__limits_3_01cutlass_1_1half__t_01_4.html#a92152311525685a53c6a0db4cb74f193',1,'std::numeric_limits< cutlass::half_t >']]],
['dst',['dst',['../structcutlass_1_1reference_1_1host_1_1detail_1_1TensorCopyIf.html#a0d94963e36e238233ddb550845b37004',1,'cutlass::reference::host::detail::TensorCopyIf']]],
['dynamic_5fsmem',['dynamic_smem',['../structcutlass_1_1KernelLaunchConfiguration.html#a4a6ac693d4284c84301279219623e2bc',1,'cutlass::KernelLaunchConfiguration']]]
];