cutlass/docs/search/functions_8.js
Andrew Kerr fb335f6a5f
CUTLASS 2.0 (#62)
CUTLASS 2.0

Substantially refactored for

- Better performance, particularly for native Turing Tensor Cores
- Robust and durable templates spanning the design space
- Encapsulated functionality embodying modern C++11 programming techniques
- Optimized containers and data types for efficient, generic, portable device code

Updates to:
- Quick start guide
- Documentation
- Utilities
- CUTLASS Profiler

Native Turing Tensor Cores
- Efficient GEMM kernels targeting Turing Tensor Cores
- Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands

Coverage of existing CUTLASS functionality:
- GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
- Volta Tensor Cores through native mma.sync and through WMMA API
- Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
- Batched GEMM operations
- Complex-valued GEMMs

Note: this commit and all that follow require a host compiler supporting C++11 or greater.
2019-11-19 16:55:34 -08:00

11 lines
2.7 KiB
JavaScript

// Search index table for function names beginning with "h".
//
// NOTE(review): this looks like a Doxygen-generated search index (the file
// lives under docs/search/) -- presumably it should be regenerated rather
// than edited by hand; confirm before changing any entry.
//
// Layout of each entry, as visible in the literal below:
//   [ key, [ displayName, target, target, ... ] ]
//     key         - lower-cased name with '_' written as '_5f'
//                   (e.g. 'half_5ft' for 'half_t', 'hosttensor' for 'HostTensor').
//     displayName - the name in its original spelling.
//     target      - [ relativeUrlWithAnchor, 1, label ], one per overload.
//                   When a name has several targets the label is the fully
//                   qualified signature; the single-target entry
//                   ('host_data_ptr_offset') carries only the enclosing scope.
//                   The meaning of the constant 1 is not evident from this
//                   file -- TODO confirm against the consuming search script.
var searchData=
[
['h',['h',['../structcutlass_1_1Tensor4DCoord.html#a71dda571a04037e564f238bb9a76f213',1,'cutlass::Tensor4DCoord::h() const '],['../structcutlass_1_1Tensor4DCoord.html#ae399c4159fb4e799c42bd882df2ccce7',1,'cutlass::Tensor4DCoord::h()']]],
['half_5ft',['half_t',['../structcutlass_1_1half__t.html#af51056b4916dfea5b3621cdeddab67e9',1,'cutlass::half_t::half_t()'],['../structcutlass_1_1half__t.html#a25f60fd3977cd11e26819cc1d3b0dc07',1,'cutlass::half_t::half_t(half const &x)'],['../structcutlass_1_1half__t.html#a81af113694141b181d96d81f562bb325',1,'cutlass::half_t::half_t(float x)'],['../structcutlass_1_1half__t.html#a4a4ece050c5bb376d419299c228b270e',1,'cutlass::half_t::half_t(double x)'],['../structcutlass_1_1half__t.html#ab6e3d4d165819edabf8b4757d42ccf3c',1,'cutlass::half_t::half_t(int x)'],['../structcutlass_1_1half__t.html#a338d628504f733a23e69f2a60204c1b8',1,'cutlass::half_t::half_t(unsigned x)']]],
['host_5fdata',['host_data',['../classcutlass_1_1HostTensor.html#a7d34307e09d4ea09d68abbf31fe33788',1,'cutlass::HostTensor::host_data()'],['../classcutlass_1_1HostTensor.html#adafe2bee53260d47bc60479c50953f57',1,'cutlass::HostTensor::host_data(LongIndex idx)'],['../classcutlass_1_1HostTensor.html#a3dd62ca461666b3ac7e690e2befecaae',1,'cutlass::HostTensor::host_data() const '],['../classcutlass_1_1HostTensor.html#a7741d51bf7d241d4821e52e47b704c69',1,'cutlass::HostTensor::host_data(LongIndex idx) const ']]],
['host_5fdata_5fptr_5foffset',['host_data_ptr_offset',['../classcutlass_1_1HostTensor.html#a252ce9281a40863f32be25d3b40d6373',1,'cutlass::HostTensor']]],
['host_5fref',['host_ref',['../classcutlass_1_1HostTensor.html#ac921be812016052c690d2dc808d415f1',1,'cutlass::HostTensor::host_ref(LongIndex ptr_element_offset=0)'],['../classcutlass_1_1HostTensor.html#a3c5b60678dfa105b10c87dfaab4cf395',1,'cutlass::HostTensor::host_ref(LongIndex ptr_element_offset=0) const ']]],
['host_5fview',['host_view',['../classcutlass_1_1HostTensor.html#a75ce60d02c9d8fdcbcaceecc6b3ec7fa',1,'cutlass::HostTensor::host_view(LongIndex ptr_element_offset=0)'],['../classcutlass_1_1HostTensor.html#a232e640ebb4b8cf21b44653d7800c5a7',1,'cutlass::HostTensor::host_view(LongIndex ptr_element_offset=0) const ']]],
['hosttensor',['HostTensor',['../classcutlass_1_1HostTensor.html#af50ce8b091c106ac88b75e15ab028868',1,'cutlass::HostTensor::HostTensor()'],['../classcutlass_1_1HostTensor.html#abf381d496b50aad3df4bb72e3d891bac',1,'cutlass::HostTensor::HostTensor(TensorCoord const &extent, bool device_backed=true)'],['../classcutlass_1_1HostTensor.html#a9d71a824b0e1a4f1e8a88abecdff10b0',1,'cutlass::HostTensor::HostTensor(TensorCoord const &extent, Layout const &layout, bool device_backed=true)']]]
];