
CUTLASS 2.0 Substantially refactored for - Better performance, particularly for native Turing Tensor Cores - Robust and durable templates spanning the design space - Encapsulated functionality embodying modern C++11 programming techniques - Optimized containers and data types for efficient, generic, portable device code Updates to: - Quick start guide - Documentation - Utilities - CUTLASS Profiler Native Turing Tensor Cores - Efficient GEMM kernels targeting Turing Tensor Cores - Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands Coverage of existing CUTLASS functionality: - GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs - Volta Tensor Cores through native mma.sync and through WMMA API - Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions - Batched GEMM operations - Complex-valued GEMMs Note: this commit and all that follow require a host compiler supporting C++11 or greater.
27 lines
6.3 KiB
JavaScript
27 lines
6.3 KiB
JavaScript
var searchData=
|
|
[
|
|
['fast_5fdivmod',['fast_divmod',['../namespacecutlass.html#ab9726f5a6b39322cf13cd916257fd9a7',1,'cutlass::fast_divmod(int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr)'],['../namespacecutlass.html#ae051c9a8142f8ccea23a3998a7c4a8dc',1,'cutlass::fast_divmod(int &quo, int64_t &rem, int64_t src, int div, unsigned int mul, unsigned int shr)']]],
|
|
['fetch',['fetch',['../classcutlass_1_1Semaphore.html#af7e78f85e6106c1c82c10bee0b76d454',1,'cutlass::Semaphore']]],
|
|
['fill',['fill',['../classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a0b3f29a6d79dd9cd55de367c96ecfc5c',1,'cutlass::Array< T, N, true >::fill()'],['../classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#a1c31d3673a48b2ed275bd56714fbcfbe',1,'cutlass::Array< T, N, false >::fill()'],['../structcutlass_1_1PredicateVector.html#a236bd1a822479750a809452fd58dd917',1,'cutlass::PredicateVector::fill()']]],
|
|
['find_5fdivisor',['find_divisor',['../namespacecutlass.html#aac63a770acddafd828619834cf2c99d3',1,'cutlass']]],
|
|
['find_5flog2',['find_log2',['../namespacecutlass.html#a58a119c3f7b33d97c43ae8c114004d9e',1,'cutlass']]],
|
|
['floor_5fpow_5f2',['floor_pow_2',['../namespacecutlass.html#ac16d8caf23537912eb02123c4bdacd14',1,'cutlass']]],
|
|
['fpclassify',['fpclassify',['../namespacecutlass.html#a48c34ea7c666e34c2d8c5eb913b1c89b',1,'cutlass']]],
|
|
['fragmentiteratorcomplextensorop',['FragmentIteratorComplexTensorOp',['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorComplexTensorOp_3_01WarpShape___00_01Operato8cf03c624cf3210c71b7cbd580b080f8.html#acad4794bc84fbe582f58b4328c0d84a4',1,'cutlass::epilogue::warp::FragmentIteratorComplexTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor >']]],
|
|
['fragmentiteratorsimt',['FragmentIteratorSimt',['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorSimt_3_01WarpShape___00_01Operator___00_01la3f2abc523201c1b0228df99119ab88e1.html#a3dee783224164a08c24654aba39ddbdb',1,'cutlass::epilogue::warp::FragmentIteratorSimt< WarpShape_, Operator_, layout::RowMajor, MmaSimtPolicy_ >']]],
|
|
['fragmentiteratortensorop',['FragmentIteratorTensorOp',['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorTensorOp_3_01WarpShape___00_01OperatorShape_5e78dabe303f20d76b00c600aab61eda.html#a2fcc41a59fce4e01d3ea7917a73c5ec8',1,'cutlass::epilogue::warp::FragmentIteratorTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor >::FragmentIteratorTensorOp()'],['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorTensorOp_3_01WarpShape___00_01OperatorShape_e459aab140a2ce78336e584f95886726.html#ad690dd7f54217a7d1e2033e557306e8d',1,'cutlass::epilogue::warp::FragmentIteratorTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::ColumnMajorInterleaved< InterleavedK > >::FragmentIteratorTensorOp()']]],
|
|
['fragmentiteratorvoltatensorop',['FragmentIteratorVoltaTensorOp',['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1G16e08718cffa0989cce3fe8dbc4b075b.html#a11fda4be46e143b204c0a6e2325d8490',1,'cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::FragmentIteratorVoltaTensorOp()'],['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorVoltaTensorOp_3_01WarpShape___00_01gemm_1_1Gdb805a2dc5571ac3b66e0fe6ffdcede2.html#ae7d7fe96fc26493283a5b3b35c9f1c56',1,'cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::FragmentIteratorVoltaTensorOp()']]],
|
|
['fragmentiteratorwmmatensorop',['FragmentIteratorWmmaTensorOp',['../classcutlass_1_1epilogue_1_1warp_1_1FragmentIteratorWmmaTensorOp_3_01WarpShape___00_01OperatorShfdb1f120c6797383663f9fd11d0fc599.html#aaa5e511cd6d134528901955222e0bfe6',1,'cutlass::epilogue::warp::FragmentIteratorWmmaTensorOp< WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor >']]],
|
|
['free',['free',['../namespacecutlass_1_1device__memory.html#ae1978e48f195a60ad01e94815b35bdac',1,'cutlass::device_memory']]],
|
|
['from_5freal_3c_20cutlass_3a_3acomplex_3c_20double_20_3e_20_3e',['from_real< cutlass::complex< double > >',['../namespacecutlass.html#a39965d02765d0e4aa491b7654d0ea7e4',1,'cutlass']]],
|
|
['from_5freal_3c_20cutlass_3a_3acomplex_3c_20float_20_3e_20_3e',['from_real< cutlass::complex< float > >',['../namespacecutlass.html#a7604eaf7d08b2362906b87ebeab6b761',1,'cutlass']]],
|
|
['from_5freal_3c_20cutlass_3a_3acomplex_3c_20half_5ft_20_3e_20_3e',['from_real< cutlass::complex< half_t > >',['../namespacecutlass.html#ac0bfc6f3e3e772bf15ecad24481194cb',1,'cutlass']]],
|
|
['from_5fstring',['from_string',['../namespacecutlass_1_1library.html#afd140a6d1d6a843abf778ba593a9264b',1,'cutlass::library']]],
|
|
['from_5fstring_3c_20layouttypeid_20_3e',['from_string< LayoutTypeID >',['../namespacecutlass_1_1library.html#a47a7cb0c24191de74f00d8be5787e939',1,'cutlass::library']]],
|
|
['from_5fstring_3c_20numerictypeid_20_3e',['from_string< NumericTypeID >',['../namespacecutlass_1_1library.html#a197f9c916d053d7e44af6155ef4f211a',1,'cutlass::library']]],
|
|
['from_5fstring_3c_20opcodeclassid_20_3e',['from_string< OpcodeClassID >',['../namespacecutlass_1_1library.html#afceeff02431874a5d43d23a7da1dcb72',1,'cutlass::library']]],
|
|
['from_5fstring_3c_20operationkind_20_3e',['from_string< OperationKind >',['../namespacecutlass_1_1library.html#a80d2fabcc7152472d6dc6885c55f7d7d',1,'cutlass::library']]],
|
|
['front',['front',['../classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a5adbb5bb00cca5e538cd1215d1de08a4',1,'cutlass::Array< T, N, true >::front()'],['../classcutlass_1_1Array_3_01T_00_01N_00_01true_01_4.html#a0a692495c5f7a7d098e60b9292a07e4f',1,'cutlass::Array< T, N, true >::front() const '],['../classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#aa89dd0781c0a81421589182a5402df8b',1,'cutlass::Array< T, N, false >::front()'],['../classcutlass_1_1Array_3_01T_00_01N_00_01false_01_4.html#ab7ebd33505e48ab3beb6b551e8b762e5',1,'cutlass::Array< T, N, false >::front() const ']]],
|
|
['functor_5fcaller',['functor_caller',['../structcutlass_1_1reduction_1_1BatchedReduction.html#a7c1d173cbe3abd93bd7bd4c4bf0e0d26',1,'cutlass::reduction::BatchedReduction']]]
|
|
];
|