Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <igemm_epilogue.h>
Public Types | |
typedef GemmEpilogue< GemmEpilogueTraits_ > | Base |
The base class. More... | |
Public Types inherited from GemmEpilogue< GemmEpilogueTraits_ > | |
typedef GemmEpilogueTraits_ | Traits |
The traits class. More... | |
typedef Traits::Params | Params |
The params. More... | |
typedef Traits::SharedStorage | SharedStorage |
The shared storage. More... | |
typedef Traits::OutputTile | OutputTile |
The output tile. More... | |
typedef Traits::Iterations | Iterations |
The number of iterations. More... | |
typedef Traits::Accumulators | Accumulators |
The accumulators. More... | |
typedef Traits::Scalar | Scalar |
The scalar. More... | |
typedef Traits::Functor | Functor |
The functor in charge of the math. More... | |
typedef Traits::GlobalLoadIteratorC | GlobalLoadIteratorC |
The iterator for C in global memory. (We do not support 3D or 4D shapes.) More... | |
typedef Traits::GlobalTransformerC | GlobalTransformerC |
The transformer for C. More... | |
typedef Traits::GlobalTransformerD | GlobalTransformerD |
The transformer for D. More... | |
typedef Traits::GlobalStoreIteratorD | GlobalStoreIteratorD |
The iterator for D in global memory. More... | |
typedef Traits::SharedStoreIteratorD | SharedStoreIteratorD |
The iterator to store D in shared memory. More... | |
typedef Traits::SharedStoreTransformerD | SharedStoreTransformerD |
The shared store transformer for D. More... | |
typedef Traits::SharedLoadIteratorD | SharedLoadIteratorD |
The iterator to load D in shared memory. More... | |
typedef Copy< typename SharedLoadIteratorD::Fragment > | SharedLoadTransformerD |
The shared load transformer for D. More... | |
typedef Traits::Index | Index |
The index. More... | |
typedef GlobalLoadIteratorC::Scalar | ScalarC |
The scalar for C. More... | |
typedef GlobalStoreIteratorD::Scalar | ScalarD |
The scalar for D. More... | |
Public Member Functions | |
CUTLASS_DEVICE | IgemmEpilogue (typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_) |
Ctor. More... | |
Public Member Functions inherited from GemmEpilogue< GemmEpilogueTraits_ > | |
CUTLASS_DEVICE | GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_) |
Ctor. More... | |
CUTLASS_DEVICE void | epilogue (Coord< 3 > const &block, Accumulators &accumulators) |
Execute the epilogue. More... | |
template<bool kBetaIsZero_> | |
CUTLASS_DEVICE void | epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators) |
CUTLASS_DEVICE void | shared_load_fence () |
The memory fence for shared loads. More... | |
CUTLASS_DEVICE void | shared_store_fence () |
The memory fence for shared stores. More... | |
Additional Inherited Members | |
Members inherited from GemmEpilogue< GemmEpilogueTraits_ > | |
Params const & | params |
The params. More... | |
SharedStorage & | shared_storage |
The shared storage. More... | |
Index | m |
The dimensions of the GEMM. More... | |
Index | n |
typedef GemmEpilogue<GemmEpilogueTraits_> cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >::Base |
|
inline |