|
Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <hgemm_traits.h>
Additional Inherited Members | |
Public Types inherited from cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 > | |
| typedef half | ScalarA |
| The scalar for A. More... | |
| typedef half | ScalarB |
| The scalar for B. More... | |
| typedef half | ScalarC |
| The scalar for C. More... | |
| typedef half | ScalarD |
| The scalar for D. More... | |
| typedef OutputTile_ | OutputTile |
| The tile. More... | |
| typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half > | MultiplyAdd |
| The functor to do D = A*B + C. More... | |
| typedef MultiplyAdd::InstructionShape | InstructionShape |
| The shape of the instruction. More... | |
| typedef MultiplyAdd::AccumulatorsPerWarp | AccumulatorsPerWarp |
| The number of accumulators per warp. More... | |
| typedef MultiplyAdd::Accumulators | Accumulators |
| The accumulators. More... | |
| typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape | Warps |
| The number of warps. More... | |
Static Public Attributes inherited from cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 > | |
| static int const | kWarpSize |
| The default warp size (32 threads per warp). More... | |
| static int const | kThreads |
| The number of threads. More... | |
| static int const | kScalarsPerLdgA |
| The number of scalars per LDG/STS/LDS for A. More... | |
| static int const | kScalarsPerStsA |
| static int const | kScalarsPerLdsA |
| static int const | kScalarsPerLdgB |
| The number of scalars per LDG/STS/LDS for B. More... | |
| static int const | kScalarsPerStsB |
| static int const | kScalarsPerLdsB |
| static int const | kScalarsPerLdgC |
| The number of scalars per LDG for C. More... | |
| static int const | kScalarsPerStgD |
| The number of scalars per STS/LDS/STG for D. More... | |
| static int const | kScalarsPerStsD |
| static int const | kScalarsPerLdsD |
| static int const | kAccumulatorsPerLdsA |
| The number of accumulators that are going to be fed from one LDS A/B. More... | |
| static int const | kAccumulatorsPerLdsB |
| static int const | kStages |
| The number of stages in shared memory to implement double, triple, more-buffering. More... | |
1.8.14