Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | List of all members
cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Struct Template Reference

Template performing a matrix multiply-add operation within a thread.

#include <thread_multiply_add.h>

Public Types

typedef Shape< 1, 1, 1, 1 > InstructionShape
 The shape of the instruction. More...
 
typedef AccumulatorsPerThread_ AccumulatorsPerThread
 The number of accumulators per thread. More...
 
typedef ThreadsPerWarp_ ThreadsPerWarp
 The number of threads per warp. More...
 
typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
 The number of accumulators per warp. More...
 
typedef ScalarA_ ScalarA
 The type for A. More...
 
typedef Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
 The fragment for A. More...
 
typedef ScalarB_ ScalarB
 The type for B. More...
 
typedef Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
 The fragment for B. More...
 
typedef ScalarC_ ScalarC
 The type for C and D. More...
 
typedef Fragment< ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16 > Accumulators
 The accumulators. More...
 

Public Member Functions

CUTLASS_DEVICE ThreadMultiplyAdd ()
 Ctor. More...
 
CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
 Multiply-add: d = a*b + c. More...
 

Member Typedef Documentation

◆ Accumulators

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::Accumulators

◆ AccumulatorsPerThread

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::AccumulatorsPerThread

◆ AccumulatorsPerWarp

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::AccumulatorsPerWarp

◆ FragmentA

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::FragmentA

◆ FragmentB

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::FragmentB

◆ InstructionShape

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef Shape<1, 1, 1, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::InstructionShape

◆ ScalarA

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef ScalarA_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarA

◆ ScalarB

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef ScalarB_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarB

◆ ScalarC

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef ScalarC_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarC

◆ ThreadsPerWarp

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ThreadsPerWarp

Constructor & Destructor Documentation

◆ ThreadMultiplyAdd()

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ThreadMultiplyAdd ( )
inline

Member Function Documentation

◆ multiply_add()

template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::multiply_add ( FragmentA const &  a,
FragmentB const &  b,
Accumulators const &  c,
Accumulators &  d 
)
inline

The documentation for this struct was generated from the following file: