Implements the matrix multiply-accumulate operation on 8-bit integer data using the DP4A instruction. More...

#include <cutlass/fragment.h>
#include <cutlass/gemm/thread_multiply_add.h>

Classes
struct	cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
	Template performing a matrix multiply-add operation within a thread. More...

Classes

Namespaces