56 typename AccumulatorsPerThread_>
70 ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
94 template <
typename OutputTile_,
typename AccumulatorsPerThread_>
95 struct IgemmConfig<OutputTile_, int8_t, AccumulatorsPerThread_>
108 ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
132 template <enum MatrixLayout::Kind kLayout_,
typename GemmConfig_>
137 template <
typename GemmConfig_>
144 static int const kScalarsPerStsA = 16;
166 Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>,
176 template <enum MatrixLayout::Kind kLayout_,
typename GemmConfig_>
181 template <
typename GemmConfig_>
188 static int const kScalarsPerStsB = 16;
210 Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>,
220 template <enum MatrixLayout::Kind kLayout_,
typename Iterator_>
223 template <
typename Iterator_>
228 template <
typename Iterator_>
235 template <enum MatrixLayout::Kind kLayout_,
typename Iterator_>
238 template <
typename Iterator_>
243 template <
typename Iterator_>
256 typename OutputTile_,
260 typename EpilogueFunctor_,
264 typename Index_ =
int>
281 typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
297 typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
306 typedef TileLoadIterator<
typename GemmTileTraitsHelperA::SharedLoadTileTraits,
307 typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
315 typedef TileLoadIterator<
typename GemmTileTraitsHelperB::SharedLoadTileTraits,
316 typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
335 template <
typename ScalarD_>
355 typename ScalarD_ = int,
361 typename Index_ = int,
368 AccumulatorsPerThread_,
372 typename Helper_::GemmConfig,
374 typename Helper_::GlobalLoadStreamA,
376 typename Helper_::GlobalLoadStreamB,
378 typename Helper_::SharedLoadStreamA,
380 typename Helper_::SharedLoadStreamB,
382 typename Helper_::Epilogue,
384 IdentityBlockSwizzle,
388 typename Helper_::ClearAccumulators> {};
Definition: load_store.h:42
TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: igemm_traits.h:319
Defines iterators for efficiently loading and storing to global memory.
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The iterator to load A from global memory.
Definition: igemm_traits.h:275
Transposes a fragment of data containing packed 8-bit integer elements.
Defines structural properties of complete GEMM computation.
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: igemm_traits.h:303
Definition: igemm_traits.h:133
TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: igemm_traits.h:300
IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
Definition: igemm_traits.h:294
Definition: igemm_epilogue.h:290
IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^T.
Definition: igemm_traits.h:203
GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
The base config.
Definition: igemm_traits.h:141
IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > GemmConfig
The IGEMM config.
Definition: igemm_traits.h:267
Definition: gemm_shared_tile.h:38
Definition: tile_iterator.h:62
Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction.
Definition: gemm_global_tile.h:159
GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for B^N.
Definition: igemm_traits.h:215
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product.
Definition: gemm_global_stream.h:161
Definition: gemm_traits.h:273
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The iterator to load B from global memory.
Definition: igemm_traits.h:291
IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^N.
Definition: igemm_traits.h:159
int Scalar
Definition: igemm_traits.h:342
Describes layouts of matrices.
Definition: matrix_traits.h:35
IgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
The GEMM config for B.
Definition: igemm_traits.h:271
Definition: igemm_swizzle.h:38
Definition: igemm_traits.h:177
Definition: igemm_traits.h:265
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: igemm_traits.h:287
SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
The stream to load B from shared memory.
Definition: igemm_traits.h:322
Defines iterators for efficiently loading and storing tiles to and from shared memory.
Definition: matrix_traits.h:36
IgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
The GEMM config for A.
Definition: igemm_traits.h:269
Definition: gemm_shared_stream.h:44
Defines a type for restructuring a tile.
TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: igemm_traits.h:310
ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
The object to clear accumulators.
Definition: igemm_traits.h:327
Definition: gemm_traits.h:79
Definition: gemm_traits.h:137
Definition: matrix_traits.h:43
Definition: igemm_traits.h:57
Definition: igemm_global_tile.h:50
float Scalar
Definition: igemm_traits.h:337
Definition: gemm_traits.h:428
Definition: igemm_traits.h:370
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for A^N.
Definition: igemm_traits.h:171
Definition: gemm_traits.h:346
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
Definition: matrix_traits.h:36
Definition: gemm_traits.h:142
IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
The epilogue.
Definition: igemm_traits.h:330
IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
The default transformer for A.
Definition: igemm_traits.h:278
Kind
Definition: matrix_traits.h:36
TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: igemm_traits.h:284
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
Definition: matrix_traits.h:43
Implements a software-pipelined efficient GEMM.
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
Defines structural properties of the GEMM epilogue.
Definition: igemm_traits.h:336
Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats.
Defines conversion operations among Fragments of different base type.
GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
The base config.
Definition: igemm_traits.h:185
SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
The stream to load A from shared memory.
Definition: igemm_traits.h:313
Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each.
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
GemmConfig::MultiplyAdd MultiplyAdd
The multiply-add functor.
Definition: igemm_traits.h:325