|
struct | AlignedStruct |
|
struct | ComputeOffsetFromShape |
| Compute the offset for the given coordinates in a cube. More...
|
|
struct | ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > |
| Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
|
|
struct | ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > |
| Compute the offset for the given coordinates in a cube with a depth of 1. More...
|
|
struct | ComputeOffsetFromStrides |
| Compute the offset for the given coordinates in a cube. More...
|
|
struct | ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > |
| Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
|
|
struct | ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > |
| Compute the offset for the given coordinates in a cube with a depth of 1. More...
|
|
struct | ComputeThreadOffsetFromStrides |
| Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
|
|
struct | ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > |
| Specialization for D=1 and C=1. More...
|
|
struct | ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > |
| Specialization for D=1. More...
|
|
struct | ConstPredicateTileAdapter |
| Adapter to enable random access to predicates via logical coordinate within a tile. More...
|
|
struct | Convert |
|
struct | Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > |
|
struct | Coord |
| Statically-sized array specifying Coords within a tensor. More...
|
|
struct | Copy |
|
struct | divide_assert |
|
struct | Extent |
| Returns the extent of a scalar or vector. More...
|
|
struct | Extent< Vector< T, Lanes > > |
| Returns the number of lanes of a vector if need be. More...
|
|
struct | Extent< Vector< T, Lanes > const > |
| Returns the number of lanes of a vector if need be. More...
|
|
struct | Fragment |
| A template defining Fragment Concept. More...
|
|
struct | FragmentConstIterator |
|
struct | FragmentIterator |
| A template defining Fragment Iterator Concept. More...
|
|
struct | FragmentLoad |
|
struct | FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
|
struct | FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
|
struct | FragmentStore |
|
struct | FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
|
struct | FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
|
struct | GemmOperand |
| Gemm operand - D = A * B + C. More...
|
|
struct | Identity |
| Describes identity elements. More...
|
|
struct | is_pow2 |
|
struct | IteratorAdvance |
| Specifies dimension in which post-increment accesses advance. More...
|
|
struct | IteratorFragment |
| Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
|
|
struct | Load |
|
struct | Load< double, 2, Memory_, true, 16 > |
|
struct | Load< Scalar_, Lanes_, Memory_, true, 16 > |
|
struct | Load< Scalar_, Lanes_, Memory_, true, 4 > |
|
struct | Load< Scalar_, Lanes_, Memory_, true, 8 > |
|
struct | log2_down |
|
struct | log2_down< N, 1, Count > |
|
struct | log2_up |
|
struct | log2_up< N, 1, Count > |
|
struct | MatrixLayout |
| Describes layouts of matrices. More...
|
|
struct | MemorySpace |
| Enum to specify which memory space data resides in. More...
|
|
struct | PredicateTileAdapter |
| Adapter to enable random access to predicates via logical coordinate within a tile. More...
|
|
struct | PredicateVector |
| Statically sized array of bits implementing Predicate Vector Concept. More...
|
|
struct | ReshapeTile |
|
struct | ReshapeTile< Tile_, kAccessSize_, true > |
|
struct | Shape |
| A Shape implementing Layout Concept describing the dimensions of a cube. More...
|
|
struct | ShapeAdd |
|
struct | ShapeCount |
| Compute derived counts of a Layout Concept based class. More...
|
|
struct | ShapeDiv |
|
struct | ShapeMax |
|
struct | ShapeMin |
|
struct | ShapeMul |
|
struct | ShapeScale |
|
struct | ShapeStrides |
|
struct | ShapeSub |
|
struct | sqrt_est |
|
struct | StorageType |
|
struct | StorageType< 1 > |
|
struct | StorageType< 2 > |
|
struct | StorageType< 4 > |
|
struct | Store |
|
struct | Store< double, 2, Memory_, true, 16 > |
|
struct | Store< Scalar_, Lanes_, Memory_, true, 16 > |
|
struct | Store< Scalar_, Lanes_, Memory_, true, 4 > |
|
struct | Store< Scalar_, Lanes_, Memory_, true, 8 > |
|
class | TensorRef |
| Structure modeling a pointer and stride into a tensor. More...
|
|
class | TensorView |
| Host-side reference implementation of tensor operations. More...
|
|
struct | TiledThreadOffset |
| Basic thread offset function computed from a thread shape. More...
|
|
struct | TileIteratorBase |
| Iterator for accessing a stripmined tile in memory. More...
|
|
struct | TileLoadIterator |
| An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
|
|
struct | TileStoreIterator |
| An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
|
|
struct | TileTraits |
| A template defining Tile Traits Concept. More...
|
|
struct | TileTraitsContiguousMajor |
|
struct | TileTraitsStandard |
| Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
|
|
struct | TileTraitsStrideMajor |
|
struct | TileTraitsWarpRake |
| Tiling in which warps rake across the contiguous dimension. More...
|
|
struct | TrivialPredicateTileAdapter |
| Always returns true predicate. More...
|
|
union | Vector |
|
union | Vector< half, kLanes_ > |
|
struct | Vectorize |
|
struct | Vectorize< Element_, 1 > |
|
struct | VectorTraits |
| Traits describing properties of vectors and scalar-as-vectors. More...
|
|
struct | VectorTraits< Vector< T, Lanes > > |
| Partial specialization for actual cutlass::Vector. More...
|
|
struct | VectorTraits< Vector< T, Lanes > const > |
| Partial specialization for actual cutlass::Vector. More...
|
|
|
CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) |
| Helper to make a 1-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) |
| Helper to make a 2-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) |
| Helper to make a 3-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) |
| Helper to make a 4-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 2 > | get_Coord_hw (Coord< 3 > const &coord) |
| Getter. More...
|
|
CUTLASS_HOST_DEVICE Coord< 2 > | get_Coord_hw (Coord< 4 > const &coord) |
| Getter. More...
|
|
CUTLASS_HOST_DEVICE Coord< 3 > | get_Coord_hwc (Coord< 4 > const &coord) |
| Getter. More...
|
|
CUTLASS_HOST_DEVICE Coord< 3 > | get_Coord_dhw (Coord< 4 > const &coord) |
| Getter. More...
|
|
template<typename InputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_load (InputIterator &iterator, Fragment &fragment) |
| Loads a fragment from an input iterator. More...
|
|
template<typename InputIterator , typename Fragment > |
CUTLASS_DEVICE void | shared_iterator_load (InputIterator &iterator, Fragment &fragment) |
| Loads a fragment from a shared memory input iterator. More...
|
|
template<typename InputIterator , typename Fragment > |
CUTLASS_DEVICE void | shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d) |
| Loads a fragment from a shared memory input iterator. More...
|
|
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| Loads a fragment from an input iterator, masked by a predicate iterator. More...
|
|
template<typename InputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0) |
| Loads a fragment from an input iterator. More...
|
|
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it) |
| Loads a fragment from an input iterator. More...
|
|
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
|
template<typename InputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0) |
| Loads a fragment from an input iterator. More...
|
|
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it) |
| Loads a fragment from an input iterator. More...
|
|
template<typename OutputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator &iterator, Fragment &fragment) |
| Stores a fragment to an output iterator. More...
|
|
template<typename OutputIterator , typename Fragment > |
CUTLASS_DEVICE void | shared_iterator_store (OutputIterator &iterator, Fragment const &fragment) |
| Stores a fragment to a shared memory output iterator. More...
|
|
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| Stores a fragment to an output iterator, masked by a predicate iterator. More...
|
|
template<typename OutputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0) |
| Stores a fragment to an output iterator. More...
|
|
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it) |
| Stores a fragment to an output iterator. More...
|
|
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| Stores a fragment to an output iterator, masked by a predicate iterator. More...
|
|
template<typename OutputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0) |
| Stores a fragment to an output iterator. More...
|
|
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it) |
| Stores a fragment to an output iterator. More...
|
|
template<typename dividend_t , typename divisor_t > |
CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) |
|
__host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) |
| The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
|
|
template<> |
struct | __align__ (1) AlignedStruct< 1 > |
|
template<> |
struct | __align__ (2) AlignedStruct< 2 > |
|
template<> |
struct | __align__ (4) AlignedStruct< 4 > |
|
template<> |
struct | __align__ (8) AlignedStruct< 8 > |
|
template<> |
struct | __align__ (16) AlignedStruct< 16 > |
|
template<> |
struct | __align__ (32) AlignedStruct< 32 > |
|
template<> |
struct | __align__ (64) AlignedStruct< 64 > |
|
template<typename Scalar_ > |
CUTLASS_DEVICE void | make_zero (Scalar_ &x) |
|
template<typename Scalar_ , int kLanes_> |
CUTLASS_DEVICE void | make_zero (Vector< Scalar_, kLanes_ > &vec) |
|