|
| struct | AlignedStruct |
| |
| struct | ComputeOffsetFromShape |
| | Compute the offset for the given coordinates in a cube. More...
|
| |
| struct | ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > |
| | Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
|
| |
| struct | ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > |
| | Compute the offset for the given coordinates in a cube with a depth of 1. More...
|
| |
| struct | ComputeOffsetFromStrides |
| | Compute the offset for the given coordinates in a cube. More...
|
| |
| struct | ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > |
| | Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
|
| |
| struct | ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > |
| | Compute the offset for the given coordinates in a cube with a depth of 1. More...
|
| |
| struct | ComputeThreadOffsetFromStrides |
| | Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
|
| |
| struct | ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > |
| | Specialization for D=1 and C=1. More...
|
| |
| struct | ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > |
| | Specialization for D=1. More...
|
| |
| struct | ConstPredicateTileAdapter |
| | Adapter to enable random access to predicates via logical coordinate within a tile. More...
|
| |
| struct | Convert |
| |
| struct | Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > |
| |
| struct | Coord |
| | Statically-sized array specifying Coords within a tensor. More...
|
| |
| struct | Copy |
| |
| struct | divide_assert |
| |
| struct | Extent |
| | Returns the extent of a scalar or vector. More...
|
| |
| struct | Extent< Vector< T, Lanes > > |
| | Returns the number of lanes of a vector if need be. More...
|
| |
| struct | Extent< Vector< T, Lanes > const > |
| | Returns the number of lanes of a vector if need be. More...
|
| |
| struct | Fragment |
| | A template defining Fragment Concept. More...
|
| |
| struct | FragmentConstIterator |
| |
| struct | FragmentIterator |
| | A template defining Fragment Iterator Concept. More...
|
| |
| struct | FragmentLoad |
| |
| struct | FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
| |
| struct | FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
| |
| struct | FragmentStore |
| |
| struct | FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
| |
| struct | FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > |
| |
| struct | GemmOperand |
| | Gemm operand - D = A * B + C. More...
|
| |
| struct | Identity |
| | Describes identity elements. More...
|
| |
| struct | is_pow2 |
| |
| struct | IteratorAdvance |
| | Specifies dimension in which post-increment accesses advance. More...
|
| |
| struct | IteratorFragment |
| | Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
|
| |
| struct | Load |
| |
| struct | Load< double, 2, Memory_, true, 16 > |
| |
| struct | Load< Scalar_, Lanes_, Memory_, true, 16 > |
| |
| struct | Load< Scalar_, Lanes_, Memory_, true, 4 > |
| |
| struct | Load< Scalar_, Lanes_, Memory_, true, 8 > |
| |
| struct | log2_down |
| |
| struct | log2_down< N, 1, Count > |
| |
| struct | log2_up |
| |
| struct | log2_up< N, 1, Count > |
| |
| struct | MatrixLayout |
| | Describes layouts of matrices. More...
|
| |
| struct | MemorySpace |
| | Enum to specify which memory space data resides in. More...
|
| |
| struct | PredicateTileAdapter |
| | Adapter to enable random access to predicates via logical coordinate within a tile. More...
|
| |
| struct | PredicateVector |
| | Statically sized array of bits implementing the Predicate Vector Concept. More...
|
| |
| struct | ReshapeTile |
| |
| struct | ReshapeTile< Tile_, kAccessSize_, true > |
| |
| struct | Shape |
| | A Shape implementing Layout Concept describing the dimensions of a cube. More...
|
| |
| struct | ShapeAdd |
| |
| struct | ShapeCount |
| | Compute derived counts of a Layout Concept based class. More...
|
| |
| struct | ShapeDiv |
| |
| struct | ShapeMax |
| |
| struct | ShapeMin |
| |
| struct | ShapeMul |
| |
| struct | ShapeScale |
| |
| struct | ShapeStrides |
| |
| struct | ShapeSub |
| |
| struct | sqrt_est |
| |
| struct | StorageType |
| |
| struct | StorageType< 1 > |
| |
| struct | StorageType< 2 > |
| |
| struct | StorageType< 4 > |
| |
| struct | Store |
| |
| struct | Store< double, 2, Memory_, true, 16 > |
| |
| struct | Store< Scalar_, Lanes_, Memory_, true, 16 > |
| |
| struct | Store< Scalar_, Lanes_, Memory_, true, 4 > |
| |
| struct | Store< Scalar_, Lanes_, Memory_, true, 8 > |
| |
| class | TensorRef |
| | Structure modeling a pointer and stride into a tensor. More...
|
| |
| class | TensorView |
| | Host-side reference implementation of tensor operations. More...
|
| |
| struct | TiledThreadOffset |
| | Basic thread offset function computed from a thread shape. More...
|
| |
| struct | TileIteratorBase |
| | Iterator for accessing a stripmined tile in memory. More...
|
| |
| struct | TileLoadIterator |
| | An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
|
| |
| struct | TileStoreIterator |
| | An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
|
| |
| struct | TileTraits |
| | A template defining Tile Traits Concept. More...
|
| |
| struct | TileTraitsContiguousMajor |
| |
| struct | TileTraitsStandard |
| | Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
|
| |
| struct | TileTraitsStrideMajor |
| |
| struct | TileTraitsWarpRake |
| | Tiling in which warps rake across the contiguous dimension. More...
|
| |
| struct | TrivialPredicateTileAdapter |
| | Always returns true predicate. More...
|
| |
| union | Vector |
| |
| union | Vector< half, kLanes_ > |
| |
| struct | Vectorize |
| |
| struct | Vectorize< Element_, 1 > |
| |
| struct | VectorTraits |
| | Traits describing properties of vectors and scalar-as-vectors. More...
|
| |
| struct | VectorTraits< Vector< T, Lanes > > |
| | Partial specialization for actual cutlass::Vector. More...
|
| |
| struct | VectorTraits< Vector< T, Lanes > const > |
| | Partial specialization for actual cutlass::Vector. More...
|
| |
|
| CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) |
| | Helper to make a 1-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) |
| | Helper to make a 2-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) |
| | Helper to make a 3-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) |
| | Helper to make a 4-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 2 > | get_Coord_hw (Coord< 3 > const &coord) |
| | Getter. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 2 > | get_Coord_hw (Coord< 4 > const &coord) |
| | Getter. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 3 > | get_Coord_hwc (Coord< 4 > const &coord) |
| | Getter. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 3 > | get_Coord_dhw (Coord< 4 > const &coord) |
| | Getter. More...
|
| |
| template<typename InputIterator , typename Fragment > |
| CUTLASS_HOST_DEVICE void | iterator_load (InputIterator &iterator, Fragment &fragment) |
| | Loads a fragment from an input iterator. More...
|
| |
| template<typename InputIterator , typename Fragment > |
| CUTLASS_DEVICE void | shared_iterator_load (InputIterator &iterator, Fragment &fragment) |
| | Loads a fragment from a shared memory input iterator. More...
|
| |
| template<typename InputIterator , typename Fragment > |
| CUTLASS_DEVICE void | shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d) |
| | Loads a fragment from a shared memory input iterator. More...
|
| |
| template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| | Loads a fragment from an input iterator, masked by a predicate iterator. More...
|
| |
| template<typename InputIterator , typename Fragment > |
| CUTLASS_HOST_DEVICE void | iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0) |
| | Loads a fragment from an input iterator. More...
|
| |
| template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it) |
| | Loads a fragment from an input iterator. More...
|
| |
| template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| |
| template<typename InputIterator , typename Fragment > |
| CUTLASS_HOST_DEVICE void | iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0) |
| | Loads a fragment from an input iterator. More...
|
| |
| template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it) |
| | Loads a fragment from an input iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment > |
| CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator &iterator, Fragment &fragment) |
| | Stores a fragment to an output iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment > |
| CUTLASS_DEVICE void | shared_iterator_store (OutputIterator &iterator, Fragment const &fragment) |
| | Stores a fragment to a shared memory output iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| | Stores a fragment to an output iterator, masked by a predicate iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment > |
| CUTLASS_HOST_DEVICE void | iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0) |
| | Stores a fragment to an output iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it) |
| | Stores a fragment to an output iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter) |
| | Stores a fragment to an output iterator, masked by a predicate iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment > |
| CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0) |
| | Stores a fragment to an output iterator. More...
|
| |
| template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter > |
| CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it) |
| | Stores a fragment to an output iterator. More...
|
| |
| template<typename dividend_t , typename divisor_t > |
| CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) |
| |
| __host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) |
| | The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
|
| |
| template<> |
| struct | __align__ (1) AlignedStruct< 1 > |
| |
| template<> |
| struct | __align__ (2) AlignedStruct< 2 > |
| |
| template<> |
| struct | __align__ (4) AlignedStruct< 4 > |
| |
| template<> |
| struct | __align__ (8) AlignedStruct< 8 > |
| |
| template<> |
| struct | __align__ (16) AlignedStruct< 16 > |
| |
| template<> |
| struct | __align__ (32) AlignedStruct< 32 > |
| |
| template<> |
| struct | __align__ (64) AlignedStruct< 64 > |
| |
| template<typename Scalar_ > |
| CUTLASS_DEVICE void | make_zero (Scalar_ &x) |
| |
| template<typename Scalar_ , int kLanes_> |
| CUTLASS_DEVICE void | make_zero (Vector< Scalar_, kLanes_ > &vec) |
| |