374882be53f2a7558aeb6c4955b8b9da75b29ecf/docs/wmma__gemm__shared__tile_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include <cutlass/wmma_matrix.h>
 #ifdef CUTLASS_USE_WMMA_API

 #include <cutlass/gemm/gemm_operand.h>
 #include <cutlass/reshape_tile.h>

 namespace cutlass {
 namespace gemm {

 // Empty class template parameterised on a single type. It has no members;
 // NOTE(review): presumably a compile-time debugging aid (e.g. to make the
 // compiler print a type) -- confirm against its users.
 template <typename T_>
 struct Debug {
 };


 template <MatrixLayout::Kind kLayout_,
           typename Scalar_,
           typename Tile_,
           typename Warps_,
           int kWarpStride_,
           typename Iterations_,
           typename Delta_,
           typename WmmaShape_>
 struct WmmaGemmSharedLoadTileATraits {
   static GemmOperand::Kind const kOperand = GemmOperand::kA;
   static MatrixLayout::Kind const kLayout = kLayout_;
   typedef Scalar_ Scalar;
   typedef Scalar const* Pointer;
   static int const kAccessSize = 1;
   typedef Tile_ Tile;
   typedef Warps_ Warps;
   static int const kWarpStride = kWarpStride_;
   typedef Iterations_ Iterations;
   typedef Delta_ Delta;
   typedef Delta_ ImmediateOffsetStrides;
   typedef WmmaShape_ WmmaShape;
   static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
   struct ThreadOffset {
     CUTLASS_HOST_DEVICE
     Coord<4> operator()() const {
       // The warp id.
       int const warp = threadIdx.x / kWarpSize;
       // The offset.
       int const offset = warp % Warps::kW * kWarpStride;
       return make_Coord(0, 0, offset, 0);
     }
   };
 };


 template <MatrixLayout::Kind kLayout_,
           typename Scalar_,
           typename Tile_,
           typename Warps_,
           int kWarpStride_,
           typename Iterations_,
           typename Delta_,
           typename WmmaShape_>
 struct WmmaGemmSharedLoadTileBTraits {
   static GemmOperand::Kind const kOperand = GemmOperand::kB;
   static MatrixLayout::Kind const kLayout = kLayout_;
   typedef Scalar_ Scalar;
   typedef Scalar const* Pointer;
   static int const kAccessSize = 1;
   typedef Tile_ Tile;
   typedef Warps_ Warps;
   static int const kWarpStride = kWarpStride_;
   typedef Iterations_ Iterations;
   typedef Delta_ Delta;
   typedef Delta_ ImmediateOffsetStrides;
   typedef WmmaShape_ WmmaShape;
   static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
   struct ThreadOffset {
     CUTLASS_HOST_DEVICE
     Coord<4> operator()() const {
       // The warp id.
       int const warp = threadIdx.x / kWarpSize;
       // The offset.
       int const offset = warp / Warps::kW * kWarpStride;
       return make_Coord(0, 0, offset, 0);
     }
   };
 };


 template <MatrixLayout::Kind kLayout_,
           typename Scalar_,
           typename OutputTile_,
           typename Warps_,
           typename WmmaShape_,
           int kSkew_ = 0>
 struct WmmaGemmSharedStoreTileDTraits {
   static GemmOperand::Kind const kOperand = GemmOperand::kC;
   static MatrixLayout::Kind const kLayout = kLayout_;
   typedef Scalar_ Scalar;
   // The access size
   static int const kAccessSize = 1;
   typedef Scalar* Pointer;
   typedef Warps_ Warps;
   typedef WmmaShape_ WmmaShape;
   static int const kSkew = kSkew_;
   static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
   typedef Shape<1, Warps_::kH * WmmaShape_::kH, OutputTile_::kW + kSkew_> Tile;
   typedef Shape<1, 1, OutputTile_::kW / Warps::kW / WmmaShape_::kW> Iterations;
   typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> Delta;
   typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> ImmediateOffsetStrides;

   struct ThreadOffset {
     CUTLASS_HOST_DEVICE
     Coord<4> operator()() const {
       // The warp id.
       int const warp = threadIdx.x / kWarpSize;
       // The starting column.
       int const h = warp / Warps::kW * WmmaShape::kH;
       // The w.
       int const w = warp % Warps::kW * WmmaShape::kW;
       // The offset.
       int const offset = h * Tile::kW + w;
       return make_Coord(0, 0, offset, 0);
     }
   };
 };


 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerLds_>
 struct WmmaGemmSharedLoadTileDTraits {
   typedef Scalar_ Scalar;
   typedef Scalar const* Pointer;
   static int const kAccessSize = kScalarsPerLds_;
   typedef typename ReshapeTile<Tile_, kScalarsPerLds_>::Tile Tile;
   typedef typename ReshapeThreads<Tile, Threads_>::Threads Threads;
   typedef Shape<1, Tile::kW * Tile::kC, Tile::kC> ThreadsStrides;
   static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;

   typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerLds_> Delta;
   typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerLds_>
       ImmediateOffsetStrides;
   typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kScalarsPerLds_>
       Iterations;

   struct ThreadOffset {
     CUTLASS_HOST_DEVICE
     Coord<4> operator()() const {
       // The offset.
       int const offset = ComputeThreadOffsetFromStrides<Threads, ThreadsStrides>::get();
       return make_Coord(0, 0, offset, 0);
     }
   };
 };


 }  // namespace gemm
 }  // namespace cutlass

 #endif  // defined CUTLASS_USE_WMMA_API
cutlass::ComputeThreadOffsetFromStrides::get
static CUTLASS_DEVICE int get()
Definition: shape.h:253

wmma_matrix.h
Abstractions for loading and storing matrices using the CUDA WMMA API.

cutlass::MemorySpace::kShared
Definition: load_store.h:42

cutlass
Definition: convert.h:33

cutlass::make_Coord
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241

cutlass::GemmOperand::kC
Definition: matrix_traits.h:43

cutlass::MemorySpace::Kind
Kind
Definition: load_store.h:40

reshape_tile.h
Defines a type for restructuring a tile.

gemm_operand.h
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...

cutlass::GemmOperand::kB
Definition: matrix_traits.h:43

CUTLASS_HOST_DEVICE
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46

cutlass::MatrixLayout::Kind
Kind
Definition: matrix_traits.h:36

cutlass::ReshapeTile::Tile
Tile_ Tile
Definition: reshape_tile.h:43

cutlass::GemmOperand::Kind
Kind
Definition: matrix_traits.h:43

cutlass::GemmOperand::kA
Definition: matrix_traits.h:43

cutlass::gemm::ReshapeThreads::Threads
Threads_ Threads
Definition: gemm_global_tile.h:54