25ff2824031b98cbbde7f1455c1f04cb305b6fd2/docs/shape_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include <cutlass/cutlass.h>

 namespace cutlass {


 /// A compile-time description of the dimensions of a cube (D x H x W x C).
 ///
 /// Each template argument is re-exported as a static integer constant so that
 /// other templates can read the extents through `Shape::kD`, `Shape::kH`,
 /// `Shape::kW` and `Shape::kC`. Any dimension that is not supplied defaults to 1.
 template <int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
 struct Shape {
   /// The depth of the cube.
   static const int kD = kD_;
   /// The height of the cube.
   static const int kH = kH_;
   /// The width of the cube.
   static const int kW = kW_;
   /// The number of scalars per element.
   static const int kC = kC_;
 };

 /// Derived element counts for a type exposing the constants kD, kH, kW and kC.
 ///
 /// All values are products of the four extents of `Shape` and are available at
 /// compile time.
 template <typename Shape>
 struct ShapeCount {
   /// The number of elements per row (W * C).
   static const int kWc = Shape::kW * Shape::kC;
   /// The number of pixels per image (H * W).
   static const int kHw = Shape::kH * Shape::kW;
   /// The number of elements per image (H * W * C).
   static const int kHwc = kHw * Shape::kC;
   /// The number of pixels per cube (D * H * W).
   static const int kDhw = Shape::kD * kHw;
   /// The number of elements in the 4D space (D * H * W * C).
   static const int kDhwc = kDhw * Shape::kC;
   /// The number of elements in the 4D space; same value as kDhwc.
   static const int kCount = kDhwc;
 };


 /// Multiplies every dimension of the shape A_ by the integer factor kScale_.
 template <typename A_, int kScale_>
 struct ShapeScale {
   /// The scaled shape: (kD, kH, kW, kC) of A_, each multiplied by kScale_.
   typedef Shape<A_::kD * kScale_,
                 A_::kH * kScale_,
                 A_::kW * kScale_,
                 A_::kC * kScale_>
       Shape;
 };


 /// Adds two shapes dimension by dimension.
 template <typename A_, typename B_>
 struct ShapeAdd {
   /// The shape whose D, H, W and C extents are the sums of those of A_ and B_.
   typedef Shape<A_::kD + B_::kD,
                 A_::kH + B_::kH,
                 A_::kW + B_::kW,
                 A_::kC + B_::kC>
       Shape;
 };


 /// Subtracts the shape B_ from the shape A_ dimension by dimension.
 template <typename A_, typename B_>
 struct ShapeSub {
   /// The shape whose D, H, W and C extents are those of A_ minus those of B_.
   typedef Shape<A_::kD - B_::kD,
                 A_::kH - B_::kH,
                 A_::kW - B_::kW,
                 A_::kC - B_::kC>
       Shape;
 };


 /// Multiplies two shapes dimension by dimension.
 template <typename A_, typename B_>
 struct ShapeMul {
   /// The shape whose D, H, W and C extents are the products of those of A_ and B_.
   typedef Shape<A_::kD * B_::kD,
                 A_::kH * B_::kH,
                 A_::kW * B_::kW,
                 A_::kC * B_::kC>
       Shape;
 };


 /// Divides the shape A_ by the shape B_ dimension by dimension.
 ///
 /// Each quotient is evaluated with the built-in `/` operator on the two
 /// constants, so every extent of B_ must be non-zero.
 template <typename A_, typename B_>
 struct ShapeDiv {
   /// The shape whose D, H, W and C extents are those of A_ divided by those of B_.
   typedef Shape<A_::kD / B_::kD,
                 A_::kH / B_::kH,
                 A_::kW / B_::kW,
                 A_::kC / B_::kC>
       Shape;
 };


 /// Takes the larger of the two extents in every dimension of A_ and B_.
 template <typename A_, typename B_>
 struct ShapeMax {
   /// The dimension-wise maximum of A_ and B_.
   typedef Shape<(A_::kD < B_::kD ? B_::kD : A_::kD),
                 (A_::kH < B_::kH ? B_::kH : A_::kH),
                 (A_::kW < B_::kW ? B_::kW : A_::kW),
                 (A_::kC < B_::kC ? B_::kC : A_::kC)>
       Shape;
 };


 /// Takes the smaller of the two extents in every dimension of A_ and B_.
 template <typename A_, typename B_>
 struct ShapeMin {
   /// The dimension-wise minimum of A_ and B_.
   typedef Shape<(A_::kD > B_::kD ? B_::kD : A_::kD),
                 (A_::kH > B_::kH ? B_::kH : A_::kH),
                 (A_::kW > B_::kW ? B_::kW : A_::kW),
                 (A_::kC > B_::kC ? B_::kC : A_::kC)>
       Shape;
 };


 /// Derives a shape of strides from the extents of Shape_.
 ///
 /// The resulting constants are: kD = H * W * C, kH = W * C, kW = C and kC = 1,
 /// i.e. the same multipliers that ComputeOffsetFromShape applies to d, h, w and c.
 template <typename Shape_>
 struct ShapeStrides {
   /// The strides, stored in the D, H, W and C slots of a Shape.
   typedef Shape<Shape_::kH * Shape_::kW * Shape_::kC,
                 Shape_::kW * Shape_::kC,
                 Shape_::kC,
                 1>
       Shape;
 };


 /// Compute the offset for the given coordinates in a cube.
 ///
 /// The extents H, W and C are read from Shape_; the depth extent is not needed
 /// to linearize a coordinate.
 template <typename Shape_>
 struct ComputeOffsetFromShape {
   /// Returns d * (H * W * C) + h * (W * C) + w * C + c.
   static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
     // Contribution of each coordinate, from the slowest to the fastest varying.
     int const depth_term = d * Shape_::kH * Shape_::kW * Shape_::kC;
     int const row_term = h * Shape_::kW * Shape_::kC;
     int const col_term = w * Shape_::kC;
     return depth_term + row_term + col_term + c;
   }
 };


 /// Specialization of ComputeOffsetFromShape for shapes whose depth is 1.
 ///
 /// The depth coordinate does not contribute to the result: `d` is ignored.
 template <int kSh_, int kSw_, int kSc_>
 struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, kSc_> > {
   /// Returns h * (W * C) + w * C + c.
   static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
     int const row_term = h * kSw_ * kSc_;
     int const col_term = w * kSc_;
     return row_term + col_term + c;
   }
 };


 /// Specialization of ComputeOffsetFromShape for shapes whose depth and number
 /// of scalars per element are both 1.
 ///
 /// Only `h` and `w` contribute to the result: `d` and `c` are ignored.
 template <int kSh_, int kSw_>
 struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, 1> > {
   /// Returns h * W + w.
   static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
     int const row_start = h * kSw_;
     return row_start + w;
   }
 };


 /// Compute the offset for the given coordinates in a cube, using explicit strides.
 ///
 /// Strides_ supplies one multiplier per coordinate through its kD, kH, kW and kC
 /// constants.
 template <typename Strides_>
 struct ComputeOffsetFromStrides {
   /// Returns d * kD + h * kH + w * kW + c * kC, with the constants taken from Strides_.
   static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
     int const depth_term = d * Strides_::kD;
     int const row_term = h * Strides_::kH;
     int const col_term = w * Strides_::kW;
     int const chan_term = c * Strides_::kC;
     return depth_term + row_term + col_term + chan_term;
   }
 };


 /// Specialization of ComputeOffsetFromStrides for strides whose D entry is 1.
 ///
 /// The depth coordinate does not contribute to the result: `d` is ignored.
 template <int S_h_, int S_w_, int S_c_>
 struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, S_c_> > {
   /// Returns h * S_h_ + w * S_w_ + c * S_c_.
   static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
     int const row_term = h * S_h_;
     int const col_term = w * S_w_;
     int const chan_term = c * S_c_;
     return row_term + col_term + chan_term;
   }
 };


 /// Specialization of ComputeOffsetFromStrides for strides whose D and C entries
 /// are both 1.
 ///
 /// Only `h` and `w` contribute to the result: `d` and `c` are ignored.
 template <int S_h_, int S_w_>
 struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, 1> > {
   /// Returns h * S_h_ + w * S_w_.
   static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
     int const row_term = h * S_h_;
     int const col_term = w * S_w_;
     return row_term + col_term;
   }
 };


 /// Decompose threadIdx.x into the coordinates of a cube whose dimensions are
 /// specified by Threads_, then compute the offset of those coordinates using
 /// Strides_.
 template <typename Threads_, typename Strides_>
 struct ComputeThreadOffsetFromStrides {
   /// Returns the offset associated with the calling thread's threadIdx.x.
   static CUTLASS_DEVICE int get() {
     // Peel the coordinates off the thread index; C varies fastest, D slowest.
     // The depth coordinate is the remaining quotient and is not wrapped.
     int const depth_idx = threadIdx.x / Threads_::kC / Threads_::kW / Threads_::kH;
     int const row_idx = threadIdx.x / Threads_::kC / Threads_::kW % Threads_::kH;
     int const col_idx = threadIdx.x / Threads_::kC % Threads_::kW;
     int const chan_idx = threadIdx.x % Threads_::kC;

     // Weight every coordinate by its stride.
     return depth_idx * Strides_::kD + row_idx * Strides_::kH + col_idx * Strides_::kW +
            chan_idx * Strides_::kC;
   }
 };


 /// Specialization of ComputeThreadOffsetFromStrides for a thread shape and a
 /// stride shape whose D entries are both 1.
 ///
 /// No depth coordinate is computed; the row coordinate is wrapped by T_h_.
 template <int T_h_, int T_w_, int T_c_, int S_h_, int S_w_, int S_c_>
 struct ComputeThreadOffsetFromStrides<Shape<1, T_h_, T_w_, T_c_>, Shape<1, S_h_, S_w_, S_c_> > {
   /// Returns the offset associated with the calling thread's threadIdx.x.
   static CUTLASS_DEVICE int get() {
     // Peel the coordinates off the thread index; C varies fastest, H slowest.
     int const row_idx = threadIdx.x / T_c_ / T_w_ % T_h_;
     int const col_idx = threadIdx.x / T_c_ % T_w_;
     int const chan_idx = threadIdx.x % T_c_;

     // Weight every coordinate by its stride.
     return row_idx * S_h_ + col_idx * S_w_ + chan_idx * S_c_;
   }
 };


 /// Specialization of ComputeThreadOffsetFromStrides for a thread shape and a
 /// stride shape whose D and C entries are all 1.
 ///
 /// Only the row and column coordinates are computed. The row coordinate is the
 /// plain quotient threadIdx.x / T_w_; it is not wrapped by T_h_.
 template <int T_h_, int T_w_, int S_h_, int S_w_>
 struct ComputeThreadOffsetFromStrides<Shape<1, T_h_, T_w_, 1>, Shape<1, S_h_, S_w_, 1> > {
   /// Returns the offset associated with the calling thread's threadIdx.x.
   static CUTLASS_DEVICE int get() {
     // Split the thread index into a row (quotient) and a column (remainder).
     int const row_idx = threadIdx.x / T_w_;
     int const col_idx = threadIdx.x % T_w_;

     // Weight every coordinate by its stride.
     return row_idx * S_h_ + col_idx * S_w_;
   }
 };


 }  // namespace cutlass
cutlass::ComputeThreadOffsetFromStrides
Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards, compute the offset of those coordinates using Strides_.
Definition: shape.h:252

cutlass::ShapeCount::kWc
static int const kWc
The number of elements per row.
Definition: shape.h:81

cutlass
Definition: convert.h:33

cutlass::ShapeAdd::Shape
Shape< A_::kD+B_::kD, A_::kH+B_::kH, A_::kW+B_::kW, A_::kC+B_::kC > Shape
Definition: shape.h:105

cutlass::ShapeScale::Shape
Shape< A_::kD *kScale_, A_::kH *kScale_, A_::kW *kScale_, A_::kC *kScale_ > Shape
Definition: shape.h:98

cutlass::ShapeStrides::Shape
Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
Definition: shape.h:155

cutlass::ShapeMul::Shape
Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
Definition: shape.h:119

cutlass::ShapeSub::Shape
Shape< A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC > Shape
Definition: shape.h:112

cutlass::ShapeSub
Definition: shape.h:111

cutlass::Shape::kH
static int const kH
The height of the cube.
Definition: shape.h:68

cutlass::Shape::kC
static int const kC
The number of scalars per element.
Definition: shape.h:72

cutlass::ShapeScale
Definition: shape.h:97

cutlass::ComputeOffsetFromShape
Compute the offset for the given coordinates in a cube.
Definition: shape.h:165

cutlass::ShapeDiv::Shape
Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
Definition: shape.h:126

cutlass::ShapeCount::kDhw
static int const kDhw
The number of pixels per cube.
Definition: shape.h:87

cutlass::ShapeMul
Definition: shape.h:118

cutlass::ShapeDiv
Definition: shape.h:125

cutlass::ComputeOffsetFromStrides
Compute the offset for the given coordinates in a cube.
Definition: shape.h:210

cutlass::Shape
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64

cutlass::ShapeMax
Definition: shape.h:132

cutlass::ShapeAdd
Definition: shape.h:104

cutlass::ShapeCount::kCount
static int const kCount
The number of elements in the 4D space.
Definition: shape.h:91

cutlass::ShapeCount::kDhwc
static int const kDhwc
The number of elements in the 4D space.
Definition: shape.h:89

cutlass::Shape::kW
static int const kW
The width of the cube.
Definition: shape.h:70

cutlass::ShapeMin
Definition: shape.h:143

cutlass::ShapeCount::kHw
static int const kHw
The number of pixels per image.
Definition: shape.h:83

cutlass::Shape::kD
static int const kD
The depth of the cube.
Definition: shape.h:66

cutlass::ShapeStrides
Definition: shape.h:154

cutlass::ShapeMax::Shape
Shape<(A_::kD > B_::kD ? A_::kD :B_::kD),(A_::kH > B_::kH ? A_::kH :B_::kH),(A_::kW > B_::kW ? A_::kW :B_::kW),(A_::kC > B_::kC ? A_::kC :B_::kC)> Shape
Definition: shape.h:137

cutlass.h
Basic include for CUTLASS macros.

cutlass::ShapeMin::Shape
Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
Definition: shape.h:148

cutlass::ShapeCount
Compute derived counts of a Layout Concept based class.
Definition: shape.h:79

cutlass::ShapeCount::kHwc
static int const kHwc
The number of elements per image.
Definition: shape.h:85