9fd55460c6c16d0edb11beb60087a05470776ede/docs/dgemm__traits_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include <cutlass/gemm/gemm.h>
 #include <cutlass/gemm/gemm_epilogue.h>
 #include <cutlass/gemm/gemm_epilogue_traits.h>
 #include <cutlass/gemm/gemm_global_tile.h>
 #include <cutlass/gemm/gemm_shared_tile.h>
 #include <cutlass/gemm/gemm_traits.h>
 #include <cutlass/gemm/thread_multiply_add.h>

 namespace cutlass {
 namespace gemm {


 /// Double-precision specialization of GemmConfig.
 ///
 /// All four scalar-type slots of GemmConfig are fixed to `double`, and the math functor is
 /// ThreadMultiplyAdd instantiated with AccumulatorsPerThread_ and Shape<1, 4, 8>.
 ///
 /// Template parameters:
 ///   OutputTile_            - forwarded unchanged to GemmConfig.
 ///   AccumulatorsPerThread_ - first argument of the ThreadMultiplyAdd functor.
 ///   kScalarsPerLdgA_       - forwarded to two consecutive GemmConfig slots (default 1).
 ///   kScalarsPerLdgB_       - forwarded to two consecutive GemmConfig slots (default 1).
 ///
 /// NOTE(review): the meaning of each positional GemmConfig argument is defined in
 /// gemm_traits.h, not in this file. The per-operand labels in the comments below are
 /// inferred from the parameter names and must be confirmed against that header.
 template <typename OutputTile_,
           typename AccumulatorsPerThread_,
           int kScalarsPerLdgA_ = 1,
           int kScalarsPerLdgB_ = 1>
 struct DgemmConfig
     : public GemmConfig<
           // Four scalar-type slots, all double (presumably A, B, C and D -- confirm).
           double, double, double, double,
           // Output tile, passed through as given.
           OutputTile_,
           // Functor performing the multiply-add on fragments.
           ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, double, double, double>,
           // Three slots driven by kScalarsPerLdgA_ (presumably the A-operand access widths).
           kScalarsPerLdgA_, kScalarsPerLdgA_, 2,
           // Three slots driven by kScalarsPerLdgB_ (presumably the B-operand access widths).
           kScalarsPerLdgB_, kScalarsPerLdgB_, 2,
           // Remaining four integer slots, hard-coded for this configuration
           // (presumably epilogue access widths and a stage count -- confirm).
           1, 2, 1, 2> {};


 /// Traits for a double-precision GEMM, built on top of SimplifiedGemmTraits.
 ///
 /// Template parameters:
 ///   kLayoutA_, kLayoutB_   - MatrixLayout::Kind values forwarded to SimplifiedGemmTraits.
 ///   OutputTile_            - defaults to Shape<8, 64, 128>.
 ///   EpilogueFunctor_       - defaults to LinearScaling<double>.
 ///   AccumulatorsPerThread_ - defaults to Shape<8, 8, 8>.
 ///   kScalarsPerLdgA_       - defaults to 1.
 ///   kScalarsPerLdgB_       - defaults to 1.
 ///   Index_                 - defaults to int.
 ///   GemmConfig_            - defaults to DgemmConfig built from the four parameters
 ///                            OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_ and
 ///                            kScalarsPerLdgB_.
 ///   GemmEpilogueTraits_    - defaults to SimplifiedGemmEpilogueTraits built from
 ///                            GemmConfig_, EpilogueFunctor_ and Index_.
 ///
 /// OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_ and kScalarsPerLdgB_ are used only
 /// in the default for GemmConfig_, and EpilogueFunctor_ only in the default for
 /// GemmEpilogueTraits_. Supplying those two trailing arguments explicitly therefore makes
 /// the earlier parameters irrelevant to the resulting traits.
 template <MatrixLayout::Kind kLayoutA_,
           MatrixLayout::Kind kLayoutB_,
           typename OutputTile_ = Shape<8, 64, 128>,
           typename EpilogueFunctor_ = LinearScaling<double>,
           typename AccumulatorsPerThread_ = Shape<8, 8, 8>,
           int kScalarsPerLdgA_ = 1,
           int kScalarsPerLdgB_ = 1,
           typename Index_ = int,
           typename GemmConfig_ = DgemmConfig<OutputTile_,
                                              AccumulatorsPerThread_,
                                              kScalarsPerLdgA_,
                                              kScalarsPerLdgB_>,
           typename GemmEpilogueTraits_ = SimplifiedGemmEpilogueTraits<GemmConfig_,
                                                                       EpilogueFunctor_,
                                                                       Index_> >
 struct DgemmTraits
     : public SimplifiedGemmTraits<
           // Layouts of the A and B operands.
           kLayoutA_, kLayoutB_,
           // GEMM configuration.
           GemmConfig_,
           // Epilogue, wrapping the selected epilogue traits.
           GemmEpilogue<GemmEpilogueTraits_>,
           // Index type.
           Index_> {};


 }  // namespace gemm
 }  // namespace cutlass
cutlass
Definition: convert.h:33

gemm_global_tile.h
Defines iterators for efficiently loading and storing to global memory.

gemm_traits.h
Defines structural properties of complete GEMM computation.

thread_multiply_add.h
Template implementing matrix multiply-add operations on fragments.

gemm_epilogue.h
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product.

gemm_shared_tile.h
Defines iterators for efficiently loading and storing tiles to and from shared memory.

cutlass::gemm::GemmConfig
Definition: gemm_traits.h:79

cutlass::gemm::DgemmTraits
Definition: dgemm_traits.h:112

cutlass::gemm::DgemmConfig
Definition: dgemm_traits.h:52

cutlass::Shape
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64

cutlass::gemm::SimplifiedGemmEpilogueTraits
Definition: gemm_epilogue_traits.h:300

cutlass::MatrixLayout::Kind
Kind
Definition: matrix_traits.h:36

cutlass::gemm::LinearScaling
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40

gemm.h
Implements a software-pipelined efficient GEMM.

gemm_epilogue_traits.h
Defines structural properties of the GEMM epilogue.

cutlass::gemm::SimplifiedGemmTraits
Definition: gemm_traits.h:723