374882be53f2a7558aeb6c4955b8b9da75b29ecf/docs/fragment__multiply__add_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include <cutlass/fragment.h>

 namespace cutlass {
 namespace gemm {


 /// Functor that scales a fragment by a scalar, optionally adding a second fragment.
 ///
 /// All operand types (A, B and C/D) are the single template argument Scalar_. Fragment
 /// arguments only need to expose a static kElements count and operator[].
 template <typename Scalar_>
 struct FragmentMultiplyAdd {
   /// The shape of the instruction.
   typedef Shape<1, 1, 1, 1> InstructionShape;
   /// The type for A.
   typedef Scalar_ ScalarA;
   /// The type for B.
   typedef Scalar_ ScalarB;
   /// The type for C and D.
   typedef Scalar_ ScalarC;

   /// Ctor (nothing to initialize).
   CUTLASS_DEVICE FragmentMultiplyAdd() {}

   /// Multiply: d[i] = a * b[i] for every element index i of the fragment.
   template <typename Fragment_>
   CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const& b, Fragment_& d) {
     int const count = Fragment_::kElements;
     for (int idx = 0; idx < count; ++idx) {
       d[idx] = a * b[idx];
     }
   }

   /// Multiply-add: d[i] = a * b[i] + c[i] for every element index i of the fragment.
   template <typename Fragment_>
   CUTLASS_DEVICE void multiply_add(Scalar_ a,
                                    Fragment_ const& b,
                                    Fragment_ const& c,
                                    Fragment_& d) {
     int const count = Fragment_::kElements;
     for (int idx = 0; idx < count; ++idx) {
       // Kept as one expression so the compiler sees the same multiply-add as before.
       d[idx] = a * b[idx] + c[idx];
     }
   }
 };


 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
 /// Specialization for half operands that processes fragment elements two at a time
 /// through the packed __half2 intrinsics.
 ///
 /// NOTE: the arithmetic is only compiled when building with CUDA for
 /// __CUDA_ARCH__ >= 530. In every other compilation pass both methods have an empty
 /// body and leave d untouched.
 /// NOTE(review): the reinterpret_casts assume the fragment stores its half elements
 /// contiguously and suitably aligned for __half2 access -- confirm against Fragment.
 template <>
 struct FragmentMultiplyAdd<half> {
   /// The shape of the instruction.
   typedef Shape<1, 1, 1, 1> InstructionShape;
   /// The type for A.
   typedef half ScalarA;
   /// The type for B.
   typedef half ScalarB;
   /// The type for C and D.
   typedef half ScalarC;

   /// Ctor (nothing to initialize).
   CUTLASS_DEVICE FragmentMultiplyAdd() {}

   /// Multiply: d[i] = a * b[i] for every element index i of the fragment.
   template <typename Fragment_>
   CUTLASS_DEVICE void multiply(half a, Fragment_ const& b, Fragment_& d) {
 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
     // Number of complete element pairs covered by the packed loop.
     int const kPairs = Fragment_::kElements / 2;

     // The input.
     __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
     // The output.
     __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);

     // Assemble a half2 from a (the scalar is replicated into both lanes).
     __half2 const a_half2 = __half2half2(a);

     for (int i = 0; i < kPairs; ++i) {
       d_half2[i] = __hmul2(a_half2, b_half2[i]);
     }

     // With an odd element count the packed loop never reaches the last element;
     // compute it with the scalar intrinsic so that no element of d is skipped.
     if (Fragment_::kElements % 2 != 0) {
       d[Fragment_::kElements - 1] = __hmul(a, b[Fragment_::kElements - 1]);
     }
 #endif
   }

   /// Multiply-add: d[i] = a * b[i] + c[i] for every element index i of the fragment.
   template <typename Fragment_>
   CUTLASS_DEVICE void multiply_add(half a, Fragment_ const& b, Fragment_ const& c, Fragment_& d) {
 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
     // Number of complete element pairs covered by the packed loop.
     int const kPairs = Fragment_::kElements / 2;

     // The inputs.
     __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
     __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
     // The output.
     __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);

     // Assemble a half2 from a (the scalar is replicated into both lanes).
     __half2 const a_half2 = __half2half2(a);

     for (int i = 0; i < kPairs; ++i) {
       d_half2[i] = __hfma2(a_half2, b_half2[i], c_half2[i]);
     }

     // With an odd element count the packed loop never reaches the last element;
     // compute it with the scalar intrinsic so that no element of d is skipped.
     if (Fragment_::kElements % 2 != 0) {
       d[Fragment_::kElements - 1] =
           __hfma(a, b[Fragment_::kElements - 1], c[Fragment_::kElements - 1]);
     }
 #endif
   }
 };

 #endif


 }  // namespace gemm
 }  // namespace cutlass
cutlass::gemm::FragmentMultiplyAdd::ScalarB
Scalar_ ScalarB
The type for B.
Definition: fragment_multiply_add.h:44

cutlass
Definition: convert.h:33

cutlass::gemm::FragmentMultiplyAdd::multiply
CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:53

cutlass::gemm::FragmentMultiplyAdd< half >::ScalarA
half ScalarA
The type for A.
Definition: fragment_multiply_add.h:79

cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:86

cutlass::gemm::FragmentMultiplyAdd::multiply_add
CUTLASS_DEVICE void multiply_add(Scalar_ a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply-add : d = a*b + c.
Definition: fragment_multiply_add.h:61

cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC
half ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:83

cutlass::gemm::FragmentMultiplyAdd< half >::multiply_add
CUTLASS_DEVICE void multiply_add(half a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply-add : d = a*b + c.
Definition: fragment_multiply_add.h:108

cutlass::Shape
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64

cutlass::gemm::FragmentMultiplyAdd::InstructionShape
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:40

cutlass::gemm::FragmentMultiplyAdd::ScalarC
Scalar_ ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:46

cutlass::gemm::FragmentMultiplyAdd::ScalarA
Scalar_ ScalarA
The type for A.
Definition: fragment_multiply_add.h:42

cutlass::gemm::FragmentMultiplyAdd::FragmentMultiplyAdd
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:49

fragment.h
Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...

cutlass::gemm::FragmentMultiplyAdd< half >::multiply
CUTLASS_DEVICE void multiply(half a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:90

cutlass::gemm::FragmentMultiplyAdd< half >::InstructionShape
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:77

cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB
half ScalarB
The type for B.
Definition: fragment_multiply_add.h:81

cutlass::gemm::FragmentMultiplyAdd
Definition: fragment_multiply_add.h:38