25ff2824031b98cbbde7f1455c1f04cb305b6fd2/docs/vector_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
 #include <cuda_fp16.h>
 #endif

 #include <cutlass/util/platform.h>

 namespace cutlass {


 /// Empty tag type used to carry an alignment requirement into an enclosing aggregate.
 ///
 /// The primary template requests no explicit alignment. Explicit specializations are provided
 /// for 1, 2, 4, 8, 16, 32 and 64 bytes; any other value of kAlignment_ selects the primary
 /// template.
 template <size_t kAlignment_>
 struct AlignedStruct {
 };

 /// 1-byte aligned tag.
 template <> struct __align__(1) AlignedStruct<1> {};

 /// 2-byte aligned tag.
 template <> struct __align__(2) AlignedStruct<2> {};

 /// 4-byte aligned tag.
 template <> struct __align__(4) AlignedStruct<4> {};

 /// 8-byte aligned tag.
 template <> struct __align__(8) AlignedStruct<8> {};

 /// 16-byte aligned tag.
 template <> struct __align__(16) AlignedStruct<16> {};

 /// 32-byte aligned tag.
 template <> struct __align__(32) AlignedStruct<32> {};

 /// 64-byte aligned tag.
 template <> struct __align__(64) AlignedStruct<64> {};


 /// Fixed-size vector of kLanes_ elements of type Scalar_.
 ///
 /// The union overlays three views of the same storage:
 ///   - aligned_  : an empty tag that requests kVectorSize-byte alignment when an AlignedStruct
 ///                 specialization exists for that size,
 ///   - scalars   : the individual lanes,
 ///   - registers : the same bytes viewed as 32-bit words.
 template <typename Scalar_, int kLanes_>
 union Vector {
   /// The scalar type.
   typedef Scalar_ Scalar;

   /// The number of elements in the vector.
   enum { kLanes = kLanes_ };
   /// The size of the vector in bytes.
   enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
   /// The number of 32-bit words needed to cover the whole vector. The count is rounded up so
   /// that a size which is not a multiple of four bytes (e.g. 6 bytes) is still fully covered by
   /// `registers`; plain truncating division would leave the trailing lanes outside the register
   /// view. sizeof(Vector) is unaffected because the uint32_t member already rounds the union up
   /// to a multiple of four bytes.
   enum { kRegisters = (kVectorSize + 3) / 4 };

   // Make sure that the vector type makes sense: at most 16 bytes.
   static_assert(kVectorSize <= 16, "Vector type is too large");

   /// The aligned storage to make sure we have good alignment.
   AlignedStruct<kVectorSize> aligned_;
   /// The associated array of scalars.
   Scalar scalars[kLanes];
   /// The data in registers.
   uint32_t registers[kRegisters];

   /// Read-only accessor to the i-th lane. No bounds checking is performed.
   CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; }
   /// Mutable accessor to the i-th lane. No bounds checking is performed.
   CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; }
 };


 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)

 /// Partial specialization of Vector for `half` elements.
 ///
 /// The lanes are stored as raw uint16_t values and reinterpreted as `half` by the accessors.
 /// NOTE(review): presumably this avoids placing `half` directly inside a union on the targeted
 /// toolchains -- confirm.
 template <int kLanes_>
 union Vector<half, kLanes_> {
   /// The scalar type.
   typedef half Scalar;

   /// The number of elements in the vector.
   enum { kLanes = kLanes_ };
   /// The size of the vector in bytes.
   enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
   /// The number of 32-bit words needed to cover the whole vector. The count is rounded up so
   /// that an odd number of half lanes (e.g. 3 lanes = 6 bytes) is still fully covered by
   /// `registers`; plain truncating division would leave the last lane outside the register view.
   /// sizeof(Vector) is unaffected because the uint32_t member already rounds the union up to a
   /// multiple of four bytes.
   enum { kRegisters = (kVectorSize + 3) / 4 };

   // Make sure that the vector type makes sense: at most 16 bytes. The bound is a plain int so
   // the comparison matches the generic Vector template and does not mix enum and size_t.
   static_assert(kVectorSize <= 16, "Vector type is too large");

   /// The aligned storage to make sure we have good alignment.
   AlignedStruct<kVectorSize> aligned_;
   /// The lanes, stored as raw 16-bit patterns.
   uint16_t scalars[kLanes];
   /// The data in registers.
   uint32_t registers[kRegisters];

   /// Read-only accessor to the i-th lane, viewed as `half`. No bounds checking is performed.
   CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const {
     return reinterpret_cast<Scalar const&>(scalars[i]);
   }
   /// Mutable accessor to the i-th lane, viewed as `half`. No bounds checking is performed.
   CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast<Scalar&>(scalars[i]); }
 };

 #endif


 /// Sets a scalar to zero by assigning it the value Scalar_(0).
 ///
 /// @param x  Object to overwrite; Scalar_ must be constructible from the literal 0.
 template <typename Scalar_>
 CUTLASS_DEVICE
 void make_zero(Scalar_& x) {
   // Convert the literal 0 to the scalar type, then assign.
   x = Scalar_(0);
 }


 /// Maps an element type and a lane count to the type used to hold that many elements.
 ///
 /// In the general case the result is Vector<Element_, kLanes_>.
 template <typename Element_, int kLanes_ = 1>
 struct Vectorize {
   /// The vector type holding kLanes_ elements.
   typedef Vector<Element_, kLanes_> Type;
 };

 /// Single-lane case: the result is the bare element type rather than a one-lane Vector.
 template <typename Element_>
 struct Vectorize<Element_, 1> {
   /// The element type itself.
   typedef Element_ Type;
 };


 /// Clears a Vector by writing zero to each of the kRegisters 32-bit words of its `registers`
 /// member.
 ///
 /// @param vec  Vector whose register words are overwritten with zero.
 template <typename Scalar_, int kLanes_>
 CUTLASS_DEVICE void make_zero(Vector<Scalar_, kLanes_>& vec) {
   typedef Vector<Scalar_, kLanes_> VectorType;

   // The trip count is a compile-time constant of the vector type.
   int const register_count = VectorType::kRegisters;
   for (int idx = 0; idx != register_count; ++idx) {
     vec.registers[idx] = 0;
   }
 }

 //
 // cutlass::Extent is similar to std::extent, but is applicable to CUTLASS types
 //

 /// Returns the extent of a scalar or vector. Any type that is not a Vector has extent 1.
 template <typename T>
 struct Extent {
   /// Number of elements.
   static const size_t kValue = 1;
 };

 /// Extent of a Vector: its number of lanes.
 template <typename T, int Lanes>
 struct Extent<Vector<T, Lanes> > {
   /// Number of elements.
   static const size_t kValue = Lanes;
 };

 /// Extent of a const-qualified Vector: its number of lanes.
 template <typename T, int Lanes>
 struct Extent<Vector<T, Lanes> const> {
   /// Number of elements.
   static const size_t kValue = Lanes;
 };


 /// Traits describing properties of vectors and scalar-as-vectors.
 ///
 /// Primary template: T is treated as a scalar, i.e. a single lane.
 ///
 /// The member typedef `Vector` shares its name with the namespace-scope template, so the
 /// template is referred to as `cutlass::Vector` inside the class bodies. Using the unqualified
 /// name and then redeclaring it as a member would change the meaning of `Vector` within the
 /// class, which is ill-formed (no diagnostic required) and rejected by some compilers.
 template <typename T>
 struct VectorTraits {
   /// Scalar type.
   typedef T Scalar;

   /// Number of lanes of vector.
   static int const kLanes = 1;

   /// True if the type is actually a cutlass::Vector, otherwise false.
   static bool const IsVector = false;

   /// Type that is always a vector.
   typedef cutlass::Vector<T, 1> Vector;
 };

 /// Traits for a cutlass::Vector<T, Lanes>.
 template <typename T, int Lanes>
 struct VectorTraits<Vector<T, Lanes> > {
   /// Scalar type.
   typedef T Scalar;

   /// Number of lanes of vector.
   static int const kLanes = Lanes;

   /// True if the type is actually a cutlass::Vector, otherwise false.
   static bool const IsVector = true;

   /// Type that is always a Vector.
   typedef cutlass::Vector<T, Lanes> Vector;
 };

 /// Traits for a const-qualified cutlass::Vector<T, Lanes>; the `Vector` typedef drops the const.
 template <typename T, int Lanes>
 struct VectorTraits<Vector<T, Lanes> const> {
   /// Scalar type.
   typedef T Scalar;

   /// Number of lanes of vector.
   static int const kLanes = Lanes;

   /// True if the type is actually a cutlass::Vector, otherwise false.
   static bool const IsVector = true;

   /// Type that is always a Vector.
   typedef cutlass::Vector<T, Lanes> Vector;
 };


 }  // namespace cutlass
cutlass::Vectorize< Element_, 1 >::Type
Element_ Type
Definition: vector.h:142

cutlass
Definition: convert.h:33

cutlass::Vectorize
Definition: vector.h:134

cutlass::make_zero
CUTLASS_DEVICE void make_zero(Scalar_ &x)
Definition: vector.h:127

cutlass::AlignedStruct
Definition: vector.h:41

cutlass::VectorTraits::Scalar
T Scalar
Scalar type.
Definition: vector.h:183

cutlass::__align__
struct __align__(1) AlignedStruct< 1 >
Definition: vector.h:44

platform.h
C++ features that may be otherwise unimplemented for CUDA device functions.

cutlass::Vector::Scalar
Scalar_ Scalar
The scalar type.
Definition: vector.h:63

cutlass::Vector::kLanes
Definition: vector.h:66

cutlass::Vector< half, kLanes_ >::Scalar
half Scalar
The scalar type.
Definition: vector.h:95

cutlass::Vector::registers
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:80

cutlass::VectorTraits::Vector
Vector< T, 1 > Vector
Type that is always a vector.
Definition: vector.h:192

cutlass::Vector< half, kLanes_ >::operator[]
CUTLASS_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:119

cutlass::Vector::operator[]
CUTLASS_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:85

cutlass::VectorTraits
Traits describing properties of vectors and scalar-as-vectors.
Definition: vector.h:181

static_assert
#define static_assert(__e, __m)
Definition: platform.h:145

cutlass::Vector
Definition: vector.h:61

cutlass::VectorTraits::IsVector
static bool const IsVector
True if the type is actually a cutlass::Vector, otherwise false.
Definition: vector.h:189

cutlass::Vector::scalars
Scalar scalars[kLanes]
The associated array of scalars.
Definition: vector.h:78

cutlass::VectorTraits< Vector< T, Lanes > const >::Vector
Vector< T, Lanes > Vector
Type that is always a Vector.
Definition: vector.h:224

cutlass::Vector::kVectorSize
Definition: vector.h:68

cutlass::VectorTraits::kLanes
static int const kLanes
Number of lanes of vector.
Definition: vector.h:186

cutlass::Vector< half, kLanes_ >::operator[]
CUTLASS_DEVICE Scalar const  & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:115

cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar
T Scalar
Scalar type.
Definition: vector.h:215

cutlass::Vectorize::Type
Vector< Element_, kLanes_ > Type
Definition: vector.h:135

cutlass::VectorTraits< Vector< T, Lanes > >::Scalar
T Scalar
Scalar type.
Definition: vector.h:199

cutlass::Extent::kValue
static size_t const kValue
Definition: vector.h:162

cutlass::Vector::aligned_
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:73

cutlass::Vector< half, kLanes_ >::aligned_
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:105

cutlass::VectorTraits< Vector< T, Lanes > >::Vector
Vector< T, Lanes > Vector
Type that is always a Vector.
Definition: vector.h:208

cutlass::Vector::operator[]
CUTLASS_DEVICE Scalar const  & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:83

cutlass::Extent
Returns the extent of a scalar or vector.
Definition: vector.h:161