Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
vector.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
31 #include <cuda_fp16.h>
32 #endif
33 
34 #include <cutlass/util/platform.h>
35 
36 namespace cutlass {
37 
39 
40 template <size_t kAlignment_>
41 struct AlignedStruct {};
42 
43 template <>
44 struct __align__(1) AlignedStruct<1>{};
45 template <>
46 struct __align__(2) AlignedStruct<2>{};
47 template <>
48 struct __align__(4) AlignedStruct<4>{};
49 template <>
50 struct __align__(8) AlignedStruct<8>{};
51 template <>
52 struct __align__(16) AlignedStruct<16>{};
53 template <>
54 struct __align__(32) AlignedStruct<32>{};
55 template <>
56 struct __align__(64) AlignedStruct<64>{};
57 
59 
60 template <typename Scalar_, int kLanes_>
61 union Vector {
63  typedef Scalar_ Scalar;
64 
66  enum { kLanes = kLanes_ };
68  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
70  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
71 
72  // Make sure that the vector type makes sense.
73  static_assert(kVectorSize <= 16, "Vector type is too large");
74 
80  uint32_t registers[kRegisters];
81 
83  CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; }
85  CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; }
86 };
87 
89 
90 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
91 
92 template <int kLanes_>
93 union Vector<half, kLanes_> {
95  typedef half Scalar;
96 
98  enum { kLanes = kLanes_ };
100  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
102  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
103 
104  // Make sure that the vector type makes sense.
105  static_assert(kVectorSize <= size_t(16), "Vector type is too large");
106 
110  uint16_t scalars[kLanes];
112  uint32_t registers[kRegisters];
113 
115  CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const {
116  return reinterpret_cast<Scalar const&>(scalars[i]);
117  }
119  CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast<Scalar&>(scalars[i]); }
120 };
121 
122 #endif
123 
125 
126 template <typename Scalar_>
127 CUTLASS_DEVICE void make_zero(Scalar_& x) {
128  x = Scalar_(0);
129 }
130 
132 
133 template <typename Element_, int kLanes_ = 1>
134 struct Vectorize {
136 };
137 
139 
140 template <typename Element_>
141 struct Vectorize<Element_, 1> {
142  typedef Element_ Type;
143 };
144 
146 
147 template <typename Scalar_, int kLanes_>
148 CUTLASS_DEVICE void make_zero(Vector<Scalar_, kLanes_>& vec) {
149  for (int i = 0; i < Vector<Scalar_, kLanes_>::kRegisters; ++i) {
150  vec.registers[i] = 0;
151  }
152 }
153 
155 //
156 // cutlass::Extent similar to std::extent but applicable to CUTLASS types
157 //
158 
/// Returns the extent of a scalar or vector.
///
/// This primary template covers every type that has no dedicated specialization and reports an
/// extent of one.
template <typename T>
struct Extent {
  /// Number of elements; always 1 in the primary template.
  static const size_t kValue = 1;
};
164 
166 template <typename T, int Lanes>
167 struct Extent<Vector<T, Lanes> > {
168  static size_t const kValue = Lanes;
169 };
170 
172 template <typename T, int Lanes>
173 struct Extent<Vector<T, Lanes> const> {
174  static size_t const kValue = Lanes;
175 };
176 
178 
180 template <typename T>
181 struct VectorTraits {
183  typedef T Scalar;
184 
186  static int const kLanes = 1;
187 
189  static bool const IsVector = false;
190 
193 };
194 
196 template <typename T, int Lanes>
197 struct VectorTraits<Vector<T, Lanes> > {
199  typedef T Scalar;
200 
202  static int const kLanes = Lanes;
203 
205  static bool const IsVector = true;
206 
209 };
210 
212 template <typename T, int Lanes>
213 struct VectorTraits<Vector<T, Lanes> const> {
215  typedef T Scalar;
216 
218  static int const kLanes = Lanes;
219 
221  static bool const IsVector = true;
222 
225 };
226 
228 
229 } // namespace cutlass
Element_ Type
Definition: vector.h:142
Definition: convert.h:33
Definition: vector.h:134
CUTLASS_DEVICE void make_zero(Scalar_ &x)
Definition: vector.h:127
Definition: vector.h:41
T Scalar
Scalar type.
Definition: vector.h:183
struct __align__(1) AlignedStruct< 1 >
Definition: vector.h:44
C++ features that may be otherwise unimplemented for CUDA device functions.
Scalar_ Scalar
The scalar type.
Definition: vector.h:63
Definition: vector.h:66
half Scalar
The scalar type.
Definition: vector.h:95
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:80
Vector< T, 1 > Vector
Type that is always a vector.
Definition: vector.h:192
CUTLASS_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:119
CUTLASS_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:85
Traits describing properties of vectors and scalar-as-vectors.
Definition: vector.h:181
#define static_assert(__e, __m)
Definition: platform.h:145
Definition: vector.h:61
static bool const IsVector
True if the type is actually a cutlass::Vector, otherwise false.
Definition: vector.h:189
Scalar scalars[kLanes]
The associated array of scalars.
Definition: vector.h:78
Vector< T, Lanes > Vector
Type that is always a Vector.
Definition: vector.h:224
Definition: vector.h:68
static int const kLanes
Number of lanes of vector.
Definition: vector.h:186
CUTLASS_DEVICE Scalar const & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:115
T Scalar
Scalar type.
Definition: vector.h:215
Vector< Element_, kLanes_ > Type
Definition: vector.h:135
T Scalar
Scalar type.
Definition: vector.h:199
static size_t const kValue
Definition: vector.h:162
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:73
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:105
Vector< T, Lanes > Vector
Type that is always a Vector.
Definition: vector.h:208
CUTLASS_DEVICE Scalar const & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:83
Returns the extent of a scalar or vector.
Definition: vector.h:161