Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
load_store.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/vector.h>
31 
32 namespace cutlass {
33 
35 
/// Enum to specify which memory space data resides in.
///
/// Wrapped in a struct so the enumerators are referred to as MemorySpace::kXxx
/// and the enum type itself as MemorySpace::Kind.
struct MemorySpace {
  enum Kind {
    kGeneric,  ///< Data accessed through pointer dereferencing
    kShared,   ///< Data resides in shared memory
    kGlobal    ///< Data resides in global memory
  };
};
46 
48 
49 template <typename Scalar_,
50  int Lanes_,
51  MemorySpace::Kind Memory_,
52  bool = (Lanes_ > 1),
53  size_t = (sizeof(Scalar_) * Lanes_)>
54 struct Load {
57 
59  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
60  dst = reinterpret_cast<AccessType const*>(&pointer[offset])[0];
61  }
62 };
63 
65 
66 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
67 struct Load<Scalar_, Lanes_, Memory_, true, 4> {
70 
72  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
73  dst.registers[0] = reinterpret_cast<uint32_t const*>(&pointer[offset])[0];
74  }
75 };
76 
78 
79 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
80 struct Load<Scalar_, Lanes_, Memory_, true, 8> {
83 
85  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
86  uint2 tmp = reinterpret_cast<uint2 const*>(&pointer[offset])[0];
87  dst.registers[0] = tmp.x;
88  dst.registers[1] = tmp.y;
89  }
90 };
91 
93 
94 template <MemorySpace::Kind Memory_>
95 struct Load<double, 2, Memory_, true, 16> {
98 
100  static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) {
101  double2 tmp = reinterpret_cast<double2 const*>(&pointer[offset])[0];
102  dst[0] = tmp.x;
103  dst[1] = tmp.y;
104  }
105 };
106 
108 
109 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
110 struct Load<Scalar_, Lanes_, Memory_, true, 16> {
113 
115  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
116  uint4 tmp = reinterpret_cast<uint4 const*>(&pointer[offset])[0];
117  dst.registers[0] = tmp.x;
118  dst.registers[1] = tmp.y;
119  dst.registers[2] = tmp.z;
120  dst.registers[3] = tmp.w;
121  }
122 };
123 
125 
126 template <typename Scalar_,
127  int Lanes_,
128  MemorySpace::Kind Memory_,
129  bool = (Lanes_ > 1),
130  size_t = (sizeof(Scalar_) * Lanes_)>
131 struct Store {
134 
136  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
137  pointer[offset] = src;
138  }
139 };
140 
142 
143 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
144 struct Store<Scalar_, Lanes_, Memory_, true, 4> {
147 
149  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
150  uint32_t* addr = reinterpret_cast<uint32_t*>(&pointer[offset]);
151  addr[0] = src.registers[0];
152  }
153 };
154 
156 
157 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
158 struct Store<Scalar_, Lanes_, Memory_, true, 8> {
161 
163  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
164  uint2* addr = reinterpret_cast<uint2*>(&pointer[offset]);
165  addr[0] = make_uint2(src.registers[0], src.registers[1]);
166  }
167 };
168 
170 
171 template <MemorySpace::Kind Memory_>
172 struct Store<double, 2, Memory_, true, 16> {
175 
177  static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) {
178  double2* addr = reinterpret_cast<double2*>(&pointer[offset]);
179  addr[0] = make_double2(src[0], src[1]);
180  }
181 };
182 
184 
185 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
186 struct Store<Scalar_, Lanes_, Memory_, true, 16> {
189 
191  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
192  uint4* addr = reinterpret_cast<uint4*>(&pointer[offset]);
193  addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]);
194  }
195 };
196 
198 
199 } // namespace cutlass
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:188
Definition: load_store.h:42
Definition: convert.h:33
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:163
Enum to specify which memory space data resides in.
Definition: load_store.h:39
Definition: load_store.h:43
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:112
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:146
Kind
Definition: load_store.h:40
Definition: load_store.h:131
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:136
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:80
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:174
Definition: load_store.h:41
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:72
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:133
Definition: vector.h:61
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:85
Definition: load_store.h:54
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:82
Defines a 1D vector of elements held in the registers of each thread.
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:160
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:115
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:69
static CUTLASS_DEVICE void load(AccessType &dst, double const *pointer, int offset)
The load function.
Definition: load_store.h:100
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:97
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:56
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:191
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:149
static CUTLASS_DEVICE void store(AccessType const &src, double *pointer, int offset)
The store function.
Definition: load_store.h:177