49 template <
typename Scalar_,
53 size_t = (
sizeof(Scalar_) * Lanes_)>
59 static CUTLASS_DEVICE
void load(
AccessType& dst, Scalar_
const* pointer,
int offset) {
60 dst =
reinterpret_cast<AccessType const*
>(&pointer[offset])[0];
66 template <
typename Scalar_,
int Lanes_, MemorySpace::Kind Memory_>
67 struct Load<Scalar_, Lanes_, Memory_, true, 4> {
72 static CUTLASS_DEVICE
void load(
AccessType& dst, Scalar_
const* pointer,
int offset) {
73 dst.
registers[0] =
reinterpret_cast<uint32_t const*
>(&pointer[offset])[0];
79 template <
typename Scalar_,
int Lanes_, MemorySpace::Kind Memory_>
80 struct Load<Scalar_, Lanes_, Memory_, true, 8> {
85 static CUTLASS_DEVICE
void load(
AccessType& dst, Scalar_
const* pointer,
int offset) {
86 uint2 tmp =
reinterpret_cast<uint2 const*
>(&pointer[offset])[0];
94 template <MemorySpace::Kind Memory_>
95 struct Load<double, 2, Memory_, true, 16> {
100 static CUTLASS_DEVICE
void load(
AccessType& dst,
double const* pointer,
int offset) {
101 double2 tmp =
reinterpret_cast<double2 const*
>(&pointer[offset])[0];
109 template <
typename Scalar_,
int Lanes_, MemorySpace::Kind Memory_>
110 struct Load<Scalar_, Lanes_, Memory_, true, 16> {
115 static CUTLASS_DEVICE
void load(
AccessType& dst, Scalar_
const* pointer,
int offset) {
116 uint4 tmp =
reinterpret_cast<uint4 const*
>(&pointer[offset])[0];
126 template <
typename Scalar_,
130 size_t = (
sizeof(Scalar_) * Lanes_)>
136 static CUTLASS_DEVICE
void store(
AccessType const& src, Scalar_* pointer,
int offset) {
137 pointer[offset] = src;
143 template <
typename Scalar_,
int Lanes_, MemorySpace::Kind Memory_>
144 struct Store<Scalar_, Lanes_, Memory_, true, 4> {
149 static CUTLASS_DEVICE
void store(
AccessType const& src, Scalar_* pointer,
int offset) {
150 uint32_t* addr =
reinterpret_cast<uint32_t*
>(&pointer[offset]);
157 template <
typename Scalar_,
int Lanes_, MemorySpace::Kind Memory_>
158 struct Store<Scalar_, Lanes_, Memory_, true, 8> {
163 static CUTLASS_DEVICE
void store(
AccessType const& src, Scalar_* pointer,
int offset) {
164 uint2* addr =
reinterpret_cast<uint2*
>(&pointer[offset]);
171 template <MemorySpace::Kind Memory_>
172 struct Store<double, 2, Memory_, true, 16> {
177 static CUTLASS_DEVICE
void store(
AccessType const& src,
double* pointer,
int offset) {
178 double2* addr =
reinterpret_cast<double2*
>(&pointer[offset]);
179 addr[0] = make_double2(src[0], src[1]);
185 template <
typename Scalar_,
int Lanes_, MemorySpace::Kind Memory_>
186 struct Store<Scalar_, Lanes_, Memory_, true, 16> {
191 static CUTLASS_DEVICE
void store(
AccessType const& src, Scalar_* pointer,
int offset) {
192 uint4* addr =
reinterpret_cast<uint4*
>(&pointer[offset]);
Vectorize< Scalar_, Lanes_ >::Type AccessType
The type of the vector being stored (the type of src).
Definition: load_store.h:188
Definition: load_store.h:42
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:163
Enum to specify which memory space data resides in.
Definition: load_store.h:39
Definition: load_store.h:43
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:112
Vectorize< Scalar_, Lanes_ >::Type AccessType
The type of the vector being stored (the type of src).
Definition: load_store.h:146
Kind
Definition: load_store.h:40
Definition: load_store.h:131
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:136
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:80
Vectorize< double, 2 >::Type AccessType
The type of the vector being stored (the type of src).
Definition: load_store.h:174
Definition: load_store.h:41
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:72
Vectorize< Scalar_, Lanes_ >::Type AccessType
The type of the vector being stored (the type of src).
Definition: load_store.h:133
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:85
Definition: load_store.h:54
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:82
Defines a 1D vector of elements held in the registers of each thread.
Vectorize< Scalar_, Lanes_ >::Type AccessType
The type of the vector being stored (the type of src).
Definition: load_store.h:160
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:115
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:69
static CUTLASS_DEVICE void load(AccessType &dst, double const *pointer, int offset)
The load function.
Definition: load_store.h:100
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:97
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:56
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:191
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:149
static CUTLASS_DEVICE void store(AccessType const &src, double *pointer, int offset)
The store function.
Definition: load_store.h:177