49 template <
typename Scalar_,
    53           size_t = (
sizeof(Scalar_) * Lanes_)>
    59   static CUTLASS_DEVICE 
void load(
AccessType& dst, Scalar_ 
const* pointer, 
int offset) {
    60     dst = 
reinterpret_cast<AccessType const*
>(&pointer[offset])[0];
    66 template <
typename Scalar_, 
int Lanes_, MemorySpace::Kind Memory_>
    67 struct Load<Scalar_, Lanes_, Memory_, true, 4> {
    72   static CUTLASS_DEVICE 
void load(
AccessType& dst, Scalar_ 
const* pointer, 
int offset) {
    73     dst.
registers[0] = 
reinterpret_cast<uint32_t const*
>(&pointer[offset])[0];
    79 template <
typename Scalar_, 
int Lanes_, MemorySpace::Kind Memory_>
    80 struct Load<Scalar_, Lanes_, Memory_, true, 8> {
    85   static CUTLASS_DEVICE 
void load(
AccessType& dst, Scalar_ 
const* pointer, 
int offset) {
    86     uint2 tmp = 
reinterpret_cast<uint2 const*
>(&pointer[offset])[0];
    94 template <MemorySpace::Kind Memory_>
    95 struct Load<double, 2, Memory_, true, 16> {
   100   static CUTLASS_DEVICE 
void load(
AccessType& dst, 
double const* pointer, 
int offset) {
   101     double2 tmp = 
reinterpret_cast<double2 const*
>(&pointer[offset])[0];
   109 template <
typename Scalar_, 
int Lanes_, MemorySpace::Kind Memory_>
   110 struct Load<Scalar_, Lanes_, Memory_, true, 16> {
   115   static CUTLASS_DEVICE 
void load(
AccessType& dst, Scalar_ 
const* pointer, 
int offset) {
   116     uint4 tmp = 
reinterpret_cast<uint4 const*
>(&pointer[offset])[0];
   126 template <
typename Scalar_,
   130           size_t = (
sizeof(Scalar_) * Lanes_)>
   136   static CUTLASS_DEVICE 
void store(
AccessType const& src, Scalar_* pointer, 
int offset) {
   137     pointer[offset] = src;
   143 template <
typename Scalar_, 
int Lanes_, MemorySpace::Kind Memory_>
   144 struct Store<Scalar_, Lanes_, Memory_, true, 4> {
   149   static CUTLASS_DEVICE 
void store(
AccessType const& src, Scalar_* pointer, 
int offset) {
   150     uint32_t* addr = 
reinterpret_cast<uint32_t*
>(&pointer[offset]);
   157 template <
typename Scalar_, 
int Lanes_, MemorySpace::Kind Memory_>
   158 struct Store<Scalar_, Lanes_, Memory_, true, 8> {
   163   static CUTLASS_DEVICE 
void store(
AccessType const& src, Scalar_* pointer, 
int offset) {
   164     uint2* addr = 
reinterpret_cast<uint2*
>(&pointer[offset]);
   171 template <MemorySpace::Kind Memory_>
   172 struct Store<double, 2, Memory_, true, 16> {
   177   static CUTLASS_DEVICE 
void store(
AccessType const& src, 
double* pointer, 
int offset) {
   178     double2* addr = 
reinterpret_cast<double2*
>(&pointer[offset]);
   179     addr[0] = make_double2(src[0], src[1]);
   185 template <
typename Scalar_, 
int Lanes_, MemorySpace::Kind Memory_>
   186 struct Store<Scalar_, Lanes_, Memory_, true, 16> {
   191   static CUTLASS_DEVICE 
void store(
AccessType const& src, Scalar_* pointer, 
int offset) {
   192     uint4* addr = 
reinterpret_cast<uint4*
>(&pointer[offset]);
 Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:188
Definition: load_store.h:42
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function. 
Definition: load_store.h:163
Enum to specify which memory space data resides in. 
Definition: load_store.h:39
Definition: load_store.h:43
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function. 
Definition: load_store.h:59
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:112
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:146
Kind
Definition: load_store.h:40
Definition: load_store.h:131
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function. 
Definition: load_store.h:136
uint32_t registers[kRegisters]
The data in registers. 
Definition: vector.h:80
Vectorize< double, 2 >::Type AccessType
The output type. 
Definition: load_store.h:174
Definition: load_store.h:41
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function. 
Definition: load_store.h:72
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:133
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function. 
Definition: load_store.h:85
Definition: load_store.h:54
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:82
Defines a 1D vector of elements held in the registers of each thread. 
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:160
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function. 
Definition: load_store.h:115
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:69
static CUTLASS_DEVICE void load(AccessType &dst, double const *pointer, int offset)
The load function. 
Definition: load_store.h:100
Vectorize< double, 2 >::Type AccessType
The output type. 
Definition: load_store.h:97
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type. 
Definition: load_store.h:56
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function. 
Definition: load_store.h:191
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function. 
Definition: load_store.h:149
static CUTLASS_DEVICE void store(AccessType const &src, double *pointer, int offset)
The store function. 
Definition: load_store.h:177