Fix compilation in clang (#478)

- adds missing commas between inline-asm operands
- makes the use of CUTLASS_DEVICE consistent between the
  template declarations and their specializations

Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com>
This commit is contained in:
Janusz Lisiecki 2022-04-28 20:22:06 +02:00 committed by GitHub
parent e49f690fd7
commit 8c339ac039
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -356,6 +356,7 @@ struct global_store<AccessType, 1> {
/// ld.shared /// ld.shared
template <int Bytes> template <int Bytes>
CUTLASS_DEVICE
void shared_load(void *dst, uint32_t ptr); void shared_load(void *dst, uint32_t ptr);
/// ld.shared - 16b /// ld.shared - 16b
@ -407,6 +408,7 @@ void shared_load<16>(void *dst, uint32_t ptr) {
/// st.shared /// st.shared
template <int Bytes> template <int Bytes>
CUTLASS_DEVICE
void shared_store(uint32_t ptr, void const *src); void shared_store(uint32_t ptr, void const *src);
/// st.shared - 16b /// st.shared - 16b
@ -415,7 +417,7 @@ CUTLASS_DEVICE
void shared_store<2>(uint32_t ptr, void const *src) { void shared_store<2>(uint32_t ptr, void const *src) {
asm volatile("st.shared.u16 [%0], %1;\n" asm volatile("st.shared.u16 [%0], %1;\n"
: : : :
"r"(ptr) "r"(ptr),
"h"(*reinterpret_cast<uint16_t const *>(src)) "h"(*reinterpret_cast<uint16_t const *>(src))
); );
} }
@ -426,7 +428,7 @@ CUTLASS_DEVICE
void shared_store<4>(uint32_t ptr, void const *src) { void shared_store<4>(uint32_t ptr, void const *src) {
asm volatile("st.shared.u32 [%0], %1;\n" asm volatile("st.shared.u32 [%0], %1;\n"
: : : :
"r"(ptr) "r"(ptr),
"r"(*reinterpret_cast<uint32_t const *>(src)) "r"(*reinterpret_cast<uint32_t const *>(src))
); );
} }
@ -438,7 +440,7 @@ void shared_store<8>(uint32_t ptr, void const *src) {
uint2 const *dst_u64 = reinterpret_cast<uint2 const *>(src); uint2 const *dst_u64 = reinterpret_cast<uint2 const *>(src);
asm volatile("st.shared.v2.u32 [%0], {%1, %2};\n" asm volatile("st.shared.v2.u32 [%0], {%1, %2};\n"
: : : :
"r"(ptr) "r"(ptr),
"r"(dst_u64->x), "r"(dst_u64->x),
"r"(dst_u64->y) "r"(dst_u64->y)
); );
@ -451,7 +453,7 @@ void shared_store<16>(uint32_t ptr, void const *src) {
uint4 const *dst_u128 = reinterpret_cast<uint4 const *>(src); uint4 const *dst_u128 = reinterpret_cast<uint4 const *>(src);
asm volatile("ld.shared.v4.u32 [%0], {%1, %2, %3, %4};\n" asm volatile("ld.shared.v4.u32 [%0], {%1, %2, %3, %4};\n"
: : : :
"r"(ptr) "r"(ptr),
"r"(dst_u128->x), "r"(dst_u128->x),
"r"(dst_u128->y), "r"(dst_u128->y),
"r"(dst_u128->z), "r"(dst_u128->z),