enable L2::128B prefetch for cp.async by default (#1177)
This commit is contained in:
parent
1ab6cc7b68
commit
6e60b9b17c
@ -59,7 +59,7 @@ struct SM80_CP_ASYNC_CACHEALWAYS
|
||||
#if defined(CUTE_ARCH_CP_ASYNC_SM80_ENABLED)
|
||||
TS const* gmem_ptr = &gmem_src;
|
||||
uint32_t smem_int_ptr = cast_smem_ptr_to_uint(&smem_dst);
|
||||
asm volatile("cp.async.ca.shared.global [%0], [%1], %2;\n"
|
||||
asm volatile("cp.async.ca.shared.global.L2::128B [%0], [%1], %2;\n"
|
||||
:: "r"(smem_int_ptr),
|
||||
"l"(gmem_ptr),
|
||||
"n"(sizeof(TS)));
|
||||
@ -86,7 +86,7 @@ struct SM80_CP_ASYNC_CACHEGLOBAL
|
||||
#if defined(CUTE_ARCH_CP_ASYNC_SM80_ENABLED)
|
||||
TS const* gmem_ptr = &gmem_src;
|
||||
uint32_t smem_int_ptr = cast_smem_ptr_to_uint(&smem_dst);
|
||||
asm volatile("cp.async.cg.shared.global [%0], [%1], %2;\n"
|
||||
asm volatile("cp.async.cg.shared.global.L2::128B [%0], [%1], %2;\n"
|
||||
:: "r"(smem_int_ptr),
|
||||
"l"(gmem_ptr),
|
||||
"n"(sizeof(TS)));
|
||||
@ -115,7 +115,7 @@ struct SM80_CP_ASYNC_CACHEALWAYS_ZFILL
|
||||
TS const* gmem_ptr = &gmem_src;
|
||||
uint32_t smem_int_ptr = cast_smem_ptr_to_uint(&smem_dst);
|
||||
int src_size = pred ? sizeof(TS) : 0;
|
||||
asm volatile("cp.async.ca.shared.global [%0], [%1], %2, %3;\n"
|
||||
asm volatile("cp.async.ca.shared.global.L2::128B [%0], [%1], %2, %3;\n"
|
||||
:: "r"(smem_int_ptr),
|
||||
"l"(gmem_ptr),
|
||||
"n"(sizeof(TS)),
|
||||
@ -145,7 +145,7 @@ struct SM80_CP_ASYNC_CACHEGLOBAL_ZFILL
|
||||
TS const* gmem_ptr = &gmem_src;
|
||||
uint32_t smem_int_ptr = cast_smem_ptr_to_uint(&smem_dst);
|
||||
int src_size = pred ? sizeof(TS) : 0;
|
||||
asm volatile("cp.async.cg.shared.global [%0], [%1], %2, %3;\n"
|
||||
asm volatile("cp.async.cg.shared.global.L2::128B [%0], [%1], %2, %3;\n"
|
||||
:: "r"(smem_int_ptr),
|
||||
"l"(gmem_ptr),
|
||||
"n"(sizeof(TS)),
|
||||
|
Loading…
Reference in New Issue
Block a user