Fix separate compilation -dc
(#433)
* Fix separate compilation `-dc` - when cutlass is included in multiple compilation units compiled with `-dc`, the OOB_NAN_F16x8 device constant is instantiated multiple times, causing a "Multiple definition of '_ZN7cutlass4arch13OOB_NAN_F16x8E'" error. This PR makes the variable a function-local constant, as it is not modified at runtime. Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com> * Fix Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com> * Test GH Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com> * Revert test GH Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com>
This commit is contained in:
parent
3ab1eacf09
commit
8f1fe7a132
@ -162,9 +162,6 @@ struct cp_async_zfill<SizeInBytes, CacheOperation::Always> {
|
||||
}
|
||||
};
|
||||
|
||||
__device__ __constant__ uint4 OOB_NAN_F16x8 = {0x7eff7eff, 0x7eff7eff,
|
||||
0x7eff7eff, 0x7eff7eff};
|
||||
|
||||
/// Partial specialization
|
||||
template <>
|
||||
struct cp_async_nan<16, CacheOperation::Always> {
|
||||
@ -175,6 +172,9 @@ struct cp_async_nan<16, CacheOperation::Always> {
|
||||
cp_async_nan(void *smem_ptr, void const *global_ptr, bool pred_guard) {
|
||||
#if CUDA_CP_ASYNC_ACTIVATED
|
||||
|
||||
static __constant__ uint4 OOB_NAN_F16x8 = {0x7eff7eff, 0x7eff7eff,
|
||||
0x7eff7eff, 0x7eff7eff};
|
||||
|
||||
unsigned smem_int_ptr = cutlass_get_smem_pointer(smem_ptr);
|
||||
|
||||
asm volatile(
|
||||
|
Loading…
Reference in New Issue
Block a user