[Kernel] Squash a few more warnings (#6914)

commit cbbc904470 (parent 5cf9254a9c)
@@ -706,7 +706,7 @@ void paged_attention_v1_launcher(
   int kv_block_stride = key_cache.stride(0);
   int kv_head_stride = key_cache.stride(1);

-  int thread_group_size = MAX(WARP_SIZE / BLOCK_SIZE, 1);
+  [[maybe_unused]] int thread_group_size = MAX(WARP_SIZE / BLOCK_SIZE, 1);
   assert(head_size % thread_group_size == 0);

   // NOTE: alibi_slopes is optional.
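Why the attribute: assert() is the only reader of thread_group_size, and assert compiles away when NDEBUG is defined, so release builds flag the variable as unused. A minimal standalone sketch of the pattern (the WARP_SIZE and BLOCK_SIZE values here are stand-ins for illustration, not the launcher's real parameters):

#include <cassert>

void launcher_sketch(int head_size) {
  constexpr int WARP_SIZE = 32;   // stand-in value for illustration
  constexpr int BLOCK_SIZE = 16;  // stand-in value for illustration
  // Only read inside assert(), which vanishes under NDEBUG; the attribute
  // tells the compiler this is intentional.
  [[maybe_unused]] int thread_group_size =
      WARP_SIZE / BLOCK_SIZE > 1 ? WARP_SIZE / BLOCK_SIZE : 1;
  assert(head_size % thread_group_size == 0);
}

The same change is applied to paged_attention_v2_launcher in the next hunk.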
@@ -865,7 +865,7 @@ void paged_attention_v2_launcher(
   int kv_block_stride = key_cache.stride(0);
   int kv_head_stride = key_cache.stride(1);

-  int thread_group_size = MAX(WARP_SIZE / BLOCK_SIZE, 1);
+  [[maybe_unused]] int thread_group_size = MAX(WARP_SIZE / BLOCK_SIZE, 1);
   assert(head_size % thread_group_size == 0);

   // NOTE: alibi_slopes is optional.
@@ -273,8 +273,6 @@ __global__ void Code2x8Dequant(
   }
   __syncthreads();

-  float res = 0;
-
   int iters = (prob_k / 8 - 1) / (8 * 32) + 1;
   while (iters--) {
     if (pred && a_gl_rd < a_gl_end) {
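Here the warning is a plain unused variable: res is initialized but evidently never read in this kernel, so deleting the declaration is the whole fix. A hedged sketch of the same situation (the function and names are made up):

float sum_sketch(const float* a, int n) {
  // float res = 0;  // dead: never read afterwards, so -Wunused-variable fires
  float acc = 0.f;
  for (int i = 0; i < n; ++i) acc += a[i];
  return acc;
}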
@@ -526,6 +526,7 @@ __inline__ __device__ Tout convert(const Tin& x) {
   }
 #endif
   assert(false);
+  return {}; // Squash missing return statement warning
 }

 template <typename Tout, typename Tin, Fp8KVCacheDataType kv_dt>
@@ -536,6 +537,7 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
   }
 #endif
   assert(false);
+  return {}; // Squash missing return statement warning
 }

 // The following macro is used to dispatch the conversion function based on
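In both convert and scaled_convert every supported type pair returns inside the #if/#endif ladder, but the compiler cannot prove that: assert(false) is not noreturn (and disappears entirely under NDEBUG), so it warns that a value-returning function may fall off the end. Returning a value-initialized Tout is unreachable on valid paths and silences the warning. A minimal sketch with a stand-in one-case ladder:

#include <cassert>
#include <type_traits>

template <typename Tout, typename Tin>
Tout convert_sketch(const Tin& x) {
  if constexpr (std::is_same_v<Tout, Tin>) {
    return x;  // the one supported conversion in this sketch
  }
  assert(false);  // unreachable for supported pairs, but not provably so
  return {};      // value-initialized Tout; squashes the missing-return warning
}

The next two hunks apply the identical fix to the second copy of these helpers.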
@@ -508,6 +508,7 @@ __inline__ __device__ Tout convert(const Tin& x) {
   }
 #endif
   assert(false);
+  return {}; // Squash missing return statement warning
 }

 template <typename Tout, typename Tin, Fp8KVCacheDataType kv_dt>
@@ -520,6 +521,7 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
   }
 #endif
   assert(false);
+  return {}; // Squash missing return statement warning
 }

 // The following macro is used to dispatch the conversion function based on
@@ -203,7 +203,8 @@ void squeezellm_gemm(torch::Tensor vec, torch::Tensor mat, torch::Tensor mul,
 #endif
       mat.data_ptr<int>(),
 #ifndef USE_ROCM
-      (half2*)mul.data<at::Half>(), (__half*)lookup_table.data_ptr<at::Half>(),
+      (half2*)mul.data_ptr<at::Half>(),
+      (__half*)lookup_table.data_ptr<at::Half>(),
 #else
       (float2*)mul.data_ptr<float>(),
       (__half*)lookup_table.data_ptr<at::Half>(),
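The SqueezeLLM change replaces Tensor::data<at::Half>() with data_ptr<at::Half>() and splits the over-long argument onto two lines. data<T>() has long been deprecated in PyTorch and emits a warning during compilation; data_ptr<T>() yields the same typed pointer. A hedged sketch (the function and tensor name are made up):

#include <torch/extension.h>

void read_half_sketch(torch::Tensor mul) {
  // Deprecated, warns at build time:
  //   at::Half* p = mul.data<at::Half>();
  at::Half* p = mul.data_ptr<at::Half>();  // same pointer, no warning
  (void)p;  // the real kernel call reinterprets this as half2*
}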