[Kernel] Fix Flashinfer Correctness (#7284)
parent 6d94420246
commit e53dfd3eaf
@@ -127,6 +127,7 @@ class FlashInferMetadata(AttentionMetadata):
             raise ValueError(
                 f"Only {supported_head_sizes} are supported for head_dim,",
                 f"received {self.head_dim}.")
+        self.is_profile_run = is_block_tables_empty(self.block_tables)
 
     def begin_forward(self):
         if self.num_prefill_tokens > 0:
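The new is_profile_run flag records, at metadata construction time, whether this batch belongs to the memory-profiling dry run, during which no KV-cache block tables exist yet. Below is a minimal standalone sketch of that detection; the real is_block_tables_empty helper lives in vLLM's attention backend utilities and its exact signature and accepted types may differ from this illustration.

from typing import Dict, List, Optional

def is_block_tables_empty(block_tables: Optional[Dict[int, List[int]]]) -> bool:
    # Sketch only: treat a missing (or all-None) block-table mapping as the
    # profile run, since no KV-cache blocks have been allocated yet.
    if block_tables is None:
        return True
    if isinstance(block_tables, dict) and all(
            value is None for value in block_tables.values()):
        return True
    return False

# Profile run: no block tables yet, so the metadata must not read the KV cache.
assert is_block_tables_empty(None)
# Normal prefill/decode run: sequences already own cache blocks.
assert not is_block_tables_empty({0: [3, 7, 11]})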
@@ -140,11 +141,14 @@ class FlashInferMetadata(AttentionMetadata):
             assert self.paged_kv_last_page_len is not None
             batch_size = self.query_start_loc.shape[0] - 1
             assert batch_size >= 0
-            # The prefill stage does not read kv cache.
+            # The profile run does not read kv cache.
             # Both paged_kv_indices and paged_kv_last_page_len are empty.
             # paged_kv_indptr is a zero tensor with size batch_size + 1.
-            self.paged_kv_indptr = torch.zeros(batch_size + 1,
-                                               device=self.device)
-            self.paged_kv_last_page_len = self.paged_kv_last_page_len.to(
-                self.device)
-            self.paged_kv_indices = self.paged_kv_indices.to(self.device)
+            if self.is_profile_run:
+                self.paged_kv_indptr = torch.zeros(batch_size + 1,
+                                                   device=self.device)
+            else:
+                self.paged_kv_indptr = self.paged_kv_indptr.to(self.device)
+                self.paged_kv_last_page_len = self.paged_kv_last_page_len.to(
+                    self.device)
+                self.paged_kv_indices = self.paged_kv_indices.to(self.device)
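With this change, begin_forward only substitutes an all-zero paged_kv_indptr when the metadata came from the profile run; ordinary prefills now move the real CPU-built paged KV tensors to the device instead of silently skipping the cache. A hedged sketch of that branch follows; the function name and argument list are illustrative only and are not part of vLLM's API.

import torch

def prepare_paged_kv_indptr(is_profile_run: bool,
                            batch_size: int,
                            paged_kv_indptr_cpu: torch.Tensor,
                            device: torch.device) -> torch.Tensor:
    # Illustrative helper (not a vLLM function): mirrors the if/else above.
    if is_profile_run:
        # Profile run never reads the paged KV cache, so indptr is just
        # batch_size + 1 zeros (one offset per sequence plus the leading 0).
        return torch.zeros(batch_size + 1, device=device)
    # Real run: keep the indptr that was built on the CPU and move it to the
    # device, so prefills that hit cached blocks read the correct pages.
    return paged_kv_indptr_cpu.to(device)

# Example: a profile run with 4 sequences yields a 5-element zero tensor,
# while a real run preserves the existing offsets.
indptr_cpu = torch.tensor([0, 2, 5, 6, 9])
print(prepare_paged_kv_indptr(True, 4, indptr_cpu, torch.device("cpu")))
print(prepare_paged_kv_indptr(False, 4, indptr_cpu, torch.device("cpu")))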