Fixes assertion failure in prefix caching: the LoRA index mapping should respect prefix_len (#2688)
Signed-off-by: Tao He <sighingnow@gmail.com>
This commit is contained in:
parent
1af090b57d
commit
d69ff0cbbb
@ -142,10 +142,10 @@ class ModelRunner:
|
|||||||
if lora_id > 0:
|
if lora_id > 0:
|
||||||
lora_requests.add(seq_group_metadata.lora_request)
|
lora_requests.add(seq_group_metadata.lora_request)
|
||||||
|
|
||||||
lora_index_mapping.append([lora_id] * prompt_len)
|
lora_index_mapping.append([lora_id] * (prompt_len - prefix_len))
|
||||||
lora_prompt_mapping.extend(
|
lora_prompt_mapping.extend(
|
||||||
[lora_id] *
|
[lora_id] *
|
||||||
(prompt_len
|
(prompt_len - prefix_len
|
||||||
if seq_group_metadata.sampling_params.prompt_logprobs else 1))
|
if seq_group_metadata.sampling_params.prompt_logprobs else 1))
|
||||||
|
|
||||||
if seq_group_metadata.block_tables is None:
|
if seq_group_metadata.block_tables is None:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user