Fixes assertion failure in prefix caching: the lora index mapping should respect prefix_len (#2688)

Signed-off-by: Tao He <sighingnow@gmail.com>
This commit is contained in:
Tao He 2024-02-01 01:00:13 +08:00 committed by GitHub
parent 1af090b57d
commit d69ff0cbbb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -142,10 +142,10 @@ class ModelRunner:
if lora_id > 0:
lora_requests.add(seq_group_metadata.lora_request)
-lora_index_mapping.append([lora_id] * prompt_len)
+lora_index_mapping.append([lora_id] * (prompt_len - prefix_len))
lora_prompt_mapping.extend(
[lora_id] *
-(prompt_len
+(prompt_len - prefix_len
if seq_group_metadata.sampling_params.prompt_logprobs else 1))
if seq_group_metadata.block_tables is None: