[Bugfix] Fix async log stats (#8417)

Author: Alexander Matveev
Date: 2024-09-12 23:48:59 -04:00
Parent: 8427550488
Commit: 6821020109
2 changed files with 17 additions and 4 deletions

@@ -64,6 +64,7 @@ def test_chunked_prefill_recompute(
             enable_chunked_prefill=enable_chunked_prefill,
             max_num_seqs=max_num_seqs,
             worker_use_ray=worker_use_ray,
+            disable_log_stats=False,
     ) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
         assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt
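The test change above turns stats logging back on so the preemption test actually runs the stats path this commit fixes. For a quick standalone check outside the test harness, a minimal sketch along these lines should exercise the same path (assuming the offline LLM entry point forwards disable_log_stats to the engine; the model name, prompt, and limits below are illustrative placeholders, not part of this commit):

# Sketch: run generation with engine stat logging explicitly enabled so the
# log-stats path (including preemption handling) is exercised. All concrete
# values here are placeholders.
from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",   # any small model works for a smoke test
    disable_log_stats=False,     # keep stat logging on (the offline wrapper usually disables it)
    max_num_seqs=2,              # small batch to make preemption more likely
)

outputs = llm.generate(
    ["Hello, my name is"],
    SamplingParams(temperature=0.0, max_tokens=16),
)
print(outputs[0].outputs[0].text)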

@@ -1056,7 +1056,8 @@ class LLMEngine:
         # LLMEngine/AsyncLLMEngine directly
         if is_async:
             # Log stats.
-            self.do_log_stats(scheduler_outputs, outputs, finished_before)
+            self.do_log_stats(scheduler_outputs, outputs, finished_before,
+                              skip)

             # Tracing
             self.do_tracing(scheduler_outputs)
@@ -1363,18 +1364,20 @@ class LLMEngine:
     def do_log_stats(self,
                      scheduler_outputs: Optional[SchedulerOutputs] = None,
                      model_output: Optional[List[SamplerOutput]] = None,
-                     finished_before: Optional[List[int]] = None) -> None:
+                     finished_before: Optional[List[int]] = None,
+                     skip: Optional[List[int]] = None) -> None:
         """Forced log when no requests active."""
         if self.log_stats:
             stats = self._get_stats(scheduler_outputs, model_output,
-                                    finished_before)
+                                    finished_before, skip)
             for logger in self.stat_loggers.values():
                 logger.log(stats)

     def _get_stats(self,
                    scheduler_outputs: Optional[SchedulerOutputs],
                    model_output: Optional[List[SamplerOutput]] = None,
-                   finished_before: Optional[List[int]] = None) -> Stats:
+                   finished_before: Optional[List[int]] = None,
+                   skip: Optional[List[int]] = None) -> Stats:
         """Get Stats to be Logged to Prometheus.

         Args:
@@ -1382,6 +1385,10 @@ class LLMEngine:
                 the scheduled batch,
             model_output: Optional, used to emit speculative decoding metrics
                 which are created by the workers.
+            finished_before: Optional, indices of sequences that were finished
+                before. These sequences will be ignored.
+            skip: Optional, indices of sequences that were preempted. These
+                sequences will be ignored.
         """
         now = time.time()
@@ -1456,6 +1463,11 @@ class LLMEngine:
                     actual_num_batched_tokens -= 1
                     continue

+                # Currently, skip == preempted sequences, so we need to skip
+                # their log stats
+                if skip and idx in skip:
+                    continue
+
                 group_was_prefill = idx < scheduler_outputs.num_prefill_groups
                 seq_group = scheduled_seq_group.seq_group
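The last hunk is the core of the fix: sequence groups preempted in the current step are passed to _get_stats by index via skip and excluded from the per-step stats, in the same way as groups that already finished (finished_before). A minimal standalone sketch of that index-based filtering pattern, using purely illustrative names rather than the engine's actual types, is:

# Toy sketch of the filtering used in _get_stats: walk the scheduled groups by
# position and ignore those that finished earlier (finished_before) or were
# preempted this step (skip).
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class ScheduledGroup:  # illustrative stand-in for a scheduled sequence group
    request_id: str
    num_tokens: int


def count_batched_tokens(groups: List[ScheduledGroup],
                         finished_before: Optional[List[int]] = None,
                         skip: Optional[List[int]] = None) -> int:
    """Sum tokens only for groups still active in this step."""
    total = 0
    for idx, group in enumerate(groups):
        if finished_before and idx in finished_before:
            continue
        if skip and idx in skip:
            continue
        total += group.num_tokens
    return total


groups = [ScheduledGroup("a", 8), ScheduledGroup("b", 4), ScheduledGroup("c", 2)]
# The group at index 1 was preempted this step, so its tokens are not counted.
assert count_batched_tokens(groups, skip=[1]) == 10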