[Bugfix] Fix async log stats (#8417)
This commit is contained in:
parent
8427550488
commit
6821020109
@@ -64,6 +64,7 @@ def test_chunked_prefill_recompute(
|
|||||||
enable_chunked_prefill=enable_chunked_prefill,
|
enable_chunked_prefill=enable_chunked_prefill,
|
||||||
max_num_seqs=max_num_seqs,
|
max_num_seqs=max_num_seqs,
|
||||||
worker_use_ray=worker_use_ray,
|
worker_use_ray=worker_use_ray,
|
||||||
|
disable_log_stats=False,
|
||||||
) as vllm_model:
|
) as vllm_model:
|
||||||
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
|
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
|
||||||
assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt
|
assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt
|
||||||
|
|||||||
@@ -1056,7 +1056,8 @@ class LLMEngine:
|
|||||||
# LLMEngine/AsyncLLMEngine directly
|
# LLMEngine/AsyncLLMEngine directly
|
||||||
if is_async:
|
if is_async:
|
||||||
# Log stats.
|
# Log stats.
|
||||||
self.do_log_stats(scheduler_outputs, outputs, finished_before)
|
self.do_log_stats(scheduler_outputs, outputs, finished_before,
|
||||||
|
skip)
|
||||||
|
|
||||||
# Tracing
|
# Tracing
|
||||||
self.do_tracing(scheduler_outputs)
|
self.do_tracing(scheduler_outputs)
|
||||||
@@ -1363,18 +1364,20 @@ class LLMEngine:
|
|||||||
def do_log_stats(self,
|
def do_log_stats(self,
|
||||||
scheduler_outputs: Optional[SchedulerOutputs] = None,
|
scheduler_outputs: Optional[SchedulerOutputs] = None,
|
||||||
model_output: Optional[List[SamplerOutput]] = None,
|
model_output: Optional[List[SamplerOutput]] = None,
|
||||||
finished_before: Optional[List[int]] = None) -> None:
|
finished_before: Optional[List[int]] = None,
|
||||||
|
skip: Optional[List[int]] = None) -> None:
|
||||||
"""Forced log when no requests active."""
|
"""Forced log when no requests active."""
|
||||||
if self.log_stats:
|
if self.log_stats:
|
||||||
stats = self._get_stats(scheduler_outputs, model_output,
|
stats = self._get_stats(scheduler_outputs, model_output,
|
||||||
finished_before)
|
finished_before, skip)
|
||||||
for logger in self.stat_loggers.values():
|
for logger in self.stat_loggers.values():
|
||||||
logger.log(stats)
|
logger.log(stats)
|
||||||
|
|
||||||
def _get_stats(self,
|
def _get_stats(self,
|
||||||
scheduler_outputs: Optional[SchedulerOutputs],
|
scheduler_outputs: Optional[SchedulerOutputs],
|
||||||
model_output: Optional[List[SamplerOutput]] = None,
|
model_output: Optional[List[SamplerOutput]] = None,
|
||||||
finished_before: Optional[List[int]] = None) -> Stats:
|
finished_before: Optional[List[int]] = None,
|
||||||
|
skip: Optional[List[int]] = None) -> Stats:
|
||||||
"""Get Stats to be Logged to Prometheus.
|
"""Get Stats to be Logged to Prometheus.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -1382,6 +1385,10 @@ class LLMEngine:
|
|||||||
the scheduled batch,
|
the scheduled batch,
|
||||||
model_output: Optional, used to emit speculative decoding metrics
|
model_output: Optional, used to emit speculative decoding metrics
|
||||||
which are created by the workers.
|
which are created by the workers.
|
||||||
|
finished_before: Optional, indices of sequences that were finished
|
||||||
|
before. These sequences will be ignored.
|
||||||
|
skip: Optional, indices of sequences that were preempted. These
|
||||||
|
sequences will be ignored.
|
||||||
"""
|
"""
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
@@ -1456,6 +1463,11 @@ class LLMEngine:
|
|||||||
actual_num_batched_tokens -= 1
|
actual_num_batched_tokens -= 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Currently, skip == preempted sequences, so we need to skip
|
||||||
|
# their log stats
|
||||||
|
if skip and idx in skip:
|
||||||
|
continue
|
||||||
|
|
||||||
group_was_prefill = idx < scheduler_outputs.num_prefill_groups
|
group_was_prefill = idx < scheduler_outputs.num_prefill_groups
|
||||||
seq_group = scheduled_seq_group.seq_group
|
seq_group = scheduled_seq_group.seq_group
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user