diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index eabe3b23..48d53058 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -949,8 +949,9 @@ class LLMEngine:
             model_output: Optional[List[SamplerOutput]] = None) -> None:
         """Forced log when no requests active."""
         if self.log_stats:
+            stats = self._get_stats(scheduler_outputs, model_output)
             for logger in self.stat_loggers.values():
-                logger.log(self._get_stats(scheduler_outputs, model_output))
+                logger.log(stats)
 
     def _get_stats(
             self,
diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py
index 8cf0aa5b..98960b88 100644
--- a/vllm/spec_decode/spec_decode_worker.py
+++ b/vllm/spec_decode/spec_decode_worker.py
@@ -484,7 +484,7 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
         for both speculation cases (num_lookahead_slots>0) and non-speculation
         cases (e.g. prefill).
 
-        Returns True iff there are remaining sequences to process.
+        Returns True if there are remaining sequences to process.
         """
         assert self.rank != self._driver_rank