From ef6098ec51fcd07da3b93704e2c48e1f861ff00b Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 24 Feb 2023 10:36:08 +0000 Subject: [PATCH] Merge pre_step and step --- cacheflow/master/scheduler.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/cacheflow/master/scheduler.py b/cacheflow/master/scheduler.py index 0ef0adc5..c5df8d49 100644 --- a/cacheflow/master/scheduler.py +++ b/cacheflow/master/scheduler.py @@ -93,13 +93,13 @@ class Scheduler: seq.status = SequenceStatus.SWAPPED self.swapped.append(seq_group) - def pre_step(self) -> None: + def step(self) -> None: # Blocks that need to be swaped or copied before model execution. blocks_to_swap_in: Dict[int, int] = {} blocks_to_swap_out: Dict[int, int] = {} blocks_to_copy: Dict[int, int] = {} - # 1. Prepare new slots for the running sequences. + # 1. Reserve new slots for the running sequences. # NOTE: Here we implicitly assume FCFS scheduling. # That is, the most recently added sequence group is the first # to be swapped out. @@ -159,20 +159,11 @@ class Scheduler: else: self.pending.clear() - # Execute step. - self.step(blocks_to_swap_in, blocks_to_swap_out, blocks_to_copy) - - def step( - self, - blocks_to_swap_in: Dict[int, int], - blocks_to_swap_out: Dict[int, int], - blocks_to_copy: Dict[int, int], - ) -> None: # Ensure that swap-in and swap-out never happen at the same timestep. if blocks_to_swap_in: assert not blocks_to_swap_out - # Create input data structures. + # 4. Create input data structures. prompt_tokens: Dict[int, List[int]] = {} generation_tokens: Dict[int, int] = {} context_lens: Dict[int, int] = {} @@ -195,7 +186,7 @@ class Scheduler: generation_tokens[seq_id] = seq.get_token_ids()[-1] context_lens[seq_id] = seq.get_len() - # Execute the first stage of the pipeline. + # 5. Execute the first stage of the pipeline. self.controllers[0].execute_stage( prompt_tokens, generation_tokens,