[Performance] e2e overheads reduction: Small followup diff (#7364)
This commit is contained in:
parent
67abdbb42f
commit
fc7b8d1eef
@@ -336,9 +336,9 @@ class BlockSpaceManagerV1(BlockSpaceManager):
|
|||||||
|
|
||||||
# Assign the self-attention block tables for each sequence.
|
# Assign the self-attention block tables for each sequence.
|
||||||
if len(wait_seqs) == 1:
|
if len(wait_seqs) == 1:
|
||||||
self.block_tables[wait_seqs[0].seq_id] = block_table
|
self.block_tables[seq.seq_id] = block_table
|
||||||
else:
|
else:
|
||||||
for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
|
for seq in wait_seqs:
|
||||||
self.block_tables[seq.seq_id] = block_table.copy()
|
self.block_tables[seq.seq_id] = block_table.copy()
|
||||||
|
|
||||||
# Allocate encoder sequence
|
# Allocate encoder sequence
|
||||||
|
|||||||
@@ -655,6 +655,9 @@ class SequenceGroup:
|
|||||||
return [seq for seq in self.seqs if not seq.is_finished()]
|
return [seq for seq in self.seqs if not seq.is_finished()]
|
||||||
|
|
||||||
def get_finished_seqs(self) -> List[Sequence]:
|
def get_finished_seqs(self) -> List[Sequence]:
|
||||||
|
if self.is_single_seq:
|
||||||
|
return self.seqs if self.seqs[0].is_finished() else []
|
||||||
|
|
||||||
return [seq for seq in self.seqs if seq.is_finished()]
|
return [seq for seq in self.seqs if seq.is_finished()]
|
||||||
|
|
||||||
def update_num_computed_tokens(self, num_new_computed_tokens: int):
|
def update_num_computed_tokens(self, num_new_computed_tokens: int):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user