[Frontend] Add tok/s speed metric to the LLM class when using tqdm (#4400)
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
parent
e288df0632
commit
16bc0a098f
@ -238,17 +238,25 @@ class LLM:
|
|||||||
# Initialize tqdm.
|
# Initialize tqdm.
|
||||||
if use_tqdm:
|
if use_tqdm:
|
||||||
num_requests = self.llm_engine.get_num_unfinished_requests()
|
num_requests = self.llm_engine.get_num_unfinished_requests()
|
||||||
pbar = tqdm(total=num_requests,
|
pbar = tqdm(
|
||||||
desc="Processed prompts",
|
total=num_requests,
|
||||||
dynamic_ncols=True)
|
desc="Processed prompts",
|
||||||
|
dynamic_ncols=True,
|
||||||
|
postfix=f"Generation Speed: {0:.2f} toks/s",
|
||||||
|
)
|
||||||
# Run the engine.
|
# Run the engine.
|
||||||
outputs: List[RequestOutput] = []
|
outputs: List[RequestOutput] = []
|
||||||
|
total_toks = 0
|
||||||
while self.llm_engine.has_unfinished_requests():
|
while self.llm_engine.has_unfinished_requests():
|
||||||
step_outputs = self.llm_engine.step()
|
step_outputs = self.llm_engine.step()
|
||||||
for output in step_outputs:
|
for output in step_outputs:
|
||||||
if output.finished:
|
if output.finished:
|
||||||
outputs.append(output)
|
outputs.append(output)
|
||||||
if use_tqdm:
|
if use_tqdm:
|
||||||
|
total_toks += (sum(
|
||||||
|
len(stp.token_ids) for stp in output.outputs))
|
||||||
|
spd = total_toks / pbar.format_dict["elapsed"]
|
||||||
|
pbar.postfix = f"Generation Speed: {spd:.2f} toks/s"
|
||||||
pbar.update(1)
|
pbar.update(1)
|
||||||
if use_tqdm:
|
if use_tqdm:
|
||||||
pbar.close()
|
pbar.close()
|
||||||
@ -256,4 +264,4 @@ class LLM:
|
|||||||
# This is necessary because some requests may be finished earlier than
|
# This is necessary because some requests may be finished earlier than
|
||||||
# their previous requests.
|
# their previous requests.
|
||||||
outputs = sorted(outputs, key=lambda x: int(x.request_id))
|
outputs = sorted(outputs, key=lambda x: int(x.request_id))
|
||||||
return outputs
|
return outputs
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user