From 98a42e70782ec38c1f0f20d0226d0f71147d1ac2 Mon Sep 17 00:00:00 2001 From: "Yile (Michael) Gu" <39850409+IKACE@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:33:52 -0700 Subject: [PATCH] [Benchmark] Change mii to use persistent deployment and support tensor parallel (#3628) --- benchmarks/benchmark_throughput.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index 6ccdd865..9d84bde1 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -183,13 +183,15 @@ def run_mii( tensor_parallel_size: int, output_len: int, ) -> float: - from mii import pipeline - llm = pipeline(model, tensor_parallel=tensor_parallel_size) + from mii import client, serve + llm = serve(model, tensor_parallel=tensor_parallel_size) prompts = [prompt for prompt, _, _ in requests] start = time.perf_counter() - llm(prompts, max_new_tokens=output_len) + llm.generate(prompts, max_new_tokens=output_len) end = time.perf_counter() + client = client(model) + client.terminate_server() return end - start