diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py
index e2d358ea..91510daf 100644
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@@ -169,8 +169,8 @@ if __name__ == '__main__':
         "--device",
         type=str,
         default="cuda",
-        choices=["cuda"],
-        help='device type for vLLM execution, supporting CUDA only currently.')
+        choices=["cuda", "cpu"],
+        help='device type for vLLM execution, supporting CUDA and CPU.')
     parser.add_argument('--block-size',
                         type=int,
                         default=16,
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index d6bf18c8..e7133827 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -329,8 +329,8 @@ if __name__ == "__main__":
         "--device",
         type=str,
         default="cuda",
-        choices=["cuda"],
-        help='device type for vLLM execution, supporting CUDA only currently.')
+        choices=["cuda", "cpu"],
+        help='device type for vLLM execution, supporting CUDA and CPU.')
     parser.add_argument(
         "--enable-prefix-caching",
         action='store_true',