From cb7a1c1cbf7c07e072df29844fb7a51a01344392 Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 13 Jan 2024 04:33:29 +0800 Subject: [PATCH] Suggest using dtype=half when bfloat16 is unsupported. --- vllm/worker/worker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index b1d85cc0..4f21edbf 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -239,4 +239,6 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype): raise ValueError( "Bfloat16 is only supported on GPUs with compute capability " f"of at least 8.0. Your {gpu_name} GPU has compute capability " - f"{compute_capability[0]}.{compute_capability[1]}.") + f"{compute_capability[0]}.{compute_capability[1]}." + f" You can explicitly specify the data type by using the --dtype option, for example: --dtype=half." + )