From dad961ef5ca3893b78224323ec943dce9f52f868 Mon Sep 17 00:00:00 2001 From: Ali Panahi <64020589+c3-ali@users.noreply.github.com> Date: Mon, 19 Aug 2024 13:47:00 -0700 Subject: [PATCH] [Bugfix] fix lora_dtype value type in arg_utils.py - part 2 (#5428) --- vllm/engine/arg_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8fca2cc0..b23e166d 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -5,6 +5,8 @@ from dataclasses import dataclass from typing import (TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Type, Union) +import torch + import vllm.envs as envs from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, EngineConfig, LoadConfig, LoRAConfig, ModelConfig, @@ -113,7 +115,7 @@ class EngineArgs: fully_sharded_loras: bool = False lora_extra_vocab_size: int = 256 long_lora_scaling_factors: Optional[Tuple[float]] = None - lora_dtype: str = 'auto' + lora_dtype: Optional[Union[str, torch.dtype]] = 'auto' max_cpu_loras: Optional[int] = None device: str = 'auto' num_scheduler_steps: int = 1