[misc] soft drop beam search (#8763)
This commit is contained in:
parent
2467b642dd
commit
1e7d5c01f5
@ -62,6 +62,7 @@ if TYPE_CHECKING:
|
|||||||
VLLM_TORCH_PROFILER_DIR: Optional[str] = None
|
VLLM_TORCH_PROFILER_DIR: Optional[str] = None
|
||||||
VLLM_USE_TRITON_AWQ: bool = False
|
VLLM_USE_TRITON_AWQ: bool = False
|
||||||
VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
|
VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
|
||||||
|
VLLM_ALLOW_DEPRECATED_BEAM_SEARCH: bool = False
|
||||||
|
|
||||||
|
|
||||||
def get_default_cache_root():
|
def get_default_cache_root():
|
||||||
@ -195,6 +196,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
|||||||
lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
|
lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
|
||||||
("true", "1")),
|
("true", "1")),
|
||||||
|
|
||||||
|
# If set, allow the use of the deprecated beam search implementation
|
||||||
|
"VLLM_ALLOW_DEPRECATED_BEAM_SEARCH":
|
||||||
|
lambda: os.environ.get("VLLM_ALLOW_DEPRECATED_BEAM_SEARCH", "0") == "1",
|
||||||
|
|
||||||
# Internal flag to enable Dynamo graph capture
|
# Internal flag to enable Dynamo graph capture
|
||||||
"VLLM_TEST_DYNAMO_GRAPH_CAPTURE":
|
"VLLM_TEST_DYNAMO_GRAPH_CAPTURE":
|
||||||
lambda: int(os.environ.get("VLLM_TEST_DYNAMO_GRAPH_CAPTURE", "0")),
|
lambda: int(os.environ.get("VLLM_TEST_DYNAMO_GRAPH_CAPTURE", "0")),
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import msgspec
|
|||||||
import torch
|
import torch
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
|
import vllm.envs as envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -260,6 +261,10 @@ class SamplingParams(
|
|||||||
|
|
||||||
self._verify_args()
|
self._verify_args()
|
||||||
if self.use_beam_search:
|
if self.use_beam_search:
|
||||||
|
if not envs.VLLM_ALLOW_DEPRECATED_BEAM_SEARCH:
|
||||||
|
raise ValueError(
|
||||||
|
"Using beam search as a sampling parameter is deprecated, and will be removed in the future release. Please use the `vllm.LLM.use_beam_search` method for dedicated beam search instead, or set the environment variable `VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1` to suppress this error. For more details, see https://github.com/vllm-project/vllm/issues/8306 ." # noqa
|
||||||
|
)
|
||||||
self._verify_beam_search()
|
self._verify_beam_search()
|
||||||
else:
|
else:
|
||||||
self._verify_non_beam_search()
|
self._verify_non_beam_search()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user