[misc] add a flag to enable compile (#7092)
This commit is contained in:
parent
22e718ff1a
commit
708989341e
@ -174,6 +174,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
|||||||
lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
|
lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
|
||||||
("true", "1")),
|
("true", "1")),
|
||||||
|
|
||||||
|
# Internal flag to enable Dynamo graph capture
|
||||||
|
"VLLM_TEST_DYNAMO_GRAPH_CAPTURE":
|
||||||
|
lambda: int(os.environ.get("VLLM_TEST_DYNAMO_GRAPH_CAPTURE", "0")),
|
||||||
|
|
||||||
# local rank of the process in the distributed setting, used to determine
|
# local rank of the process in the distributed setting, used to determine
|
||||||
# the GPU device id
|
# the GPU device id
|
||||||
"LOCAL_RANK":
|
"LOCAL_RANK":
|
||||||
|
|||||||
@ -23,6 +23,7 @@ except ImportError:
|
|||||||
BatchPrefillWithPagedKVCacheWrapper = None
|
BatchPrefillWithPagedKVCacheWrapper = None
|
||||||
FLASHINFER_WORKSPACE_BUFFER_SIZE = 0
|
FLASHINFER_WORKSPACE_BUFFER_SIZE = 0
|
||||||
|
|
||||||
|
import vllm.envs as envs
|
||||||
from vllm.attention import AttentionMetadata, get_attn_backend
|
from vllm.attention import AttentionMetadata, get_attn_backend
|
||||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||||
ModelConfig, MultiModalConfig, ParallelConfig,
|
ModelConfig, MultiModalConfig, ParallelConfig,
|
||||||
@ -786,6 +787,11 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
|
|||||||
"provided. Defaulting to scaling factors of 1.0. "
|
"provided. Defaulting to scaling factors of 1.0. "
|
||||||
"This may lead to less accurate results!")
|
"This may lead to less accurate results!")
|
||||||
|
|
||||||
|
if envs.VLLM_TEST_DYNAMO_GRAPH_CAPTURE:
|
||||||
|
self.model = torch.compile(self.model,
|
||||||
|
fullgraph=True,
|
||||||
|
backend="eager")
|
||||||
|
|
||||||
def save_sharded_state(
|
def save_sharded_state(
|
||||||
self,
|
self,
|
||||||
path: str,
|
path: str,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user