[TPU] Suppress import custom_ops warning (#7458)
This commit is contained in:
parent
4d2dc5072b
commit
d6e634f3d7
@ -6,13 +6,15 @@ import torch
|
|||||||
|
|
||||||
from vllm._core_ext import ScalarType
|
from vllm._core_ext import ScalarType
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
try:
|
if not current_platform.is_tpu():
|
||||||
import vllm._C
|
try:
|
||||||
except ImportError as e:
|
import vllm._C
|
||||||
logger.warning("Failed to import from vllm._C with %r", e)
|
except ImportError as e:
|
||||||
|
logger.warning("Failed to import from vllm._C with %r", e)
|
||||||
|
|
||||||
with contextlib.suppress(ImportError):
|
with contextlib.suppress(ImportError):
|
||||||
# ruff: noqa: F401
|
# ruff: noqa: F401
|
||||||
|
|||||||
@ -29,7 +29,6 @@ import torch.types
|
|||||||
from typing_extensions import ParamSpec, TypeIs, assert_never
|
from typing_extensions import ParamSpec, TypeIs, assert_never
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm import _custom_ops as ops
|
|
||||||
from vllm.logger import enable_trace_function_call, init_logger
|
from vllm.logger import enable_trace_function_call, init_logger
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -359,6 +358,7 @@ def is_xpu() -> bool:
|
|||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
|
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
|
||||||
"""Returns the maximum shared memory per thread block in bytes."""
|
"""Returns the maximum shared memory per thread block in bytes."""
|
||||||
|
from vllm import _custom_ops as ops
|
||||||
max_shared_mem = (
|
max_shared_mem = (
|
||||||
ops.get_max_shared_memory_per_block_device_attribute(gpu))
|
ops.get_max_shared_memory_per_block_device_attribute(gpu))
|
||||||
# value 0 will cause MAX_SEQ_LEN become negative and test_attention.py
|
# value 0 will cause MAX_SEQ_LEN become negative and test_attention.py
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user