diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index 3f0e29c7..8b07fb2d 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -58,6 +58,10 @@ def get_attn_backend(
             ROCmFlashAttentionBackend)
         return ROCmFlashAttentionBackend
     elif backend == _Backend.TORCH_SDPA:
+        # TODO: make XPU backend available here.
+        # Explicit raise (not `assert`): asserts are stripped under `python -O`,
+        # and `assert cond, RuntimeError(...)` would only use the exception
+        # instance as the AssertionError message, never raising RuntimeError.
+        if not is_cpu():
+            raise RuntimeError(
+                "Torch SDPA backend is only used for the CPU device.")
         logger.info("Using Torch SDPA backend.")
         from vllm.attention.backends.torch_sdpa import TorchSDPABackend
         return TorchSDPABackend