[Misc] Fix docstring of get_attn_backend (#5271)
This commit is contained in:
parent
41ca62cf03
commit
c65146e75e
@@ -31,15 +31,14 @@ def get_attn_backend(
|
|||||||
block_size: int,
|
block_size: int,
|
||||||
is_blocksparse: bool = False,
|
is_blocksparse: bool = False,
|
||||||
) -> Type[AttentionBackend]:
|
) -> Type[AttentionBackend]:
|
||||||
|
"""Selects which attention backend to use and lazily imports it."""
|
||||||
|
|
||||||
if is_blocksparse:
|
if is_blocksparse:
|
||||||
logger.info("Using BlocksparseFlashAttention backend.")
|
logger.info("Using BlocksparseFlashAttention backend.")
|
||||||
from vllm.attention.backends.blocksparse_attn import (
|
from vllm.attention.backends.blocksparse_attn import (
|
||||||
BlocksparseFlashAttentionBackend)
|
BlocksparseFlashAttentionBackend)
|
||||||
return BlocksparseFlashAttentionBackend
|
return BlocksparseFlashAttentionBackend
|
||||||
"""Determine which attention backend to use and only import
|
|
||||||
the selected backend module.
|
|
||||||
"""
|
|
||||||
backend = which_attn_to_use(num_heads, head_size, num_kv_heads,
|
backend = which_attn_to_use(num_heads, head_size, num_kv_heads,
|
||||||
sliding_window, dtype, kv_cache_dtype,
|
sliding_window, dtype, kv_cache_dtype,
|
||||||
block_size)
|
block_size)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user