From b5853f99639afd82cb18f131dce7f8c41eda74bd Mon Sep 17 00:00:00 2001
From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Date: Thu, 16 May 2024 13:46:52 -0400
Subject: [PATCH] [ROCm][AMD][Bugfix] adding a missing triton autotune config
 (#4845)

---
 vllm/attention/ops/triton_flash_attention.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 11476641..f9421111 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -239,6 +239,16 @@ def _attn_fwd_inner(
             num_stages=1,
             num_warps=8,
         ),
+        triton.Config(
+            {
+                "BLOCK_M": 128,
+                "BLOCK_N": 64,
+                "waves_per_eu": 1,
+                "PRE_LOAD_V": False,
+            },
+            num_stages=1,
+            num_warps=4,
+        ),
         triton.Config(
             {
                 "BLOCK_M": 128,