From b5853f99639afd82cb18f131dce7f8c41eda74bd Mon Sep 17 00:00:00 2001
From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Date: Thu, 16 May 2024 13:46:52 -0400
Subject: [PATCH] [ROCm][AMD][Bugfix] adding a missing triton autotune config
 (#4845)

---
 vllm/attention/ops/triton_flash_attention.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 11476641..f9421111 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -239,6 +239,16 @@ def _attn_fwd_inner(
             num_stages=1,
             num_warps=8,
         ),
+        triton.Config(
+            {
+                "BLOCK_M": 128,
+                "BLOCK_N": 64,
+                "waves_per_eu": 1,
+                "PRE_LOAD_V": False,
+            },
+            num_stages=1,
+            num_warps=4,
+        ),
         triton.Config(
             {
                 "BLOCK_M": 128,