From 8df2dc3c8812c0abb97ce3e2913411d88524e59f Mon Sep 17 00:00:00 2001 From: Brittany <24945384+bvrockwell@users.noreply.github.com> Date: Fri, 27 Sep 2024 01:16:55 -0700 Subject: [PATCH] [TPU] Update pallas.py to support trillium (#8871) --- vllm/attention/backends/pallas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/backends/pallas.py b/vllm/attention/backends/pallas.py index 83fdef16..a8a78d41 100644 --- a/vllm/attention/backends/pallas.py +++ b/vllm/attention/backends/pallas.py @@ -130,7 +130,7 @@ class PallasAttentionBackendImpl(AttentionImpl): assert tpu_type is not None tpu_type = tpu_type.lower() - if "lite" not in tpu_type: + if (("lite" not in tpu_type) and ("v6" not in tpu_type)): if self.num_kv_heads % 2 == 0: self.megacore_mode = "kv_head" else: