[Bugfix] Make torch registration of punica ops optional (#7970)

2024-08-28 18:11:49 -04:00 · 2024-08-28 18:11:49 -04:00 · 3cdfe1f38b
commit 3cdfe1f38b
parent fdd9daafa3
7 changed files with 37 additions and 21 deletions
--- a/vllm/lora/ops/bgmv_expand.py
+++ b/vllm/lora/ops/bgmv_expand.py
@@ -160,6 +160,9 @@ def _bgmv_expand(
     return
-bgmv_expand = torch.library.custom_op("lora::bgmv_expand",
-                                      _bgmv_expand,
-                                      mutates_args=["output_tensor"])
+try:
+    bgmv_expand = torch.library.custom_op("lora::bgmv_expand",
+                                          _bgmv_expand,
+                                          mutates_args=["output_tensor"])
+except AttributeError:
+    bgmv_expand = _bgmv_expand
--- a/vllm/lora/ops/bgmv_expand_slice.py
+++ b/vllm/lora/ops/bgmv_expand_slice.py
@@ -173,6 +173,9 @@ def _bgmv_expand_slice(
     return
-bgmv_expand_slice = torch.library.custom_op("lora::bgmv_expand_slice",
-                                            _bgmv_expand_slice,
-                                            mutates_args=["output_tensor"])
+try:
+    bgmv_expand_slice = torch.library.custom_op("lora::bgmv_expand_slice",
+                                                _bgmv_expand_slice,
+                                                mutates_args=["output_tensor"])
+except AttributeError:
+    bgmv_expand_slice = _bgmv_expand_slice
--- a/vllm/lora/ops/bgmv_shrink.py
+++ b/vllm/lora/ops/bgmv_shrink.py
@@ -142,6 +142,9 @@ def _bgmv_shrink(
     return
-bgmv_shrink = torch.library.custom_op("lora::bgmv_shrink",
-                                      _bgmv_shrink,
-                                      mutates_args=["output_tensor"])
+try:
+    bgmv_shrink = torch.library.custom_op("lora::bgmv_shrink",
+                                          _bgmv_shrink,
+                                          mutates_args=["output_tensor"])
+except AttributeError:
+    bgmv_shrink = _bgmv_shrink
--- a/vllm/lora/ops/sgmv_expand.py
+++ b/vllm/lora/ops/sgmv_expand.py
@@ -192,6 +192,9 @@ def _sgmv_expand(
     return
-sgmv_expand = torch.library.custom_op("lora::sgmv_expand",
-                                      _sgmv_expand,
-                                      mutates_args=["output_tensor"])
+try:
+    sgmv_expand = torch.library.custom_op("lora::sgmv_expand",
+                                          _sgmv_expand,
+                                          mutates_args=["output_tensor"])
+except AttributeError:
+    sgmv_expand = _sgmv_expand
--- a/vllm/lora/ops/sgmv_expand_slice.py
+++ b/vllm/lora/ops/sgmv_expand_slice.py
@@ -205,6 +205,9 @@ def _sgmv_expand_slice(
     return
-sgmv_expand_slice = torch.library.custom_op("lora::sgmv_expand_slice",
-                                            _sgmv_expand_slice,
-                                            mutates_args=["output_tensor"])
+try:
+    sgmv_expand_slice = torch.library.custom_op("lora::sgmv_expand_slice",
+                                                _sgmv_expand_slice,
+                                                mutates_args=["output_tensor"])
+except AttributeError:
+    sgmv_expand_slice = _sgmv_expand_slice
--- a/vllm/lora/ops/sgmv_shrink.py
+++ b/vllm/lora/ops/sgmv_shrink.py
@@ -189,6 +189,9 @@ def _sgmv_shrink(
     return
-sgmv_shrink = torch.library.custom_op("lora::sgmv_shrink",
-                                      _sgmv_shrink,
-                                      mutates_args=["output_tensor"])
+try:
+    sgmv_shrink = torch.library.custom_op("lora::sgmv_shrink",
+                                          _sgmv_shrink,
+                                          mutates_args=["output_tensor"])
+except AttributeError:
+    sgmv_shrink = _sgmv_shrink
--- a/vllm/lora/punica.py
+++ b/vllm/lora/punica.py
@@ -10,10 +10,8 @@ from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
 import torch
 from vllm.triton_utils import HAS_TRITON
-from vllm.utils import is_xpu
-# FIXME: xpu path doesn't support torch.library.custom_op
-if HAS_TRITON and not is_xpu():
+if HAS_TRITON:
     from vllm.lora.ops.bgmv_expand import bgmv_expand
     from vllm.lora.ops.bgmv_expand_slice import bgmv_expand_slice
     from vllm.lora.ops.bgmv_shrink import bgmv_shrink