[torch.compile] add a flag to disable custom op (#8488)

2024-09-14 13:07:16 -07:00 · 2024-09-14 13:07:16 -07:00 · 47790f3e32
commit 47790f3e32
parent a36e070dad
3 changed files with 12 additions and 1 deletion
--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
@@ -6,7 +6,8 @@ import pytest
@pytest.mark.parametrize("model", ["meta-llama/Meta-Llama-3-8B"])
 def test_full_graph(model):
    # make sure these models can be captured in full graph mode
-    os.environ["VLLM_TEST_DYNAMO_GRAPH_CAPTURE"] = "1"
+    if "VLLM_TEST_DYNAMO_GRAPH_CAPTURE" not in os.environ:
+        os.environ["VLLM_TEST_DYNAMO_GRAPH_CAPTURE"] = "1"

    from vllm import LLM, SamplingParams
    prompts = [
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -202,6 +202,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    (os.environ.get("VLLM_DYNAMO_USE_CUSTOM_DISPATCHER", "True").lower() in
     ("true", "1")),

+    # Internal flag: when set to a nonzero integer, CustomOp dispatch skips
+    # the custom ops and uses the native PyTorch implementation instead
+    "VLLM_TEST_COMPILE_NO_CUSTOM_OPS":
+    lambda: int(os.environ.get("VLLM_TEST_COMPILE_NO_CUSTOM_OPS", "0")),
+
    # Internal flag to enable Dynamo fullgraph capture
    "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE":
    lambda: bool(
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -1,5 +1,6 @@
 import torch.nn as nn

+import vllm.envs as envs
 from vllm.platforms import current_platform
 from vllm.utils import is_cpu, is_hip, is_xpu

@@ -53,6 +54,10 @@ class CustomOp(nn.Module):
    def dispatch_forward(self):
        # NOTE(woosuk): Here we assume that vLLM was built for only one
        # specific backend. Currently, we do not support dynamic dispatching.
+
+        if envs.VLLM_TEST_COMPILE_NO_CUSTOM_OPS:
+            return self.forward_native
+
        if is_hip():
            return self.forward_hip
        elif is_cpu():