[BugFix] Avoid unnecessary Ray import warnings (#6079)
This commit is contained in:
parent d18bab3587
commit d830656a97
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -682,11 +682,13 @@ class ParallelConfig:
             from vllm.executor import ray_utils
             backend = "mp"
-            ray_found = ray_utils.ray is not None
+            ray_found = ray_utils.ray_is_available()
             if cuda_device_count_stateless() < self.world_size:
                 if not ray_found:
                     raise ValueError("Unable to load Ray which is "
-                                     "required for multi-node inference")
+                                     "required for multi-node inference, "
+                                     "please install Ray with `pip install "
+                                     "ray`.") from ray_utils.ray_import_err
                 backend = "ray"
             elif ray_found:
                 if self.placement_group:
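The important mechanism in this hunk is `raise ... from ray_utils.ray_import_err`: the `ImportError` captured when `ray_utils` was first imported is attached as the new error's `__cause__`, so the user sees why Ray could not be loaded only when Ray is actually required, rather than getting a warning on every import. A minimal sketch of the pattern, with hypothetical names (`optional_dep`, `require_dep`):

```python
# Sketch of the store-then-chain pattern (hypothetical names, not vLLM code).
_import_err = None
try:
    import optional_dep  # hypothetical optional dependency
except ImportError as e:
    optional_dep = None
    _import_err = e


def require_dep() -> None:
    if optional_dep is None:
        # `from _import_err` sets __cause__, so the traceback shows the
        # original ImportError under the actionable message.
        raise ValueError("optional_dep is required for this feature, "
                         "please install it.") from _import_err
```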
@@ -718,6 +720,9 @@ class ParallelConfig:
             raise ValueError(
                 "Unrecognized distributed executor backend. Supported values "
                 "are 'ray' or 'mp'.")
+        if self.distributed_executor_backend == "ray":
+            from vllm.executor import ray_utils
+            ray_utils.assert_ray_available()
         if not self.disable_custom_all_reduce and self.world_size > 1:
             if is_hip():
                 self.disable_custom_all_reduce = True
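The effect of this hunk is that Ray availability is validated only when the `ray` backend is explicitly selected; users of the `mp` backend never touch the Ray import path. A rough standalone sketch of the same idea (hypothetical `resolve_backend` helper, not the vLLM implementation):

```python
# Check an optional backend only on the code path that needs it.
def resolve_backend(requested: str) -> str:
    if requested == "ray":
        try:
            import ray  # noqa: F401  # imported only when "ray" is chosen
        except ImportError as e:
            raise ValueError("The 'ray' backend was requested but Ray is "
                             "not installed; try `pip install ray`.") from e
    return requested
```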
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -380,6 +380,11 @@ class AsyncLLMEngine:
         """Creates an async LLM engine from the engine arguments."""
         # Create the engine configs.
         engine_config = engine_args.create_engine_config()
+
+        if engine_args.engine_use_ray:
+            from vllm.executor import ray_utils
+            ray_utils.assert_ray_available()
+
         distributed_executor_backend = (
             engine_config.parallel_config.distributed_executor_backend)
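Since `assert_ray_available()` now guards several entry points, its contract is easy to verify in isolation. A hypothetical pytest sketch, not part of this commit (assumes vLLM and pytest are installed; monkeypatching simulates a missing Ray):

```python
import pytest


def test_assert_ray_available_chains_cause(monkeypatch):
    # Hypothetical test: simulate a failed Ray import and check that the
    # original error is chained as __cause__.
    from vllm.executor import ray_utils
    monkeypatch.setattr(ray_utils, "ray", None)
    monkeypatch.setattr(ray_utils, "ray_import_err", ImportError("no ray"))
    with pytest.raises(ValueError) as excinfo:
        ray_utils.assert_ray_available()
    assert isinstance(excinfo.value.__cause__, ImportError)
```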
--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -42,14 +42,26 @@ try:
             output = pickle.dumps(output)
             return output
 
+    ray_import_err = None
+
 except ImportError as e:
-    logger.warning(
-        "Failed to import Ray with %r. For multi-node inference, "
-        "please install Ray with `pip install ray`.", e)
     ray = None  # type: ignore
+    ray_import_err = e
     RayWorkerWrapper = None  # type: ignore
 
 
+def ray_is_available() -> bool:
+    """Returns True if Ray is available."""
+    return ray is not None
+
+
+def assert_ray_available():
+    """Raise an exception if Ray is not available."""
+    if ray is None:
+        raise ValueError("Failed to import Ray, please install Ray with "
+                         "`pip install ray`.") from ray_import_err
+
+
 def initialize_ray_cluster(
     parallel_config: ParallelConfig,
     ray_address: Optional[str] = None,
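The two helpers give callers a soft probe and a hard requirement over the same state. A short usage sketch (assumes vLLM is installed):

```python
# Usage sketch for the helpers introduced above.
from vllm.executor import ray_utils

if ray_utils.ray_is_available():
    print("Ray found: the 'ray' backend can be used.")
else:
    # Raises ValueError with the stored ImportError as __cause__.
    ray_utils.assert_ray_available()
```

Note the design choice: the module no longer logs a warning at import time; it stays silent until a caller actually asks for Ray, then fails loudly with the chained cause.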
@@ -65,10 +77,7 @@ def initialize_ray_cluster(
         ray_address: The address of the Ray cluster. If None, uses
             the default Ray cluster address.
     """
-    if ray is None:
-        raise ImportError(
-            "Ray is not installed. Please install Ray to use multi-node "
-            "serving.")
+    assert_ray_available()
 
     # Connect to a ray cluster.
     if is_hip() or is_xpu():
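This last hunk is the payoff of the refactor: the inline `ImportError` in `initialize_ray_cluster` is replaced by the shared helper, so every call site fails with the same message and the same chained cause (the raised type also changes from `ImportError` to `ValueError`, matching the other new call sites). A condensed sketch of the dedup pattern, with hypothetical names:

```python
# One helper replaces the per-call-site if/raise blocks (hypothetical names).
_dep = None
_dep_import_err = ImportError("simulated import failure")


def assert_dep_available() -> None:
    if _dep is None:
        raise ValueError("dep is required, please install it."
                         ) from _dep_import_err


def initialize_cluster() -> None:
    assert_dep_available()  # replaces the old inline ImportError raise
```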