diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index c8d7164d..65095be9 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -54,8 +54,8 @@ class LLMEngine:
         scheduler_config: The configuration related to the request scheduler.
         distributed_init_method: The initialization method for distributed
             execution. See `torch.distributed.init_process_group` for details.
-        stage_devices: The list of devices for each stage. Each stage is a list
-            of (rank, node_resource, device) tuples.
+        placement_group: Ray placement group for distributed execution.
+            Required for distributed execution.
         log_stats: Whether to log statistics.
     """
 
diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py
index 80479967..ed7f1ec4 100644
--- a/vllm/engine/ray_utils.py
+++ b/vllm/engine/ray_utils.py
@@ -63,11 +63,10 @@ def initialize_cluster(
             the default Ray cluster address.
 
     Returns:
-        A tuple of (`distributed_init_method`, `all_stage_devices`). The
+        A tuple of (`distributed_init_method`, `placement_group`). The
         `distributed_init_method` is the address for initializing the
-        distributed backend. `all_stage_devices` includes device IDs for
-        each worker in each pipeline stage. Each device ID is a tuple of
-        (rank, node resource, device id).
+        distributed backend. `placement_group` includes the specification
+        of the resources for each distributed worker.
     """
     if parallel_config.worker_use_ray or engine_use_ray:
         if ray is None:
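
For context, a minimal sketch of how a placement group like the one described in the new docstrings can be requested from Ray. This is illustrative only and not code from this PR; `world_size` and the one-GPU-per-worker bundle shape are assumptions.

# Illustrative sketch (not part of this diff): create a Ray placement group
# with one bundle per distributed worker, each reserving one GPU.
import ray
from ray.util.placement_group import placement_group

ray.init(address="auto")

world_size = 4  # assumed number of distributed workers
# One bundle per worker; each bundle reserves a single GPU.
pg = placement_group([{"GPU": 1}] * world_size, strategy="PACK")
ray.get(pg.ready())  # block until the cluster can satisfy the reservation

A structure like this is what the updated `initialize_cluster` docstring refers to: the returned `placement_group` captures the per-worker resource reservation, while `distributed_init_method` carries the address passed to `torch.distributed.init_process_group`.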