Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
Wang Ran (汪然) 2023-09-28 07:22:45 +08:00 committed by GitHub
parent 21877b0d75
commit 30e775281d
2 changed files with 5 additions and 6 deletions

vllm/engine/llm_engine.py

@@ -54,8 +54,8 @@ class LLMEngine:
         scheduler_config: The configuration related to the request scheduler.
         distributed_init_method: The initialization method for distributed
             execution. See `torch.distributed.init_process_group` for details.
-        stage_devices: The list of devices for each stage. Each stage is a list
-            of (rank, node_resource, device) tuples.
+        placement_group: Ray placement group for distributed execution.
+            Required for distributed execution.
         log_stats: Whether to log statistics.
     """

vllm/engine/ray_utils.py

@@ -63,11 +63,10 @@ def initialize_cluster(
             the default Ray cluster address.

     Returns:
-        A tuple of (`distributed_init_method`, `all_stage_devices`). The
+        A tuple of (`distributed_init_method`, `placement_group`). The
         `distributed_init_method` is the address for initializing the
-        distributed backend. `all_stage_devices` includes device IDs for
-        each worker in each pipeline stage. Each device ID is a tuple of
-        (rank, node resource, device id).
+        distributed backend. `placement_group` includes the specification
+        of the resources for each distributed worker.
     """
     if parallel_config.worker_use_ray or engine_use_ray:
         if ray is None:
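For context, a hedged sketch of how a caller might consume the return pair described by the updated docstring; the `ParallelConfig` import and its constructor arguments are an assumption about vLLM of this era, not something shown in the diff:

from vllm.config import ParallelConfig
from vllm.engine.ray_utils import initialize_cluster

# Assumed constructor: (pipeline_parallel_size, tensor_parallel_size, worker_use_ray).
parallel_config = ParallelConfig(1, 2, worker_use_ray=True)
distributed_init_method, placement_group = initialize_cluster(parallel_config)
# `distributed_init_method` is the address passed to
# torch.distributed.init_process_group(); `placement_group` tells Ray which
# reserved resource bundle each distributed worker should be scheduled on.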