parent
21877b0d75
commit
30e775281d
@ -54,8 +54,8 @@ class LLMEngine:
|
|||||||
scheduler_config: The configuration related to the request scheduler.
|
scheduler_config: The configuration related to the request scheduler.
|
||||||
distributed_init_method: The initialization method for distributed
|
distributed_init_method: The initialization method for distributed
|
||||||
execution. See `torch.distributed.init_process_group` for details.
|
execution. See `torch.distributed.init_process_group` for details.
|
||||||
stage_devices: The list of devices for each stage. Each stage is a list
|
placement_group: Ray placement group for distributed execution.
|
||||||
of (rank, node_resource, device) tuples.
|
Required for distributed execution.
|
||||||
log_stats: Whether to log statistics.
|
log_stats: Whether to log statistics.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@ -63,11 +63,10 @@ def initialize_cluster(
|
|||||||
the default Ray cluster address.
|
the default Ray cluster address.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A tuple of (`distributed_init_method`, `all_stage_devices`). The
|
A tuple of (`distributed_init_method`, `placement_group`). The
|
||||||
`distributed_init_method` is the address for initializing the
|
`distributed_init_method` is the address for initializing the
|
||||||
distributed backend. `all_stage_devices` includes device IDs for
|
distributed backend. `placement_group` includes the specification
|
||||||
each worker in each pipeline stage. Each device ID is a tuple of
|
of the resources for each distributed worker.
|
||||||
(rank, node resource, device id).
|
|
||||||
"""
|
"""
|
||||||
if parallel_config.worker_use_ray or engine_use_ray:
|
if parallel_config.worker_use_ray or engine_use_ray:
|
||||||
if ray is None:
|
if ray is None:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user