[Core][Distributed] improve logging for init dist (#4042)
parent 5c2e66e487
commit 98afde19fc
@@ -8,6 +8,10 @@ from typing import Optional
 
 import torch
 
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
 # Tensor model parallel group that the current rank belongs to.
 _TENSOR_MODEL_PARALLEL_GROUP = None
 # Pipeline model parallel group that the current rank belongs to.
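The first hunk creates a module-level logger through vLLM's init_logger helper, keyed by __name__ so every record carries its module of origin. As a rough stdlib-only sketch of that pattern (the init_logger body below is a hypothetical stand-in, not vLLM's actual implementation, which also installs the project's handler and message format):

import logging

def init_logger(name: str) -> logging.Logger:
    # Hypothetical stand-in for vllm.logger.init_logger: return a
    # per-module logger so records identify where they came from.
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    return logger

logger = init_logger(__name__)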
@@ -45,6 +49,8 @@ def init_distributed_environment(
     local_rank: int = -1,
     backend: str = "nccl",
 ):
+    logger.debug(f"{world_size=} {rank=} {local_rank=} "
+                 f"{distributed_init_method=} {backend=}")
     if not torch.distributed.is_initialized():
         assert distributed_init_method is not None, (
             "distributed_init_method must be provided when initializing "
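The added debug line uses the self-documenting f-string specifier from Python 3.8: f"{name=}" expands to name=<repr of value>, so each argument is labeled without writing its name twice. A minimal runnable sketch with made-up values, using stdlib logging in place of vLLM's logger:

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("init_dist_demo")

# Made-up example values standing in for init_distributed_environment's
# parameters; only the formatting behavior is being demonstrated.
world_size, rank, local_rank = 2, 0, 0
distributed_init_method = "tcp://127.0.0.1:29500"
backend = "nccl"

logger.debug(f"{world_size=} {rank=} {local_rank=} "
             f"{distributed_init_method=} {backend=}")
# Logs: world_size=2 rank=0 local_rank=0
#       distributed_init_method='tcp://127.0.0.1:29500' backend='nccl'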