[distributed][misc] error on same VLLM_HOST_IP setting (#7756)
parent 8678a69ab5
commit 7eebe8ccaa
@@ -137,7 +137,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
             os.path.join(get_default_cache_root(), "vllm"),
         )),

-    # used in distributed environment to determine the master address
+    # used in distributed environment to determine the ip address
+    # of the current node, when the node has multiple network interfaces.
+    # If you are using multi-node inference, you should set this differently
+    # on each node.
     'VLLM_HOST_IP':
     lambda: os.getenv('VLLM_HOST_IP', "") or os.getenv("HOST_IP", ""),
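The lookup above falls back from VLLM_HOST_IP to HOST_IP to an empty string, so on a multi-NIC node each launch script should export a unique, routable address before starting vLLM. A minimal sketch of that pattern, assuming the common UDP-socket trick for discovering the outward-facing address (current_node_ip is a hypothetical helper, not part of this commit; 8.8.8.8 is an arbitrary public address):

import os
import socket


def resolve_host_ip() -> str:
    """Mirror the envs.py lookup: VLLM_HOST_IP, then HOST_IP, then ""."""
    return os.getenv("VLLM_HOST_IP", "") or os.getenv("HOST_IP", "")


def current_node_ip() -> str:
    # Hypothetical helper: connect a UDP socket toward a public address
    # (no packets are actually sent) and read back the local endpoint.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(("8.8.8.8", 80))
        return s.getsockname()[0]


if not resolve_host_ip():
    # Run this per node, before launching vLLM, so every node
    # advertises a different address.
    os.environ["VLLM_HOST_IP"] = current_node_ip()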
@@ -218,6 +218,19 @@ class RayGPUExecutor(DistributedGPUExecutor):
         for node_id, gpu_ids in node_gpus.items():
             node_gpus[node_id] = sorted(gpu_ids)

+        all_ips = set(worker_ips + [driver_ip])
+        n_ips = len(all_ips)
+        n_nodes = len(node_workers)
+
+        if n_nodes != n_ips:
+            raise RuntimeError(
+                f"Every node should have a unique IP address. Got {n_nodes}"
+                f" nodes with node ids {list(node_workers.keys())} and "
+                f"{n_ips} unique IP addresses {all_ips}. Please check your"
+                " network configuration. If you set `VLLM_HOST_IP` or "
+                "`HOST_IP` environment variable, make sure it is unique for"
+                " each node.")
+
         VLLM_INSTANCE_ID = get_vllm_instance_id()

         # Set environment variables for the driver and workers.
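The new check is plain set arithmetic: if two nodes resolve to the same address (for example, both inherited the same VLLM_HOST_IP), the number of distinct IPs drops below the node count and startup fails early instead of hanging later. A self-contained sketch with made-up inputs (in the executor, node_workers, worker_ips, and driver_ip come from Ray worker placement):

# Illustrative data: two nodes, but node-b reused node-a's VLLM_HOST_IP.
node_workers = {"node-a": [0, 1], "node-b": [2, 3]}
driver_ip = "10.0.0.1"
worker_ips = ["10.0.0.1"]

all_ips = set(worker_ips + [driver_ip])  # collapses duplicates to {"10.0.0.1"}
if len(node_workers) != len(all_ips):
    raise RuntimeError(
        f"Every node should have a unique IP address. Got "
        f"{len(node_workers)} nodes but only {len(all_ips)} unique "
        f"IP addresses: {all_ips}.")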