[Misc] Revive to use loopback address for driver IP (#7091)
Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
parent
05308891e2
commit
22e718ff1a
@ -216,6 +216,16 @@ class RayGPUExecutor(DistributedGPUExecutor):
|
|||||||
self._run_workers("update_environment_variables",
|
self._run_workers("update_environment_variables",
|
||||||
all_args=all_args_to_update_environment_variables)
|
all_args=all_args_to_update_environment_variables)
|
||||||
|
|
||||||
|
if len(node_gpus) == 1:
|
||||||
|
# In the single-node case, we don't need to look up the IP address;
|
||||||
|
# the loopback address is sufficient
|
||||||
|
# NOTE: a node may have several IP addresses, one for each
|
||||||
|
# network interface. `get_ip()` might return any of them,
|
||||||
|
# some of which might not work for communication inside the node
|
||||||
|
# if the network setup is complicated. Using the loopback address
|
||||||
|
# solves this issue, as it always works for communication inside
|
||||||
|
# the node.
|
||||||
|
driver_ip = "127.0.0.1"
|
||||||
distributed_init_method = get_distributed_init_method(
|
distributed_init_method = get_distributed_init_method(
|
||||||
driver_ip, get_open_port())
|
driver_ip, get_open_port())
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user