[Bugfix] Fix ray instance detect issue (#9439)

This commit is contained in:
Yan Ma 2024-10-28 15:13:03 +08:00 committed by GitHub
parent feb92fbe4a
commit 2adb4409e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -232,9 +232,16 @@ def initialize_ray_cluster(
# Connect to a ray cluster.
if current_platform.is_rocm() or current_platform.is_xpu():
ray.init(address=ray_address,
ignore_reinit_error=True,
num_gpus=parallel_config.world_size)
# Try to connect to an existing Ray instance; create a new one if none is found.
try:
ray.init("auto")
except ConnectionError:
logger.warning(
"No existing RAY instance detected. "
"A new instance will be launched with current node resources.")
ray.init(address=ray_address,
ignore_reinit_error=True,
num_gpus=parallel_config.world_size)
else:
ray.init(address=ray_address, ignore_reinit_error=True)