Remove Ray health check (#4693)
This commit is contained in:
parent
8d75fe48ca
commit
18a277b52d
@ -293,23 +293,6 @@ class RayGPUExecutor(DistributedGPUExecutor):
|
|||||||
])
|
])
|
||||||
return forward_dag.experimental_compile()
|
return forward_dag.experimental_compile()
|
||||||
|
|
||||||
def check_health(self) -> None:
|
|
||||||
"""Raises an error if engine is unhealthy."""
|
|
||||||
self._check_if_any_actor_is_dead()
|
|
||||||
|
|
||||||
def _check_if_any_actor_is_dead(self):
|
|
||||||
if not self.workers:
|
|
||||||
return
|
|
||||||
|
|
||||||
dead_actors = []
|
|
||||||
for actor in self.workers:
|
|
||||||
actor_state = ray.state.actors(actor._ray_actor_id.hex()) # pylint: disable=protected-access
|
|
||||||
if actor_state["State"] == "DEAD":
|
|
||||||
dead_actors.append(actor)
|
|
||||||
if dead_actors:
|
|
||||||
raise RuntimeError("At least one Worker is dead. "
|
|
||||||
f"Dead Workers: {dead_actors}. ")
|
|
||||||
|
|
||||||
|
|
||||||
class RayGPUExecutorAsync(RayGPUExecutor, DistributedGPUExecutorAsync):
|
class RayGPUExecutorAsync(RayGPUExecutor, DistributedGPUExecutorAsync):
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user