Remove Ray health check (#4693)
This commit is contained in:
parent
8d75fe48ca
commit
18a277b52d
@ -293,23 +293,6 @@ class RayGPUExecutor(DistributedGPUExecutor):
|
||||
])
|
||||
return forward_dag.experimental_compile()
|
||||
|
||||
def check_health(self) -> None:
    """Check engine health, raising an error when it is unhealthy.

    The check is delegated to ``_check_if_any_actor_is_dead``; whatever
    that helper raises propagates unchanged to the caller.
    """
    self._check_if_any_actor_is_dead()
|
||||
|
||||
def _check_if_any_actor_is_dead(self):
    """Raise ``RuntimeError`` if any worker actor is reported as dead.

    Each entry of ``self.workers`` is looked up through
    ``ray.state.actors`` using the hex form of its actor id, and every
    worker whose ``"State"`` field equals ``"DEAD"`` is collected.
    Nothing happens when ``self.workers`` is empty or otherwise falsy.

    Raises:
        RuntimeError: If at least one worker's state is ``"DEAD"``; the
            message lists the dead workers.
    """
    if self.workers:
        # The actor id is only reachable through a private attribute.
        # pylint: disable=protected-access
        dead_actors = [
            worker for worker in self.workers
            if ray.state.actors(worker._ray_actor_id.hex())["State"] == "DEAD"
        ]
        if dead_actors:
            raise RuntimeError("At least one Worker is dead. "
                               f"Dead Workers: {dead_actors}. ")
|
||||
|
||||
|
||||
class RayGPUExecutorAsync(RayGPUExecutor, DistributedGPUExecutorAsync):
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user