From 95db75de64bec34f4d80acff92c62d1cdfa94688 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Thu, 25 Jul 2024 13:40:01 -0400 Subject: [PATCH] [Bugfix] Add synchronize to prevent possible data race (#6788) Co-authored-by: Lucas Wilkinson --- vllm/distributed/parallel_state.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index 128096c8..e9c6fc3a 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -243,6 +243,13 @@ class GroupCoordinator: ca_comm = self.ca_comm maybe_ca_context = nullcontext( ) if ca_comm is None else ca_comm.capture() + + # ensure all initialization operations complete before attempting to + # capture the graph on another stream + curr_stream = torch.cuda.current_stream() + if curr_stream != stream: + stream.wait_stream(curr_stream) + with torch.cuda.stream(stream), maybe_ca_context: # In graph mode, we have to be very careful about the collective # operations. The current status is: