From 95db75de64bec34f4d80acff92c62d1cdfa94688 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Thu, 25 Jul 2024 13:40:01 -0400
Subject: [PATCH] [Bugfix] Add synchronize to prevent possible data race
 (#6788)

Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
---
 vllm/distributed/parallel_state.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 128096c8..e9c6fc3a 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -243,6 +243,13 @@ class GroupCoordinator:
         ca_comm = self.ca_comm
         maybe_ca_context = nullcontext(
         ) if ca_comm is None else ca_comm.capture()
+
+        # Make the capture stream wait for all work already queued on the
+        # current stream (e.g. initialization) before graph capture begins.
+        curr_stream = torch.cuda.current_stream()
+        if curr_stream != stream:
+            stream.wait_stream(curr_stream)
+
         with torch.cuda.stream(stream), maybe_ca_context:
             # In graph mode, we have to be very careful about the collective
             # operations. The current status is: