[Bugfix] Add synchronize to prevent possible data race (#6788)
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
parent
65b1f121c8
commit
95db75de64
@@ -243,6 +243,13 @@ class GroupCoordinator:
|
|||||||
ca_comm = self.ca_comm
|
ca_comm = self.ca_comm
|
||||||
maybe_ca_context = nullcontext(
|
maybe_ca_context = nullcontext(
|
||||||
) if ca_comm is None else ca_comm.capture()
|
) if ca_comm is None else ca_comm.capture()
|
||||||
|
|
||||||
|
# ensure all initialization operations complete before attempting to
|
||||||
|
# capture the graph on another stream
|
||||||
|
curr_stream = torch.cuda.current_stream()
|
||||||
|
if curr_stream != stream:
|
||||||
|
stream.wait_stream(curr_stream)
|
||||||
|
|
||||||
with torch.cuda.stream(stream), maybe_ca_context:
|
with torch.cuda.stream(stream), maybe_ca_context:
|
||||||
# In graph mode, we have to be very careful about the collective
|
# In graph mode, we have to be very careful about the collective
|
||||||
# operations. The current status is:
|
# operations. The current status is:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user