[Bugfix] set OMP_NUM_THREADS to 1 by default for multiprocessing (#6109)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
Travis Johnson 2024-07-03 17:56:59 -06:00 committed by GitHub
parent 3de6e6a30e
commit 1dab9bc8a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -37,6 +37,11 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
# Disable torch async compiling which won't work with daemonic processes
os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
# Set OMP_NUM_THREADS to 1 if it is not set explicitly, to avoid CPU
# contention amongst the shards
if "OMP_NUM_THREADS" not in os.environ:
os.environ["OMP_NUM_THREADS"] = "1"
assert world_size <= cuda_device_count_stateless(), (
"please set tensor_parallel_size to less than max local gpu count")