From 4695397dcfef693a0a10f1eb8bf77ea905c54829 Mon Sep 17 00:00:00 2001 From: Ricky Xu Date: Mon, 15 Apr 2024 14:24:45 -0700 Subject: [PATCH] [Bugfix] Fix ray workers profiling with nsight (#4095) --- vllm/executor/ray_gpu_executor.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py index 5db2f3f6..7aca5e36 100644 --- a/vllm/executor/ray_gpu_executor.py +++ b/vllm/executor/ray_gpu_executor.py @@ -48,6 +48,21 @@ class RayGPUExecutor(ExecutorBase): if USE_RAY_COMPILED_DAG: self.forward_dag = self._compiled_ray_dag() + def _configure_ray_workers_use_nsight(self, + ray_remote_kwargs) -> Dict[str, Any]: + # If nsight profiling is enabled, we need to set the profiling + # configuration for the ray workers as runtime env. + runtime_env = ray_remote_kwargs.setdefault("runtime_env", {}) + runtime_env.update({ + "nsight": { + "t": "cuda,cudnn,cublas", + "o": "'worker_process_%p'", + "cuda-graph-trace": "node", + } + }) + + return ray_remote_kwargs + def _init_workers_ray(self, placement_group: "PlacementGroup", **ray_remote_kwargs): if self.parallel_config.tensor_parallel_size == 1: @@ -63,6 +78,10 @@ class RayGPUExecutor(ExecutorBase): # The remaining workers are the actual ray actors. self.workers: List[RayWorkerVllm] = [] + if self.parallel_config.ray_workers_use_nsight: + ray_remote_kwargs = self._configure_ray_workers_use_nsight( + ray_remote_kwargs) + # Create the workers. driver_ip = get_ip() for bundle_id, bundle in enumerate(placement_group.bundle_specs):