diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index ade0d2f7..7ab9c205 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -314,11 +314,11 @@ steps:
   num_gpus: 4
   source_file_dependencies:
   - vllm/
-  - tests/distributed/test_pipeline_parallel
   - tests/distributed/test_pp_cudagraph.py
+  - tests/distributed/test_pipeline_parallel
   commands:
-  - pytest -v -s distributed/test_pipeline_parallel.py
   - pytest -v -s distributed/test_pp_cudagraph.py
+  - pytest -v -s distributed/test_pipeline_parallel.py
 
 - label: LoRA Long Context (Distributed) # 11min
   # This test runs llama 13B, so it is required to run on 4 GPUs.
diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index 4a339bc3..4d54e43d 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -80,6 +80,10 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME,
             "VLLM_USE_RAY_SPMD_WORKER": "1",
             "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1",
         }
+        # Temporary. Currently when zeromq + SPMD is used, it does not properly
+        # terminate because of aDAG issue.
+        pp_args.append("--disable-frontend-multiprocessing")
+        tp_args.append("--disable-frontend-multiprocessing")
 
     try:
         compare_two_settings(MODEL_NAME, pp_args, tp_args, pp_env)
diff --git a/tests/utils.py b/tests/utils.py
index c20a6d9e..beac8cfd 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -384,6 +384,7 @@ def fork_new_process_for_each_test(
         os.setpgrp()
         from _pytest.outcomes import Skipped
         pid = os.fork()
+        print(f"Fork a new process to run a test {pid}")
         if pid == 0:
             try:
                 f(*args, **kwargs)
@@ -401,11 +402,11 @@ def fork_new_process_for_each_test(
             pgid = os.getpgid(pid)
             _pid, _exitcode = os.waitpid(pid, 0)
             # ignore SIGTERM signal itself
-            old_singla_handler = signal.signal(signal.SIGTERM, signal.SIG_IGN)
+            old_signal_handler = signal.signal(signal.SIGTERM, signal.SIG_IGN)
             # kill all child processes
             os.killpg(pgid, signal.SIGTERM)
             # restore the signal handler
-            signal.signal(signal.SIGTERM, old_singla_handler)
+            signal.signal(signal.SIGTERM, old_signal_handler)
             assert _exitcode == 0, (f"function {f} failed when called with"
                                     f" args {args} and kwargs {kwargs}")
 