2024-01-15 04:37:58 +08:00
|
|
|
# In this file, you can add more tests to run either by adding a new step or
|
|
|
|
|
# adding a new command to an existing step. See different options here for examples.
|
|
|
|
|
# This script will be fed into the Jinja template in `test-template.j2` to generate
|
|
|
|
|
# the final pipeline yaml file.
|
|
|
|
|
|
|
|
|
|
steps:
# Regression suite: one pytest file, run from the tests directory.
- label: Regression Test
  command: pytest -v -s test_regression.py
  working_dir: "/vllm-workspace/tests"  # optional
|
|
|
|
|
|
|
|
|
|
# Runs pytest on the async_engine test target.
- label: AsyncEngine Test
  command: pytest -v -s async_engine
|
|
|
|
|
|
2024-02-19 08:44:50 +08:00
|
|
|
# Runs the basic-correctness and chunked-prefill test files once per
# attention backend (XFORMERS, FLASH_ATTN, ROCM_FLASH); the backend is
# selected through the VLLM_ATTENTION_BACKEND environment variable.
- label: Basic Correctness Test
  commands:
  # basic_correctness/test_basic_correctness.py, one run per backend
  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
  - VLLM_ATTENTION_BACKEND=ROCM_FLASH pytest -v -s basic_correctness/test_basic_correctness.py
  # basic_correctness/test_chunked_prefill.py, one run per backend
  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
  - VLLM_ATTENTION_BACKEND=ROCM_FLASH pytest -v -s basic_correctness/test_chunked_prefill.py
|
2024-03-14 08:02:21 +08:00
|
|
|
|
2024-03-06 10:23:34 +08:00
|
|
|
# Runs pytest on the core test target.
- label: Core Test
  command: pytest -v -s core
|
2024-02-19 08:44:50 +08:00
|
|
|
|
|
|
|
|
# Runs test_comm_ops.py from the distributed test directory with two GPUs.
- label: Distributed Comm Ops Test
  command: pytest -v -s test_comm_ops.py
  working_dir: "/vllm-workspace/tests/distributed"
  num_gpus: 2  # only 1 or 2 are supported for now
|
|
|
|
|
|
2024-03-27 15:33:26 +08:00
|
|
|
# Two-GPU distributed suite, run from the distributed test directory:
# the pynccl tests first, then the distributed correctness and chunked-prefill
# tests, each executed once per model named in TEST_DIST_MODEL.
- label: Distributed Tests
  working_dir: "/vllm-workspace/tests/distributed"
  num_gpus: 2  # only 1 or 2 are supported for now
  commands:
  - pytest -v -s test_pynccl.py
  - pytest -v -s test_pynccl_library.py
  # basic distributed correctness, per model
  - TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
  - TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf pytest -v -s test_basic_distributed_correctness.py
  # chunked prefill, per model
  - TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_chunked_prefill_distributed.py
  - TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf pytest -v -s test_chunked_prefill_distributed.py
|
2024-01-15 04:37:58 +08:00
|
|
|
|
|
|
|
|
# One pytest invocation covering the engine and tokenization targets plus
# test_sequence.py and test_config.py.
- label: Engine Test
  command: pytest -v -s engine tokenization test_sequence.py test_config.py
|
2024-01-15 04:37:58 +08:00
|
|
|
|
2024-01-17 13:33:14 +08:00
|
|
|
# Entrypoint tests, split into two pytest invocations.
- label: Entrypoints Test
  commands:
  # These tests have to be run separately, because each one will allocate
  # all possible GPU memory.
  - pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py
  - pytest -v -s entrypoints/test_server_oot_registration.py
|
2024-01-17 13:33:14 +08:00
|
|
|
|
2024-03-29 05:36:10 +08:00
|
|
|
# Executes the example scripts directly with python3 from the examples
# directory.
- label: Examples Test
  working_dir: "/vllm-workspace/examples"
  commands:
  # The AWS CLI is installed for llava_example.py.
  - pip install awscli
  - python3 offline_inference.py
  - python3 offline_inference_with_prefix.py
  - python3 llm_engine_example.py
  - python3 llava_example.py
|
|
|
|
|
|
2024-03-18 05:56:30 +08:00
|
|
|
# Kernel tests split over 4 parallel jobs; each job hands pytest its own
# shard id and the total shard count from the BUILDKITE_PARALLEL_JOB* variables.
# NOTE(review): the doubled `$$` presumably defers variable expansion to job
# runtime (Buildkite escaping) - confirm against the template/agent behaviour.
- label: Kernels Test %N
  command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
  parallelism: 4
|
2024-01-15 04:37:58 +08:00
|
|
|
|
|
|
|
|
# Runs the models test target after the image-download script, skipping
# test_llava.py and test_mistral.py via --ignore.
- label: Models Test
  commands:
  - bash ../.buildkite/download-images.sh
  - pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
|
2024-01-15 04:37:58 +08:00
|
|
|
|
2024-03-26 05:16:30 +08:00
|
|
|
# Runs models/test_llava.py on its own, after the image-download script.
- label: Llava Test
  commands:
  - bash ../.buildkite/download-images.sh
  - pytest -v -s models/test_llava.py
|
|
|
|
|
|
2024-01-18 08:32:10 +08:00
|
|
|
# Runs pytest on the prefix_caching test target.
- label: Prefix Caching Test
  commands:
  - pytest -v -s prefix_caching
|
|
|
|
|
|
2024-01-15 04:37:58 +08:00
|
|
|
# Runs pytest on the samplers test target.
- label: Samplers Test
  command: pytest -v -s samplers
|
2024-01-15 04:37:58 +08:00
|
|
|
|
2024-03-21 07:25:01 +08:00
|
|
|
# Runs the single test file test_logits_processor.py.
- label: LogitsProcessor Test
  command: pytest -v -s test_logits_processor.py
|
|
|
|
|
|
2024-01-15 04:37:58 +08:00
|
|
|
# Runs pytest on the worker test target.
- label: Worker Test
  command: pytest -v -s worker
|
|
|
|
|
|
2024-03-09 15:32:46 +08:00
|
|
|
# Runs pytest on the spec_decode test target.
- label: Speculative decoding tests
  command: pytest -v -s spec_decode
|
|
|
|
|
|
2024-03-18 05:56:30 +08:00
|
|
|
# LoRA tests split over 4 parallel jobs; each job hands pytest its own shard
# id and the total shard count from the BUILDKITE_PARALLEL_JOB* variables.
# NOTE(review): the doubled `$$` presumably defers variable expansion to job
# runtime (Buildkite escaping) - confirm against the template/agent behaviour.
- label: LoRA Test %N
  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
  parallelism: 4
|
2024-01-24 07:26:37 +08:00
|
|
|
|
2024-04-14 08:13:01 +08:00
|
|
|
# Installs curl and libsodium23 with apt-get, then runs the tensorizer_loader
# tests. `-y` answers apt-get's confirmation prompt automatically, so the
# install cannot stall or abort when the job has no interactive terminal.
- label: Tensorizer Test
  command: apt-get install -y curl libsodium23 && pytest -v -s tensorizer_loader
|
2024-04-14 08:13:01 +08:00
|
|
|
|
2024-02-23 06:00:12 +08:00
|
|
|
# Runs pytest on the metrics test target.
- label: Metrics Test
  command: pytest -v -s metrics
|
|
|
|
|
|
2024-01-15 04:37:58 +08:00
|
|
|
# Installs aiohttp, then launches run-benchmarks.sh from the .buildkite
# directory.
- label: Benchmarks
  working_dir: "/vllm-workspace/.buildkite"
  commands:
  - pip install aiohttp
  - bash run-benchmarks.sh
|
2024-02-13 14:53:07 +08:00
|
|
|
|
|
|
|
|
# Installs the docs requirements and builds the HTML documentation from the
# docs directory. The step is flagged `no_gpu`; the flag is written as the
# canonical lowercase boolean so it parses the same way under every YAML
# loader and passes yamllint's `truthy` rule.
- label: Documentation Build
  working_dir: "/vllm-workspace/test_docs/docs"
  no_gpu: true
  commands:
  - pip install -r requirements-docs.txt
  # The backslash-escaped quotes are part of the command text and are kept
  # verbatim (plain scalar, so YAML does not process the backslashes).
  - SPHINXOPTS=\"-W\" make html
|