vllm/.buildkite/test-pipeline.yaml

# In this file, you can add more tests to run either by adding a new step or
# adding a new command to an existing step. See different options here for examples.
# This file is fed into the Jinja template `test-template.j2` to generate
# the final pipeline YAML file.

steps:
# Regression suite: runs test_regression.py under pytest.
- label: Regression Test
  # NOTE(review): presumably tells test-template.j2 to also run this step on
  # AMD hardware -- confirm against the template.
  mirror_hardwares:
  - amd
  working_dir: '/vllm-workspace/tests'  # optional
  command: 'pytest -v -s test_regression.py'

# Async engine suite: runs pytest over the async_engine target.
- label: AsyncEngine Test
  # mirror_hardwares: [amd]  (left commented out for this step)
  command: 'pytest -v -s async_engine'

# Basic correctness: runs test_basic_correctness.py and test_chunked_prefill.py
# once per VLLM_ATTENTION_BACKEND value (XFORMERS, FLASH_ATTN), then
# test_preemption.py with VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 set.
- label: Basic Correctness Test
  mirror_hardwares:
  - amd
  commands:
  - 'VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py'
  - 'VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py'
  - 'VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py'
  - 'VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py'
  - 'VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py'

# Core suite: runs pytest over the core target.
- label: Core Test
  mirror_hardwares:
  - amd
  command: 'pytest -v -s core'

# Communication-ops test: runs distributed/test_comm_ops.py from the tests
# directory with num_gpus set to 2.
- label: Distributed Comm Ops Test
  # mirror_hardwares: [amd]  (left commented out for this step)
  working_dir: '/vllm-workspace/tests'
  num_gpus: 2
  command: 'pytest -v -s distributed/test_comm_ops.py'

# Distributed suite (num_gpus: 2), run from the tests directory.
- label: Distributed Tests
  mirror_hardwares:
  - amd
  working_dir: '/vllm-workspace/tests'
  num_gpus: 2
  commands:
  - 'pytest -v -s distributed/test_pynccl_library.py'
  # The basic-correctness and chunked-prefill distributed tests are run for
  # two TEST_DIST_MODEL values (facebook/opt-125m, meta-llama/Llama-2-7b-hf):
  # first with DISTRIBUTED_EXECUTOR_BACKEND=ray ...
  - 'TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py'
  - 'TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py'
  - 'TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_chunked_prefill_distributed.py'
  - 'TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_chunked_prefill_distributed.py'
  # ... then again with DISTRIBUTED_EXECUTOR_BACKEND=mp.
  - 'TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py'
  - 'TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py'
  - 'TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py'
  - 'TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py'
  - 'pytest -v -s spec_decode/e2e/test_integration_dist.py'

# Multi-group distributed test: runs distributed/test_pynccl.py from the tests
# directory with num_gpus set to 4.
- label: Distributed Tests (Multiple Groups)
  # mirror_hardwares: [amd]  (left commented out for this step)
  working_dir: '/vllm-workspace/tests'
  num_gpus: 4
  commands:
  - 'pytest -v -s distributed/test_pynccl.py'

# Engine suite: a single pytest invocation covering the engine and
# tokenization targets plus test_sequence.py, test_config.py and
# test_logger.py.
- label: Engine Test
  mirror_hardwares:
  - amd
  command: 'pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py'

# Entrypoints suite, split into two pytest invocations.
- label: Entrypoints Test
  # mirror_hardwares: [amd]  (left commented out for this step)
  commands:
  # These tests have to be separated, because each one will allocate all
  # possible GPU memory.
  - 'pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py'
  - 'pytest -v -s entrypoints/test_server_oot_registration.py'

# Examples smoke test: runs the example scripts from the examples directory.
- label: Examples Test
  mirror_hardwares:
  - amd
  working_dir: '/vllm-workspace/examples'
  commands:
  # awscli is installed for llava_example.py and tensorizer for
  # tensorize_vllm_model.py.
  - 'pip install awscli tensorizer'
  - 'python3 offline_inference.py'
  - 'python3 offline_inference_with_prefix.py'
  - 'python3 llm_engine_example.py'
  - 'python3 llava_example.py'
  # Serialize facebook/opt-125m into /tmp/ with suffix v1, then (only if that
  # succeeds) deserialize from the model.tensors path given below.
  - 'python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors'

# Kernels suite, sharded (parallelism: 4): each job passes
# BUILDKITE_PARALLEL_JOB / BUILDKITE_PARALLEL_JOB_COUNT to pytest as
# --shard-id / --num-shards.
# NOTE(review): `%N` in the label and the `$$` prefix look like Buildkite's
# per-job label placeholder and interpolation escape -- confirm in its docs.
- label: Kernels Test %N
  # mirror_hardwares: [amd]  (left commented out for this step)
  parallelism: 4
  command: 'pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT'

# Models suite: runs download-images.sh, then pytest over the models target
# with models/test_llava.py ignored.
- label: Models Test
  # mirror_hardwares: [amd]  (left commented out for this step)
  commands:
  - 'bash ../.buildkite/download-images.sh'
  - 'pytest -v -s models --ignore=models/test_llava.py'

# Llava test: runs download-images.sh, then pytest on models/test_llava.py.
- label: Llava Test
  mirror_hardwares:
  - amd
  commands:
  - 'bash ../.buildkite/download-images.sh'
  - 'pytest -v -s models/test_llava.py'

# Prefix caching suite: runs pytest over the prefix_caching target.
- label: Prefix Caching Test
  mirror_hardwares:
  - amd
  commands:
  - 'pytest -v -s prefix_caching'

# Samplers suite: runs pytest over the samplers target.
- label: Samplers Test
  # mirror_hardwares: [amd]  (left commented out for this step)
  command: 'pytest -v -s samplers'

# Logits processor test: runs pytest on test_logits_processor.py.
- label: LogitsProcessor Test
  mirror_hardwares:
  - amd
  command: 'pytest -v -s test_logits_processor.py'

# Worker suite: runs pytest over the worker target.
- label: Worker Test
  mirror_hardwares:
  - amd
  command: 'pytest -v -s worker'

# Speculative decoding suite: runs pytest over the spec_decode target.
- label: Speculative decoding tests
  # mirror_hardwares: [amd]  (left commented out for this step)
  command: 'pytest -v -s spec_decode'

# LoRA suite, sharded (parallelism: 4): each job passes
# BUILDKITE_PARALLEL_JOB / BUILDKITE_PARALLEL_JOB_COUNT to pytest as
# --shard-id / --num-shards.
# NOTE(review): `%N` in the label and the `$$` prefix look like Buildkite's
# per-job label placeholder and interpolation escape -- confirm in its docs.
- label: LoRA Test %N
  # mirror_hardwares: [amd]  (left commented out for this step)
  parallelism: 4
  command: 'pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT'

# Tensorizer loader suite: installs the curl and libsodium23 system packages,
# then (only if the install succeeds) runs pytest over tensorizer_loader.
- label: Tensorizer Test
  #mirror_hardwares: [amd]
  # -y answers apt's confirmation prompt automatically, so the install cannot
  # stall or abort in the non-interactive CI shell when apt wants to pull in
  # additional packages.
  command: apt-get install -y curl libsodium23 && pytest -v -s tensorizer_loader

# Metrics suite: runs pytest over the metrics target.
- label: Metrics Test
  mirror_hardwares:
  - amd
  command: 'pytest -v -s metrics'

# Quantization suite: runs pytest over the quantization target.
- label: Quantization Test
  # mirror_hardwares: [amd]  (left commented out for this step)
  command: 'pytest -v -s quantization'

# Benchmarks: installs aiohttp, then runs run-benchmarks.sh from the
# .buildkite directory.
- label: Benchmarks
  mirror_hardwares:
  - amd
  working_dir: '/vllm-workspace/.buildkite'
  commands:
  - 'pip install aiohttp'
  - 'bash run-benchmarks.sh'

# Documentation build: installs requirements-docs.txt, then runs `make html`
# with SPHINXOPTS set. The step is flagged no_gpu.
- label: Documentation Build
  working_dir: '/vllm-workspace/test_docs/docs'
  no_gpu: true
  commands:
  - 'pip install -r requirements-docs.txt'
  # Left as a plain scalar so the backslash-escaped quotes stay exactly as
  # written.
  # NOTE(review): the escaping is presumably needed by the way
  # test-template.j2 embeds the command -- confirm against the template.
  - SPHINXOPTS=\"-W\" make html
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`# In this file, you can add more tests to run either by adding a new step or`
			`# adding a new command to an existing step. See different options here for examples.`
			# This script will be feed into Jinja template in `test-template.j2` to generate
			`# the final pipeline yaml file.`

			`steps:`
			`- label: Regression Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`mirror_hardwares: [amd]`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`command: pytest -v -s test_regression.py`
			`working_dir: "/vllm-workspace/tests" # optional`

			`- label: AsyncEngine Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`command: pytest -v -s async_engine`

[Test] Add basic correctness test (#2908) 2024-02-19 08:44:50 +08:00			`- label: Basic Correctness Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`mirror_hardwares: [amd]`
[Test] Test multiple attn backend for chunked prefill. (#4023) 2024-04-13 00:56:57 +08:00			`commands:`
			`- VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py`
			`- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py`
			`- VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py`
			`- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py`
[Bug fix][Core] assert num_new_tokens == 1 fails when SamplingParams.n is not 1 and max_tokens is large & Add tests for preemption (#4451) 2024-05-02 10:24:13 +08:00			`- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py`
[Hotfix] [Debug] test_openai_server.py::test_guided_regex_completion (#3383) 2024-03-14 08:02:21 +08:00
[Tests] Add block manager and scheduler tests (#3108) 2024-03-06 10:23:34 +08:00			`- label: Core Test`
[CI/Build] AMD CI pipeline with extended set of tests. (#4267) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-05-03 03:29:07 +08:00			`mirror_hardwares: [amd]`
[Tests] Add block manager and scheduler tests (#3108) 2024-03-06 10:23:34 +08:00			`command: pytest -v -s core`
[Test] Add basic correctness test (#2908) 2024-02-19 08:44:50 +08:00
			`- label: Distributed Comm Ops Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425) Since #4335 was merged, I've noticed that the definition of ServerRunner in the tests is the same as in the test for OpenAI API. I have moved the class to the test utilities to avoid code duplication. (Although it only has been repeated twice so far, I will add another similar test suite in #4200 which would duplicate the code a third time) Also, I have moved the test utilities file (test_utils.py) to under the test directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file in order to relative import tests/utils.py. 2024-05-13 22:50:09 +08:00			`command: pytest -v -s distributed/test_comm_ops.py`
			`working_dir: "/vllm-workspace/tests"`
[Core][Distributed] enable multiple tp group (#4512) Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> 2024-05-02 12:28:21 +08:00			`num_gpus: 2`
[Test] Add basic correctness test (#2908) 2024-02-19 08:44:50 +08:00
[Core] remove cupy dependency (#3625) 2024-03-27 15:33:26 +08:00			`- label: Distributed Tests`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`mirror_hardwares: [amd]`
[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425) Since #4335 was merged, I've noticed that the definition of ServerRunner in the tests is the same as in the test for OpenAI API. I have moved the class to the test utilities to avoid code duplication. (Although it only has been repeated twice so far, I will add another similar test suite in #4200 which would duplicate the code a third time) Also, I have moved the test utilities file (test_utils.py) to under the test directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file in order to relative import tests/utils.py. 2024-05-13 22:50:09 +08:00			`working_dir: "/vllm-workspace/tests"`
			`num_gpus: 2`
[Core] remove cupy dependency (#3625) 2024-03-27 15:33:26 +08:00			`commands:`
[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425) Since #4335 was merged, I've noticed that the definition of ServerRunner in the tests is the same as in the test for OpenAI API. I have moved the class to the test utilities to avoid code duplication. (Although it only has been repeated twice so far, I will add another similar test suite in #4200 which would duplicate the code a third time) Also, I have moved the test utilities file (test_utils.py) to under the test directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file in order to relative import tests/utils.py. 2024-05-13 22:50:09 +08:00			`- pytest -v -s distributed/test_pynccl_library.py`
[Core] Add MultiprocessingGPUExecutor (#4539) Co-authored-by: SAHIL SUNEJA <suneja@us.ibm.com> 2024-05-15 01:38:59 +08:00			`- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py`
			`- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py`
			`- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_chunked_prefill_distributed.py`
			`- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_chunked_prefill_distributed.py`
			`- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py`
			`- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py`
			`- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py`
			`- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py`
[Speculative decoding][Re-take] Enable TP>1 speculative decoding (#4840) Co-authored-by: Cade Daniel <edacih@gmail.com> Co-authored-by: Cade Daniel <cade@anyscale.com> 2024-05-16 15:53:51 +08:00			`- pytest -v -s spec_decode/e2e/test_integration_dist.py`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00
[Core][Distributed] enable multiple tp group (#4512) Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> 2024-05-02 12:28:21 +08:00			`- label: Distributed Tests (Multiple Groups)`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425) Since #4335 was merged, I've noticed that the definition of ServerRunner in the tests is the same as in the test for OpenAI API. I have moved the class to the test utilities to avoid code duplication. (Although it only has been repeated twice so far, I will add another similar test suite in #4200 which would duplicate the code a third time) Also, I have moved the test utilities file (test_utils.py) to under the test directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file in order to relative import tests/utils.py. 2024-05-13 22:50:09 +08:00			`working_dir: "/vllm-workspace/tests"`
[Core][Distributed] enable multiple tp group (#4512) Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> 2024-05-02 12:28:21 +08:00			`num_gpus: 4`
			`commands:`
[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425) Since #4335 was merged, I've noticed that the definition of ServerRunner in the tests is the same as in the test for OpenAI API. I have moved the class to the test utilities to avoid code duplication. (Although it only has been repeated twice so far, I will add another similar test suite in #4200 which would duplicate the code a third time) Also, I have moved the test utilities file (test_utils.py) to under the test directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file in order to relative import tests/utils.py. 2024-05-13 22:50:09 +08:00			`- pytest -v -s distributed/test_pynccl.py`
[Core][Distributed] enable multiple tp group (#4512) Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> 2024-05-02 12:28:21 +08:00
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`- label: Engine Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`mirror_hardwares: [amd]`
[Core] add an option to log every function call to for debugging hang/crash in distributed inference (#4079) Co-authored-by: Simon Mo <simon.mo@hey.com> 2024-04-19 07:15:12 +08:00			`command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00
OpenAI Server refactoring (#2360) 2024-01-17 13:33:14 +08:00			`- label: Entrypoints Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[Core] enable out-of-tree model register (#3871) 2024-04-07 08:11:41 +08:00			`commands:`
			`# these tests have to be separated, because each one will allocate all posible GPU memory`
			`- pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py`
			`- pytest -v -s entrypoints/test_server_oot_registration.py`
OpenAI Server refactoring (#2360) 2024-01-17 13:33:14 +08:00
[CI] Add test case to run examples scripts (#3638) 2024-03-29 05:36:10 +08:00			`- label: Examples Test`
			`working_dir: "/vllm-workspace/examples"`
[CI/Build] AMD CI pipeline with extended set of tests. (#4267) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-05-03 03:29:07 +08:00			`mirror_hardwares: [amd]`
[CI] Add test case to run examples scripts (#3638) 2024-03-29 05:36:10 +08:00			`commands:`
			`# install aws cli for llava_example.py`
[Frontend] [Core] perf: Automatically detect vLLM-tensorized model, update `tensorizer` to version 2.9.0 (#4208) 2024-05-14 05:57:07 +08:00			`# install tensorizer for tensorize_vllm_model.py`
			`- pip install awscli tensorizer`
[CI] Add test case to run examples scripts (#3638) 2024-03-29 05:36:10 +08:00			`- python3 offline_inference.py`
			`- python3 offline_inference_with_prefix.py`
			`- python3 llm_engine_example.py`
			`- python3 llava_example.py`
[Frontend] [Core] perf: Automatically detect vLLM-tensorized model, update `tensorizer` to version 2.9.0 (#4208) 2024-05-14 05:57:07 +08:00			`- python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors`
[CI] Add test case to run examples scripts (#3638) 2024-03-29 05:36:10 +08:00
[CI] Shard tests for LoRA and Kernels to speed up (#3445) 2024-03-18 05:56:30 +08:00			`- label: Kernels Test %N`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[CI] Shard tests for LoRA and Kernels to speed up (#3445) 2024-03-18 05:56:30 +08:00			`command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT`
			`parallelism: 4`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00
			`- label: Models Test`
[Build/CI] Fixing 'docker run' to re-enable AMD CI tests. (#4642) 2024-05-08 00:23:17 +08:00			`#mirror_hardwares: [amd]`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`commands:`
[Feature] Add vision language model support. (#3042) 2024-03-26 05:16:30 +08:00			`- bash ../.buildkite/download-images.sh`
[CI] Make mistral tests pass (#4596) 2024-05-08 23:44:35 +08:00			`- pytest -v -s models --ignore=models/test_llava.py`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00
[Feature] Add vision language model support. (#3042) 2024-03-26 05:16:30 +08:00			`- label: Llava Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`mirror_hardwares: [amd]`
[Feature] Add vision language model support. (#3042) 2024-03-26 05:16:30 +08:00			`commands:`
			`- bash ../.buildkite/download-images.sh`
			`- pytest -v -s models/test_llava.py`

[Experimental] Prefix Caching Support (#1669) Co-authored-by: DouHappy <2278958187@qq.com> Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> 2024-01-18 08:32:10 +08:00			`- label: Prefix Caching Test`
[CI/Build] AMD CI pipeline with extended set of tests. (#4267) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-05-03 03:29:07 +08:00			`mirror_hardwares: [amd]`
[Experimental] Prefix Caching Support (#1669) Co-authored-by: DouHappy <2278958187@qq.com> Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> 2024-01-18 08:32:10 +08:00			`commands:`
			`- pytest -v -s prefix_caching`

[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`- label: Samplers Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[1/n][Chunked Prefill] Refactor input query shapes (#3236) 2024-03-21 05:46:05 +08:00			`command: pytest -v -s samplers`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00
Migrate `logits` computation and gather to `model_runner` (#3233) 2024-03-21 07:25:01 +08:00			`- label: LogitsProcessor Test`
[CI/Build] AMD CI pipeline with extended set of tests. (#4267) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-05-03 03:29:07 +08:00			`mirror_hardwares: [amd]`
Migrate `logits` computation and gather to `model_runner` (#3233) 2024-03-21 07:25:01 +08:00			`command: pytest -v -s test_logits_processor.py`

[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`- label: Worker Test`
[CI/Build] AMD CI pipeline with extended set of tests. (#4267) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-05-03 03:29:07 +08:00			`mirror_hardwares: [amd]`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`command: pytest -v -s worker`

[Speculative decoding 3/9] Worker which speculates, scores, and applies rejection sampling (#3103) 2024-03-09 15:32:46 +08:00			`- label: Speculative decoding tests`
[Build/CI] Fixing 'docker run' to re-enable AMD CI tests. (#4642) 2024-05-08 00:23:17 +08:00			`#mirror_hardwares: [amd]`
[Speculative decoding 3/9] Worker which speculates, scores, and applies rejection sampling (#3103) 2024-03-09 15:32:46 +08:00			`command: pytest -v -s spec_decode`

[CI] Shard tests for LoRA and Kernels to speed up (#3445) 2024-03-18 05:56:30 +08:00			`- label: LoRA Test %N`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[1/n][Chunked Prefill] Refactor input query shapes (#3236) 2024-03-21 05:46:05 +08:00			`command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT`
[CI] Shard tests for LoRA and Kernels to speed up (#3445) 2024-03-18 05:56:30 +08:00			`parallelism: 4`
[Experimental] Add multi-LoRA support (#1804) Co-authored-by: Chen Shen <scv119@gmail.com> Co-authored-by: Shreyas Krishnaswamy <shrekris@anyscale.com> Co-authored-by: Avnish Narayan <avnish@anyscale.com> 2024-01-24 07:26:37 +08:00
[Frontend] [Core] feat: Add model loading using `tensorizer` (#3476) 2024-04-14 08:13:01 +08:00			`- label: Tensorizer Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[Core] Refactor model loading code (#4097) 2024-04-17 02:34:39 +08:00			`command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader`
[Frontend] [Core] feat: Add model loading using `tensorizer` (#3476) 2024-04-14 08:13:01 +08:00
Include tokens from prompt phase in `counter_generation_tokens` (#2802) 2024-02-23 06:00:12 +08:00			`- label: Metrics Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`mirror_hardwares: [amd]`
Include tokens from prompt phase in `counter_generation_tokens` (#2802) 2024-02-23 06:00:12 +08:00			`command: pytest -v -s metrics`

[BUG] fixed fp8 conflict with aqlm (#4307) Fixes fp8 iterface which broke in AQLM merge. 2024-04-24 09:26:33 +08:00			`- label: Quantization Test`
[Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) 2024-05-17 11:58:25 +08:00			`#mirror_hardwares: [amd]`
[BUG] fixed fp8 conflict with aqlm (#4307) Fixes fp8 iterface which broke in AQLM merge. 2024-04-24 09:26:33 +08:00			`command: pytest -v -s quantization`

[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`- label: Benchmarks`
			`working_dir: "/vllm-workspace/.buildkite"`
[CI/Build] AMD CI pipeline with extended set of tests. (#4267) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-05-03 03:29:07 +08:00			`mirror_hardwares: [amd]`
[CI] Add Buildkite (#2355) 2024-01-15 04:37:58 +08:00			`commands:`
			`- pip install aiohttp`
			`- bash run-benchmarks.sh`
[CI] Ensure documentation build is checked in CI (#2842) 2024-02-13 14:53:07 +08:00
			`- label: Documentation Build`
[CI/Build] refactor dockerfile & fix pip cache [CI/Build] fix pip cache with vllm_nccl & refactor dockerfile to build wheels (#3859) 2024-04-05 12:53:16 +08:00			`working_dir: "/vllm-workspace/test_docs/docs"`
[CI] Ensure documentation build is checked in CI (#2842) 2024-02-13 14:53:07 +08:00			`no_gpu: True`
			`commands:`
			`- pip install -r requirements-docs.txt`
			`- SPHINXOPTS=\"-W\" make html`