diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c4fc43dc..8c98aa36 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -310,13 +310,22 @@ steps: - pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s models/*.py --ignore=models/test_oot_registration.py -- label: Decoder-only Language Models Test # 1h36min +- label: Decoder-only Language Models Test (Standard) # 35min #mirror_hardwares: [amd] source_file_dependencies: - vllm/ - tests/models/decoder_only/language commands: - - pytest -v -s models/decoder_only/language + - pytest -v -s models/decoder_only/language/test_models.py + - pytest -v -s models/decoder_only/language/test_big_models.py + +- label: Decoder-only Language Models Test (Extended) # 1h20min + nightly: true + source_file_dependencies: + - vllm/ + - tests/models/decoder_only/language + commands: + - pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py - label: Decoder-only Multi-Modal Models Test # 1h31min #mirror_hardwares: [amd] diff --git a/tests/models/decoder_only/language/test_big_models.py b/tests/models/decoder_only/language/test_big_models.py index fcc15863..75625b35 100644 --- a/tests/models/decoder_only/language/test_big_models.py +++ b/tests/models/decoder_only/language/test_big_models.py @@ -21,10 +21,14 @@ MODELS = [ ] if not current_platform.is_cpu(): - # MiniCPM requires fused_moe which is not supported by CPU - MODELS.append("openbmb/MiniCPM3-4B") + MODELS += [ + # fused_moe which not supported on CPU + "openbmb/MiniCPM3-4B", + # Head size isn't supported on CPU + "h2oai/h2o-danube3-4b-base", + ] -#TODO: remove this after CPU float16 support ready +# TODO: remove this after CPU float16 support ready target_dtype = "float" if current_platform.is_cpu() else "half" diff --git a/tests/models/decoder_only/language/test_danube3_4b.py b/tests/models/decoder_only/language/test_danube3_4b.py deleted file mode 100644 index bdd498ed..00000000 --- a/tests/models/decoder_only/language/test_danube3_4b.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Compare the outputs of HF and vLLM when using greedy sampling. - -This tests danube3 separately because its head size isn't supported on CPU yet. - -Run `pytest tests/models/test_danube3_4b.py`. -""" -import pytest - -from ...utils import check_outputs_equal - -MODELS = ["h2oai/h2o-danube3-4b-base"] - -target_dtype = "half" - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", [target_dtype]) -@pytest.mark.parametrize("max_tokens", [32]) -def test_models( - hf_runner, - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - with hf_runner(model, dtype=dtype) as hf_model: - hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) - - with vllm_runner(model, dtype=dtype) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - - check_outputs_equal( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_outputs, - name_0="hf", - name_1="vllm", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", [target_dtype]) -def test_model_print( - vllm_runner, - model: str, - dtype: str, -) -> None: - with vllm_runner(model, dtype=dtype) as vllm_model: - # This test is for verifying whether the model's extra_repr - # can be printed correctly. - print(vllm_model.model.llm_engine.model_executor.driver_worker. - model_runner.model)