[Build] Temporarily Disable Kernels and LoRA tests (#6961)

This commit is contained in:
Simon Mo 2024-07-30 14:59:48 -07:00 committed by GitHub
parent 6ca8031e71
commit 40c27a7cbb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -155,12 +155,12 @@ steps:
- pytest -v -s test_inputs.py - pytest -v -s test_inputs.py
- pytest -v -s multimodal - pytest -v -s multimodal
- label: Kernels Test %N # - label: Kernels Test %N
#mirror_hardwares: [amd] # #mirror_hardwares: [amd]
commands: # commands:
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl # - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
- pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT # - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
parallelism: 4 # parallelism: 4
- label: Models Test - label: Models Test
#mirror_hardwares: [amd] #mirror_hardwares: [amd]
@@ -202,20 +202,20 @@ steps:
- export VLLM_ATTENTION_BACKEND=XFORMERS - export VLLM_ATTENTION_BACKEND=XFORMERS
- pytest -v -s spec_decode - pytest -v -s spec_decode
- label: LoRA Test %N # - label: LoRA Test %N
#mirror_hardwares: [amd] # #mirror_hardwares: [amd]
command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py # command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
parallelism: 4 # parallelism: 4
- label: LoRA Long Context (Distributed) # - label: LoRA Long Context (Distributed)
#mirror_hardwares: [amd] # #mirror_hardwares: [amd]
num_gpus: 4 # num_gpus: 4
# This test runs llama 13B, so it is required to run on 4 GPUs. # # This test runs llama 13B, so it is required to run on 4 GPUs.
commands: # commands:
# FIXIT: find out which code initialize cuda before running the test # # FIXIT: find out which code initialize cuda before running the test
# before the fix, we need to use spawn to test it # # before the fix, we need to use spawn to test it
- export VLLM_WORKER_MULTIPROC_METHOD=spawn # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s -x lora/test_long_context.py # - pytest -v -s -x lora/test_long_context.py
- label: Tensorizer Test - label: Tensorizer Test
#mirror_hardwares: [amd] #mirror_hardwares: [amd]