vllm/kernels at 36eecfbddb9ac2c491174c86b28ee83c4773eb5e - vllm

youkaichao 9aaf14c62e [misc] add forward context for attention (#9029 )	2024-10-03 12:09:42 -07:00
..
__init__.py	[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425 )	2024-05-13 23:50:09 +09:00
allclose_default.py	[ROCm] Fix some kernels failed unit tests (#2498 )	2024-02-05 14:25:36 -08:00
conftest.py	[Kernel] Use flashinfer for decoding (#4353 )	2024-05-03 15:51:27 -07:00
quant_utils.py	[Feature][Hardware][Amd] Add fp8 Linear Layer for Rocm (#7210 )	2024-08-16 10:06:30 -07:00
test_activation.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_aqlm.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_attention_selector.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_attention.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_awq_triton.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_awq.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_blocksparse_attention.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_cache.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_causal_conv1d.py	[Kernel][Model] Varlen prefill + Prefill chunking support for mamba kernels and Jamba model (#8533 )	2024-09-29 17:35:58 -04:00
test_cutlass.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_encoder_decoder_attn.py	[torch.compile] use empty tensor instead of None for profiling (#8875 )	2024-09-27 08:11:32 -07:00
test_flash_attn.py	[misc] add forward context for attention (#9029 )	2024-10-03 12:09:42 -07:00
test_flashinfer.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_fp8_quant.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_ggml.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_gguf.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_gptq.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_int8_quant.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_layernorm.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_machete_gemm.py	[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701 )	2024-09-23 13:46:26 -04:00
test_mamba_ssm.py	[Kernel][Model] Varlen prefill + Prefill chunking support for mamba kernels and Jamba model (#8533 )	2024-09-29 17:35:58 -04:00
test_marlin_gemm.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_moe.py	[Bugfix] Fix Marlin MoE act order when is_k_full == False (#8741 )	2024-09-28 18:19:40 -07:00
test_permute_cols.py	[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701 )	2024-09-23 13:46:26 -04:00
test_pos_encoding.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_prefix_prefill.py	[CI/Build] Avoid CUDA initialization (#8534 )	2024-09-18 10:38:11 +00:00
test_rotary_embedding.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
test_utils.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00
utils.py	[Kernel] Fullgraph and opcheck tests (#8479 )	2024-09-25 08:35:52 -06:00

__init__.py

[CI/Build] Move test_utils.py to tests/utils.py (#4425 )

2024-05-13 23:50:09 +09:00

allclose_default.py

[ROCm] Fix some kernels failed unit tests (#2498 )

2024-02-05 14:25:36 -08:00

conftest.py

[Kernel] Use flashinfer for decoding (#4353 )

2024-05-03 15:51:27 -07:00

quant_utils.py

[Feature][Hardware][Amd] Add fp8 Linear Layer for Rocm (#7210 )

2024-08-16 10:06:30 -07:00

test_activation.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_aqlm.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_attention_selector.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_attention.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_awq_triton.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_awq.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_blocksparse_attention.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_cache.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_causal_conv1d.py

[Kernel][Model] Varlen prefill + Prefill chunking support for mamba kernels and Jamba model (#8533 )

2024-09-29 17:35:58 -04:00

test_cutlass.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_encoder_decoder_attn.py

[torch.compile] use empty tensor instead of None for profiling (#8875 )

2024-09-27 08:11:32 -07:00

test_flash_attn.py

[misc] add forward context for attention (#9029 )

2024-10-03 12:09:42 -07:00

test_flashinfer.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_fp8_quant.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_ggml.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_gguf.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_gptq.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_int8_quant.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_layernorm.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_machete_gemm.py

[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701 )

2024-09-23 13:46:26 -04:00

test_mamba_ssm.py

[Kernel][Model] Varlen prefill + Prefill chunking support for mamba kernels and Jamba model (#8533 )

2024-09-29 17:35:58 -04:00

test_marlin_gemm.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_moe.py

[Bugfix] Fix Marlin MoE act order when is_k_full == False (#8741 )

2024-09-28 18:19:40 -07:00

test_permute_cols.py

[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701 )

2024-09-23 13:46:26 -04:00

test_pos_encoding.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_prefix_prefill.py

[CI/Build] Avoid CUDA initialization (#8534 )

2024-09-18 10:38:11 +00:00

test_rotary_embedding.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

test_utils.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00

utils.py

[Kernel] Fullgraph and opcheck tests (#8479 )

2024-09-25 08:35:52 -06:00