| .. |
|
__init__.py
|
[CI/Build] Move test_utils.py to tests/utils.py (#4425)
|
2024-05-13 23:50:09 +09:00 |
|
allclose_default.py
|
[ROCm] Fix some kernels failed unit tests (#2498)
|
2024-02-05 14:25:36 -08:00 |
|
conftest.py
|
[Kernel] Use flashinfer for decoding (#4353)
|
2024-05-03 15:51:27 -07:00 |
|
quant_utils.py
|
[Hardware][ROCM] using current_platform.is_rocm (#9642)
|
2024-10-28 04:07:00 +00:00 |
|
test_activation.py
|
[Kernel] add kernel for FATReLU (#9610)
|
2024-10-24 16:18:27 +08:00 |
|
test_aqlm.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
test_attention_selector.py
|
[Hardware][ROCM] using current_platform.is_rocm (#9642)
|
2024-10-28 04:07:00 +00:00 |
|
test_attention.py
|
[Hardware][ROCM] using current_platform.is_rocm (#9642)
|
2024-10-28 04:07:00 +00:00 |
|
test_awq_marlin.py
|
[Bugfix] Try to handle older versions of pytorch (#9086)
|
2024-10-08 14:28:12 -07:00 |
|
test_awq_triton.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_awq.py
|
[Bugfix] Try to handle older versions of pytorch (#9086)
|
2024-10-08 14:28:12 -07:00 |
|
test_blocksparse_attention.py
|
[Hardware][ROCM] using current_platform.is_rocm (#9642)
|
2024-10-28 04:07:00 +00:00 |
|
test_cache.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_causal_conv1d.py
|
[Kernel][Model] Improve continuous batching for Jamba and Mamba (#9189)
|
2024-10-16 12:12:43 -04:00 |
|
test_cutlass.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
test_encoder_decoder_attn.py
|
[Hardware][ROCM] using current_platform.is_rocm (#9642)
|
2024-10-28 04:07:00 +00:00 |
|
test_flash_attn.py
|
[Kernel] Support sliding window in flash attention backend (#9403)
|
2024-10-20 10:57:52 -07:00 |
|
test_flashinfer.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_fp8_quant.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
test_ggml.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
test_gguf.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_gptq.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
test_int8_quant.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_layernorm.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_machete_gemm.py
|
[Bugfix] Fix support for dimension like integers and ScalarType (#9299)
|
2024-10-17 19:08:34 +00:00 |
|
test_mamba_ssm.py
|
[Kernel][Model] Improve continuous batching for Jamba and Mamba (#9189)
|
2024-10-16 12:12:43 -04:00 |
|
test_marlin_gemm.py
|
[Bugfix] Fix support for dimension like integers and ScalarType (#9299)
|
2024-10-17 19:08:34 +00:00 |
|
test_moe.py
|
[Hardware][ROCM] using current_platform.is_rocm (#9642)
|
2024-10-28 04:07:00 +00:00 |
|
test_permute_cols.py
|
[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701)
|
2024-09-23 13:46:26 -04:00 |
|
test_pos_encoding.py
|
[Misc] Standardize RoPE handling for Qwen2-VL (#9250)
|
2024-10-16 13:56:17 +08:00 |
|
test_prefix_prefill.py
|
[CI/Build] Avoid CUDA initialization (#8534)
|
2024-09-18 10:38:11 +00:00 |
|
test_rotary_embedding.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
test_utils.py
|
[Kernel] Fullgraph and opcheck tests (#8479)
|
2024-09-25 08:35:52 -06:00 |
|
utils.py
|
[Kernel] Zero point support in fused MarlinMoE kernel + AWQ Fused MoE (#8973)
|
2024-10-04 12:34:44 -06:00 |