| .. |
|
__init__.py
|
[CI/Build] Move test_utils.py to tests/utils.py (#4425)
|
2024-05-13 23:50:09 +09:00 |
|
allclose_default.py
|
[ROCm] Fix some kernels failed unit tests (#2498)
|
2024-02-05 14:25:36 -08:00 |
|
conftest.py
|
[Kernel] Use flashinfer for decoding (#4353)
|
2024-05-03 15:51:27 -07:00 |
|
quant_utils.py
|
[Feature][Hardware][Amd] Add fp8 Linear Layer for Rocm (#7210)
|
2024-08-16 10:06:30 -07:00 |
|
test_activation.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_attention_selector.py
|
[Core] Subclass ModelRunner to support cross-attention & encoder sequences (towards eventual encoder/decoder model support) (#4942)
|
2024-08-06 16:51:47 -04:00 |
|
test_attention.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_blocksparse_attention.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_cache.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_cutlass.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_encoder_decoder_attn.py
|
[Core] Subclass ModelRunner to support cross-attention & encoder sequences (towards eventual encoder/decoder model support) (#4942)
|
2024-08-06 16:51:47 -04:00 |
|
test_flash_attn.py
|
register custom op for flash attn and use from torch.ops (#7536)
|
2024-08-15 22:38:56 -07:00 |
|
test_flashinfer.py
|
[BUG] fix crash on flashinfer backend with cudagraph disabled, when attention group_size not in [1,2,4,8] (#7509)
|
2024-08-21 08:54:31 -07:00 |
|
test_fp8_quant.py
|
[Feature][Hardware][Amd] Add fp8 Linear Layer for Rocm (#7210)
|
2024-08-16 10:06:30 -07:00 |
|
test_int8_quant.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_layernorm.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_machete_gemm.py
|
[Kernel] (1/N) Machete - Hopper Optimized Mixed Precision Linear Kernel (#7174)
|
2024-08-20 07:09:33 -06:00 |
|
test_marlin_gemm.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_moe.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_pos_encoding.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
test_prefix_prefill.py
|
[Core/Bugfix] Add FP8 K/V Scale and dtype conversion for prefix/prefill Triton Kernel (#7208)
|
2024-08-12 22:47:41 +00:00 |
|
test_rand.py
|
[CI] Try introducing isort. (#3495)
|
2024-03-25 07:59:47 -07:00 |
|
test_sampler.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |
|
utils.py
|
[Misc/Testing] Use torch.testing.assert_close (#7324)
|
2024-08-16 04:24:04 +00:00 |