| .. |
|
__init__.py
|
[CI/Build] Move test_utils.py to tests/utils.py (#4425)
|
2024-05-13 23:50:09 +09:00 |
|
allclose_default.py
|
[ROCm] Fix some kernels failed unit tests (#2498)
|
2024-02-05 14:25:36 -08:00 |
|
conftest.py
|
[Kernel] Use flashinfer for decoding (#4353)
|
2024-05-03 15:51:27 -07:00 |
|
quant_utils.py
|
[ Kernel ] FP8 Dynamic Per Token Quant - Add scale_ub (#6593)
|
2024-07-19 18:15:26 -07:00 |
|
test_activation.py
|
[Misc] Add CustomOp interface for device portability (#5255)
|
2024-06-05 09:18:19 -07:00 |
|
test_attention_selector.py
|
[Core] Subclass ModelRunner to support cross-attention & encoder sequences (towards eventual encoder/decoder model support) (#4942)
|
2024-08-06 16:51:47 -04:00 |
|
test_attention.py
|
[Model] H2O Danube3-4b (#6451)
|
2024-07-26 20:47:50 -07:00 |
|
test_blocksparse_attention.py
|
[Kernel][Attention] Separate Attention.kv_scale into k_scale and v_scale (#6081)
|
2024-07-16 15:31:32 -07:00 |
|
test_cache.py
|
[Model] H2O Danube3-4b (#6451)
|
2024-07-26 20:47:50 -07:00 |
|
test_cutlass.py
|
[Kernel] Add per-tensor and per-token AZP epilogues (#5941)
|
2024-08-06 18:17:08 +00:00 |
|
test_encoder_decoder_attn.py
|
[Core] Subclass ModelRunner to support cross-attention & encoder sequences (towards eventual encoder/decoder model support) (#4942)
|
2024-08-06 16:51:47 -04:00 |
|
test_flash_attn.py
|
[Bugfix][Kernel] Increased atol to fix failing tests (#7305)
|
2024-08-08 12:16:13 -04:00 |
|
test_flashinfer.py
|
[Kernel][Model] logits_soft_cap for Gemma2 with flashinfer (#6051)
|
2024-07-04 16:35:51 -07:00 |
|
test_fp8_quant.py
|
[Bugfix][Kernel] Use int64_t for indices in fp8 quant kernels (#6649)
|
2024-07-22 14:08:30 -06:00 |
|
test_int8_quant.py
|
[Misc] Disambiguate quantized types via a new ScalarType (#6396)
|
2024-08-02 13:51:58 -07:00 |
|
test_layernorm.py
|
[Misc] Add CustomOp interface for device portability (#5255)
|
2024-06-05 09:18:19 -07:00 |
|
test_marlin_gemm.py
|
[Misc] Disambiguate quantized types via a new ScalarType (#6396)
|
2024-08-02 13:51:58 -07:00 |
|
test_moe.py
|
[ Misc ] Refactor MoE to isolate Fp8 From Mixtral (#5970)
|
2024-07-02 21:54:35 +00:00 |
|
test_pos_encoding.py
|
[Model] H2O Danube3-4b (#6451)
|
2024-07-26 20:47:50 -07:00 |
|
test_prefix_prefill.py
|
[Core/Bugfix] Add FP8 K/V Scale and dtype conversion for prefix/prefill Triton Kernel (#7208)
|
2024-08-12 22:47:41 +00:00 |
|
test_rand.py
|
[CI] Try introducing isort. (#3495)
|
2024-03-25 07:59:47 -07:00 |
|
test_sampler.py
|
[Kernel][RFC] Refactor the punica kernel based on Triton (#5036)
|
2024-07-31 17:12:24 -07:00 |
|
utils.py
|
[Core] Subclass ModelRunner to support cross-attention & encoder sequences (towards eventual encoder/decoder model support) (#4942)
|
2024-08-06 16:51:47 -04:00 |