| .. |
|
alibi.h
|
Fix typos of comments about shape. (#837)
|
2024-06-30 22:40:59 -07:00 |
|
block_info.h
|
Change inline to __forceinline__, use __grid_constant__ param
|
2024-01-20 17:38:47 -08:00 |
|
dropout.h
|
Refactor masking in fwd pass into 1 object
|
2024-01-20 17:39:53 -08:00 |
|
flash_bwd_hdim32_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim32_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim64_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim64_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim96_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim96_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim128_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim128_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim160_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim160_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim192_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim192_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim224_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim224_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim256_bf16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_hdim256_fp16_sm80.cu
|
Remove configure in bwd kernel launch
|
2024-01-21 15:28:33 -08:00 |
|
flash_bwd_kernel.h
|
Fix typos of comments about shape. (#837)
|
2024-06-30 22:40:59 -07:00 |
|
flash_bwd_launch_template.h
|
Add in macros for defining __grid_constant__ (#852)
|
2024-03-15 00:48:54 -07:00 |
|
flash_bwd_preprocess_kernel.h
|
Support unpadded LSE layout (#970)
|
2024-06-27 02:38:13 -07:00 |
|
flash_fwd_hdim32_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim32_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim32_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim32_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim64_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim64_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim64_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim64_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim96_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim96_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim96_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim96_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim128_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim128_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim128_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim128_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim160_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim160_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim160_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim160_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim192_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim192_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim192_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim192_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim224_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim224_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim224_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim224_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim256_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim256_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim256_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_hdim256_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_kernel.h
|
Minor cleanup of softcapping
|
2024-07-09 22:57:03 -07:00 |
|
flash_fwd_launch_template.h
|
Don't support softcap and dropout at the same time
|
2024-07-10 11:23:12 -07:00 |
|
flash_fwd_split_hdim32_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim32_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim32_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim32_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim64_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim64_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim64_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim64_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim96_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim96_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim96_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim96_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim128_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim128_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim128_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim128_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim160_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim160_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim160_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim160_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim192_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim192_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim192_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim192_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim224_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim224_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim224_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim224_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim256_bf16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim256_bf16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim256_fp16_causal_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash_fwd_split_hdim256_fp16_sm80.cu
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
flash.h
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
generate_kernels.py
|
Split into more .cu files to speed up compilation
|
2024-07-10 00:24:04 -07:00 |
|
kernel_traits.h
|
Update to Cutlass 3.5
|
2024-05-26 12:49:33 -07:00 |
|
mask.h
|
Fix typos of comments about shape. (#837)
|
2024-06-30 22:40:59 -07:00 |
|
philox.cuh
|
Change inline to __forceinline__, use __grid_constant__ param
|
2024-01-20 17:38:47 -08:00 |
|
rotary.h
|
Update to Cutlass 3.5
|
2024-05-26 12:49:33 -07:00 |
|
softmax.h
|
Add the option for the macro and note (#893)
|
2024-03-27 19:12:11 -07:00 |
|
static_switch.h
|
Implement softcapping. (#1025)
|
2024-07-08 11:24:48 -07:00 |
|
utils.h
|
Update to Cutlass 3.5
|
2024-05-26 12:49:33 -07:00 |