// Copyright (c) 2022, Tri Dao.
// Splitting the different head dimensions to different files to speed up compilation.
#include "fmha_bwd_launch_template.h"
// Backward-pass launcher for head dimension 32.
//
// Dispatches on the element type (fp16 vs bf16) via FP16_SWITCH — which
// presumably provides `elem_type` inside the lambda; confirm against the
// macro definition — then selects kernel traits by key-sequence-length
// bucket and hands off to run_fmha_bwd_loop.
//
// Trait template arguments: <seqlen_k bucket, head dim, warps?, ..., elem_type>
// (only the first two are grounded here: 128/256 bucket and head dim 32 —
// TODO confirm the remaining parameters' meaning against FMHA_kernel_traits).
void run_fmha_bwd_hdim32(FMHA_dgrad_params &params, cudaStream_t stream, const bool configure) {
    FP16_SWITCH(params.is_bf16, ([&] {
        if (params.seqlen_k == 128) {
            // Short sequences: 128-wide kernel traits.
            using Traits128 = FMHA_kernel_traits<128, 32, 16, 1, 8, 0x08u, elem_type>;
            run_fmha_bwd_loop<Traits128>(params, stream, configure);
        } else if (params.seqlen_k >= 256) {
            // Longer sequences: 256-wide kernel traits (looped over seqlen).
            using Traits256 = FMHA_kernel_traits<256, 32, 16, 1, 8, 0x08u, elem_type>;
            run_fmha_bwd_loop<Traits256>(params, stream, configure);
        }
        // NOTE(review): seqlen_k in (128, 256) launches nothing — presumably
        // callers pad seqlen_k to a multiple of 128; verify at the call site.
    }));
}