* Split apart gemm reference templates into multiple TUs for parallel compilation * remove old files * better balancing of ref kernels across TUs * remove 3 newly added refcheck kernels and some unnecessary fp8 library instances to reduce lib size * remove auto fp8 kernels * remove some redundant kernels |
||
---|---|---|
.. | ||
conv2d.cu | ||
conv3d.cu | ||
conv_reference_operation.h | ||
gemm_e4m3a_e4m3out.cu | ||
gemm_e4m3a_e5m2out.cu | ||
gemm_e5m2a_e4m3out.cu | ||
gemm_e5m2a_e5m2out.cu | ||
gemm_fp8in_bf16out.cu | ||
gemm_fp8in_fp16out.cu | ||
gemm_fp8in_fp32out.cu | ||
gemm_fp32out.cu | ||
gemm_fp_other.cu | ||
gemm_int4.cu | ||
gemm_int8_canonical.cu | ||
gemm_int8_interleaved_32.cu | ||
gemm_int8_interleaved_64.cu | ||
gemm_reference_operation.h | ||
initialize_reference_operations.cu |