148 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			CMake
		
	
	
	
	
	
			
		
		
	
	
			148 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			CMake
		
	
	
	
	
	
| 
 | |
| # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 | |
| # SPDX-License-Identifier: BSD-3-Clause
 | |
| #
 | |
| # Redistribution and use in source and binary forms, with or without
 | |
| # modification, are permitted provided that the following conditions are met:
 | |
| #
 | |
| # 1. Redistributions of source code must retain the above copyright notice, this
 | |
| # list of conditions and the following disclaimer.
 | |
| #
 | |
| # 2. Redistributions in binary form must reproduce the above copyright notice,
 | |
| # this list of conditions and the following disclaimer in the documentation
 | |
| # and/or other materials provided with the distribution.
 | |
| #
 | |
| # 3. Neither the name of the copyright holder nor the names of its
 | |
| # contributors may be used to endorse or promote products derived from
 | |
| # this software without specific prior written permission.
 | |
| #
 | |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 | |
| # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 | |
| # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | |
| # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | |
| # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | |
| # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | |
| # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| 
 | |
| 
 | |
# Directory holding sources/headers shared between the examples; it is added to
# every example's include path by cutlass_example_add_executable().
set(CUTLASS_EXAMPLES_COMMON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/common")

# Aggregate targets. Each example executable is attached to `cutlass_examples`,
# and `test_examples` is passed as a DEPENDEES entry when example tests are
# registered.
add_custom_target(cutlass_examples)
add_custom_target(test_examples)
 | |
| 
 | |
# Registers one example executable together with its install rule and tests.
#
# Usage:
#   cutlass_example_add_executable(<NAME> [<forwarded args>...]
#     [DISABLE_TESTS <value>]
#     [DEPENDS <item>...]
#     [DEPENDEES <item>...]
#     [TEST_COMMAND_OPTIONS <item>...])
#
# Arguments:
#   NAME                  Name of the executable target to create.
#   DISABLE_TESTS         Forwarded to cutlass_add_executable_tests(); defaults
#                         to OFF when the keyword is not given.
#   DEPENDS               Forwarded unchanged to cutlass_add_executable_tests().
#   DEPENDEES             Forwarded to cutlass_add_executable_tests(), after the
#                         fixed `test_examples` entry.
#   TEST_COMMAND_OPTIONS  Forwarded unchanged to cutlass_add_executable_tests().
#
# Every argument not consumed by one of the keywords above is handed to
# cutlass_add_executable() as-is.
function(cutlass_example_add_executable NAME)

  # The "_" prefix makes the parsed results available as __<KEYWORD>
  # (e.g. __DEPENDS) and the leftovers as __UNPARSED_ARGUMENTS.
  # Keyword groups, in order: options (none), one-value, multi-value.
  cmake_parse_arguments(_ "" "DISABLE_TESTS" "DEPENDS;DEPENDEES;TEST_COMMAND_OPTIONS" ${ARGN})

  # Tests stay enabled unless the caller passed DISABLE_TESTS explicitly.
  if(NOT DEFINED __DISABLE_TESTS)
    set(__DISABLE_TESTS OFF)
  endif()

  # Create the executable; BATCH_SOURCES OFF is always appended after the
  # caller's own arguments.
  cutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS} BATCH_SOURCES OFF)

  # Building `cutlass_examples` builds this example.
  add_dependencies(cutlass_examples ${NAME})

  # nvidia::cublas is linked only when CUTLASS_ENABLE_CUBLAS evaluates to true;
  # otherwise the generator expression expands to nothing.
  target_link_libraries(${NAME}
    PRIVATE
      CUTLASS
      cutlass_tools_util_includes
      $<$<BOOL:${CUTLASS_ENABLE_CUBLAS}>:nvidia::cublas>
      cuda
  )

  # CUTLASS_EXAMPLES_UTILS_DIR is not set in this file.
  # NOTE(review): presumably provided by a parent scope - confirm.
  target_include_directories(${NAME}
    PRIVATE
      ${CUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
      ${CUTLASS_EXAMPLES_UTILS_DIR}
  )

  install(TARGETS ${NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})

  # Register test target test_examples_<NAME> for the executable.
  # NOTE(review): DISABLE_EXECUTABLE_INSTALL_RULE is presumably passed because
  # the install() rule above already covers the executable - confirm against
  # cutlass_add_executable_tests().
  cutlass_add_executable_tests(test_examples_${NAME} ${NAME}
    DEPENDS ${__DEPENDS}
    DEPENDEES test_examples ${__DEPENDEES}
    TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS}
    DISABLE_EXECUTABLE_INSTALL_RULE
    DISABLE_TESTS ${__DISABLE_TESTS}
  )

endfunction()
 | |
| 
 | |
# Add every example subdirectory, in the order listed. Each entry names a
# directory next to this file. The numbering is not contiguous (there is no
# 40_* or 44_* entry), and `cute` sits between 38 and 39.
foreach(EXAMPLE IN ITEMS
    # 00 - 09
    00_basic_gemm
    01_cutlass_utilities
    02_dump_reg_shmem
    03_visualize_layout
    04_tile_iterator
    05_batched_gemm
    06_splitK_gemm
    07_volta_tensorop_gemm
    08_turing_tensorop_gemm
    09_turing_tensorop_conv2dfprop
    # 10 - 19
    10_planar_complex
    11_planar_complex_array
    12_gemm_bias_relu
    13_two_tensor_op_fusion
    14_ampere_tf32_tensorop_gemm
    15_ampere_sparse_tensorop_gemm
    16_ampere_tensorop_conv2dfprop
    17_fprop_per_channel_bias
    18_ampere_fp64_tensorop_affine2_gemm
    19_tensorop_canonical
    # 20 - 29
    20_simt_canonical
    21_quaternion_gemm
    22_quaternion_conv
    23_ampere_gemm_operand_reduction_fusion
    24_gemm_grouped
    25_ampere_fprop_mainloop_fusion
    26_ampere_wgrad_mainloop_fusion
    27_ampere_3xtf32_fast_accurate_tensorop_gemm
    28_ampere_3xtf32_fast_accurate_tensorop_fprop
    29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
    # 30 - 39 (plus cute)
    30_wgrad_split_k
    31_basic_syrk
    32_basic_trmm
    33_ampere_3xtf32_tensorop_symm
    34_transposed_conv2d
    35_gemm_softmax
    36_gather_scatter_fusion
    37_gemm_layernorm_gemm_fusion
    38_syr2k_grouped
    cute
    39_gemm_permute
    # 41 - 49
    41_fused_multi_head_attention
    42_ampere_tensorop_group_conv
    43_ell_block_sparse_gemm
    45_dual_gemm
    46_depthwise_simt_conv2dfprop
    47_ampere_gemm_universal_streamk
    48_hopper_warp_specialized_gemm
    49_hopper_gemm_with_collective_builder
    # 50 - 59
    50_hopper_gemm_with_epilogue_swizzle
    51_hopper_gett
    52_hopper_gather_scatter_fusion
    53_hopper_gemm_permute
    54_hopper_fp8_warp_specialized_gemm
    55_hopper_mixed_dtype_gemm
    56_hopper_ptr_array_batched_gemm
    57_hopper_grouped_gemm
    58_ada_fp8_gemm
    59_ampere_gather_scatter_conv
)
  add_subdirectory(${EXAMPLE})
endforeach()
 | 
