From 0b8cacd6f11611d0d0bc3ea6568c29ba1cc9a271 Mon Sep 17 00:00:00 2001 From: Ivan Komarov Date: Tue, 19 Jul 2022 22:23:54 +0300 Subject: [PATCH] Remove redundant includes (#563) * Remove redundant includes * Fix fstream in examples/ * Fix <fstream> in test/ * Use consistent order for <fstream> (always after <iostream>) * Remove an unneeded include in a file where std::ofstream usage is commented out Co-authored-by: Ivan Komarov --- examples/04_tile_iterator/tile_iterator.cu | 1 - .../turing_tensorop_conv2dfprop.cu | 1 + examples/10_planar_complex/planar_complex.cu | 1 - examples/11_planar_complex_array/planar_complex_array.cu | 1 - .../ampere_tensorop_conv2dfprop.cu | 1 + examples/22_quaternion_conv/quaternion_conv.cu | 1 + .../ampere_gemm_operand_reduction_fusion.cu | 1 + .../ampere_fprop_mainloop_fusion.cu | 1 + .../ampere_wgrad_mainloop_fusion.cu | 1 + .../ampere_3xtf32_fast_accurate_tensorop_fprop.cu | 1 + examples/30_wgrad_split_k/30_wgrad_split_k.cu | 1 + examples/36_gather_scatter_fusion/gather_scatter_fusion.cu | 1 + test/unit/conv/device/conv2d_testbed.h | 2 ++ test/unit/conv/device/conv2d_testbed_interleaved.h | 2 ++ test/unit/conv/device/conv2d_with_broadcast_testbed.h | 2 ++ test/unit/conv/device/conv2d_with_reduction_testbed.h | 2 ++ test/unit/conv/device/conv3d_testbed.h | 2 ++ test/unit/epilogue/threadblock/epilogue_planar_complex.cu | 2 -- test/unit/epilogue/threadblock/epilogue_simt.cu | 2 -- test/unit/epilogue/threadblock/epilogue_simt_sm60.cu | 2 -- test/unit/epilogue/threadblock/epilogue_simt_sm61.cu | 2 -- test/unit/epilogue/threadblock/epilogue_tensor_op.cu | 2 -- test/unit/epilogue/threadblock/epilogue_volta_tensor_op.cu | 2 -- .../epilogue/threadblock/epilogue_with_reduction_tensor_op.cu | 2 -- test/unit/epilogue/threadblock/epilogue_wmma_tensor_op_sm70.cu | 2 -- test/unit/epilogue/threadblock/output_tile_threadmap.cu | 2 -- test/unit/gemm/device/testbed_complex.h | 1 - test/unit/gemm/device/testbed_grouped.h | 1 + test/unit/gemm/device/testbed_sanity.h | 1 - 
test/unit/gemm/device/testbed_splitk.h | 1 - test/unit/gemm/threadblock/mma_multistage_testbed_slicedk.h | 2 ++ test/unit/gemm/threadblock/mma_pipelined_testbed.h | 2 ++ test/unit/gemm/threadblock/mma_pipelined_testbed_slicedk.h | 2 ++ test/unit/gemm/threadblock/mma_planar_complex_testbed.h | 2 ++ tools/util/include/cutlass/util/distribution.h | 2 +- 35 files changed, 29 insertions(+), 25 deletions(-) diff --git a/examples/04_tile_iterator/tile_iterator.cu b/examples/04_tile_iterator/tile_iterator.cu index 8fc19319..886c1770 100644 --- a/examples/04_tile_iterator/tile_iterator.cu +++ b/examples/04_tile_iterator/tile_iterator.cu @@ -50,7 +50,6 @@ #include #include #include -#include // CUTLASS includes #include "cutlass/transform/threadblock/predicated_tile_iterator.h" diff --git a/examples/09_turing_tensorop_conv2dfprop/turing_tensorop_conv2dfprop.cu b/examples/09_turing_tensorop_conv2dfprop/turing_tensorop_conv2dfprop.cu index b7391216..bd74ce12 100644 --- a/examples/09_turing_tensorop_conv2dfprop/turing_tensorop_conv2dfprop.cu +++ b/examples/09_turing_tensorop_conv2dfprop/turing_tensorop_conv2dfprop.cu @@ -124,6 +124,7 @@ compare if the output from CUTLASS kernel is same as the reference implicit GEMM */ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/10_planar_complex/planar_complex.cu b/examples/10_planar_complex/planar_complex.cu index c7dc7b27..9a9dc888 100644 --- a/examples/10_planar_complex/planar_complex.cu +++ b/examples/10_planar_complex/planar_complex.cu @@ -74,7 +74,6 @@ */ #include -#include #include #include "cutlass/cutlass.h" diff --git a/examples/11_planar_complex_array/planar_complex_array.cu b/examples/11_planar_complex_array/planar_complex_array.cu index 1dd35846..272390f2 100644 --- a/examples/11_planar_complex_array/planar_complex_array.cu +++ b/examples/11_planar_complex_array/planar_complex_array.cu @@ -72,7 +72,6 @@ */ #include -#include #include #include "cutlass/cutlass.h" diff --git 
a/examples/16_ampere_tensorop_conv2dfprop/ampere_tensorop_conv2dfprop.cu b/examples/16_ampere_tensorop_conv2dfprop/ampere_tensorop_conv2dfprop.cu index a35a3946..66b0dee5 100644 --- a/examples/16_ampere_tensorop_conv2dfprop/ampere_tensorop_conv2dfprop.cu +++ b/examples/16_ampere_tensorop_conv2dfprop/ampere_tensorop_conv2dfprop.cu @@ -111,6 +111,7 @@ compare if the output from CUTLASS kernel is same as the reference implicit GEMM */ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/22_quaternion_conv/quaternion_conv.cu b/examples/22_quaternion_conv/quaternion_conv.cu index cd2a48d9..756d4651 100644 --- a/examples/22_quaternion_conv/quaternion_conv.cu +++ b/examples/22_quaternion_conv/quaternion_conv.cu @@ -30,6 +30,7 @@ **************************************************************************************************/ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu b/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu index bb880f4f..41ea3200 100644 --- a/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu +++ b/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu @@ -41,6 +41,7 @@ epilogue/threadblock/epilogue_gemm_k_reduction.h */ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/25_ampere_fprop_mainloop_fusion/ampere_fprop_mainloop_fusion.cu b/examples/25_ampere_fprop_mainloop_fusion/ampere_fprop_mainloop_fusion.cu index 661efcf7..fe756fba 100644 --- a/examples/25_ampere_fprop_mainloop_fusion/ampere_fprop_mainloop_fusion.cu +++ b/examples/25_ampere_fprop_mainloop_fusion/ampere_fprop_mainloop_fusion.cu @@ -52,6 +52,7 @@ line is the same. 
*/ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/26_ampere_wgrad_mainloop_fusion/ampere_wgrad_mainloop_fusion.cu b/examples/26_ampere_wgrad_mainloop_fusion/ampere_wgrad_mainloop_fusion.cu index da3ec1ca..72d7284f 100644 --- a/examples/26_ampere_wgrad_mainloop_fusion/ampere_wgrad_mainloop_fusion.cu +++ b/examples/26_ampere_wgrad_mainloop_fusion/ampere_wgrad_mainloop_fusion.cu @@ -49,6 +49,7 @@ technical details. */ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/28_ampere_3xtf32_fast_accurate_tensorop_fprop/ampere_3xtf32_fast_accurate_tensorop_fprop.cu b/examples/28_ampere_3xtf32_fast_accurate_tensorop_fprop/ampere_3xtf32_fast_accurate_tensorop_fprop.cu index b2996f2d..a197e2ef 100644 --- a/examples/28_ampere_3xtf32_fast_accurate_tensorop_fprop/ampere_3xtf32_fast_accurate_tensorop_fprop.cu +++ b/examples/28_ampere_3xtf32_fast_accurate_tensorop_fprop/ampere_3xtf32_fast_accurate_tensorop_fprop.cu @@ -36,6 +36,7 @@ compared with CUDA Cores. See example 27 for the trick of 3xTF32. */ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/30_wgrad_split_k/30_wgrad_split_k.cu b/examples/30_wgrad_split_k/30_wgrad_split_k.cu index b49446cc..5016adf2 100644 --- a/examples/30_wgrad_split_k/30_wgrad_split_k.cu +++ b/examples/30_wgrad_split_k/30_wgrad_split_k.cu @@ -40,6 +40,7 @@ to correctly instantiate the GEMM template. 
*/ #include +#include #include #include "cutlass/cutlass.h" diff --git a/examples/36_gather_scatter_fusion/gather_scatter_fusion.cu b/examples/36_gather_scatter_fusion/gather_scatter_fusion.cu index 8ceea638..f22e235f 100644 --- a/examples/36_gather_scatter_fusion/gather_scatter_fusion.cu +++ b/examples/36_gather_scatter_fusion/gather_scatter_fusion.cu @@ -69,6 +69,7 @@ #include #include +#include #include #include diff --git a/test/unit/conv/device/conv2d_testbed.h b/test/unit/conv/device/conv2d_testbed.h index 125c177a..9f0e04f9 100644 --- a/test/unit/conv/device/conv2d_testbed.h +++ b/test/unit/conv/device/conv2d_testbed.h @@ -33,6 +33,8 @@ */ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/test/unit/conv/device/conv2d_testbed_interleaved.h b/test/unit/conv/device/conv2d_testbed_interleaved.h index db271999..2aa60f0b 100644 --- a/test/unit/conv/device/conv2d_testbed_interleaved.h +++ b/test/unit/conv/device/conv2d_testbed_interleaved.h @@ -33,6 +33,8 @@ */ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/test/unit/conv/device/conv2d_with_broadcast_testbed.h b/test/unit/conv/device/conv2d_with_broadcast_testbed.h index 1561ed8e..dd12bf60 100644 --- a/test/unit/conv/device/conv2d_with_broadcast_testbed.h +++ b/test/unit/conv/device/conv2d_with_broadcast_testbed.h @@ -37,6 +37,8 @@ */ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/test/unit/conv/device/conv2d_with_reduction_testbed.h b/test/unit/conv/device/conv2d_with_reduction_testbed.h index d2ccc9f1..a147275b 100644 --- a/test/unit/conv/device/conv2d_with_reduction_testbed.h +++ b/test/unit/conv/device/conv2d_with_reduction_testbed.h @@ -33,6 +33,8 @@ */ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/test/unit/conv/device/conv3d_testbed.h 
b/test/unit/conv/device/conv3d_testbed.h index 1c511c19..f9cc3563 100644 --- a/test/unit/conv/device/conv3d_testbed.h +++ b/test/unit/conv/device/conv3d_testbed.h @@ -33,6 +33,8 @@ */ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/test/unit/epilogue/threadblock/epilogue_planar_complex.cu b/test/unit/epilogue/threadblock/epilogue_planar_complex.cu index 9373e7da..4b3fecca 100644 --- a/test/unit/epilogue/threadblock/epilogue_planar_complex.cu +++ b/test/unit/epilogue/threadblock/epilogue_planar_complex.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/epilogue_simt.cu b/test/unit/epilogue/threadblock/epilogue_simt.cu index bca6c247..386f2871 100644 --- a/test/unit/epilogue/threadblock/epilogue_simt.cu +++ b/test/unit/epilogue/threadblock/epilogue_simt.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/epilogue_simt_sm60.cu b/test/unit/epilogue/threadblock/epilogue_simt_sm60.cu index 880d490c..84f9110f 100644 --- a/test/unit/epilogue/threadblock/epilogue_simt_sm60.cu +++ b/test/unit/epilogue/threadblock/epilogue_simt_sm60.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/epilogue_simt_sm61.cu b/test/unit/epilogue/threadblock/epilogue_simt_sm61.cu index 48c8be17..4aa27bef 100644 --- a/test/unit/epilogue/threadblock/epilogue_simt_sm61.cu +++ b/test/unit/epilogue/threadblock/epilogue_simt_sm61.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git 
a/test/unit/epilogue/threadblock/epilogue_tensor_op.cu b/test/unit/epilogue/threadblock/epilogue_tensor_op.cu index 696af7b5..5d185dd5 100644 --- a/test/unit/epilogue/threadblock/epilogue_tensor_op.cu +++ b/test/unit/epilogue/threadblock/epilogue_tensor_op.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/epilogue_volta_tensor_op.cu b/test/unit/epilogue/threadblock/epilogue_volta_tensor_op.cu index 828e3941..415f7dd7 100644 --- a/test/unit/epilogue/threadblock/epilogue_volta_tensor_op.cu +++ b/test/unit/epilogue/threadblock/epilogue_volta_tensor_op.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/epilogue_with_reduction_tensor_op.cu b/test/unit/epilogue/threadblock/epilogue_with_reduction_tensor_op.cu index b6490154..922bebc1 100644 --- a/test/unit/epilogue/threadblock/epilogue_with_reduction_tensor_op.cu +++ b/test/unit/epilogue/threadblock/epilogue_with_reduction_tensor_op.cu @@ -33,8 +33,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/epilogue_wmma_tensor_op_sm70.cu b/test/unit/epilogue/threadblock/epilogue_wmma_tensor_op_sm70.cu index 4606b845..1e9e5c87 100644 --- a/test/unit/epilogue/threadblock/epilogue_wmma_tensor_op_sm70.cu +++ b/test/unit/epilogue/threadblock/epilogue_wmma_tensor_op_sm70.cu @@ -35,8 +35,6 @@ #ifdef CUTLASS_ARCH_WMMA_SM70_ENABLED -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/epilogue/threadblock/output_tile_threadmap.cu b/test/unit/epilogue/threadblock/output_tile_threadmap.cu index 7d434eaa..c33e0a89 100644 --- 
a/test/unit/epilogue/threadblock/output_tile_threadmap.cu +++ b/test/unit/epilogue/threadblock/output_tile_threadmap.cu @@ -32,8 +32,6 @@ \brief Unit tests for thread-level GEMM */ -#include - #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/gemm/device/testbed_complex.h b/test/unit/gemm/device/testbed_complex.h index 561859a4..e6893026 100644 --- a/test/unit/gemm/device/testbed_complex.h +++ b/test/unit/gemm/device/testbed_complex.h @@ -35,7 +35,6 @@ #pragma once #include -#include #include #include diff --git a/test/unit/gemm/device/testbed_grouped.h b/test/unit/gemm/device/testbed_grouped.h index 2641e8d1..5ec41618 100644 --- a/test/unit/gemm/device/testbed_grouped.h +++ b/test/unit/gemm/device/testbed_grouped.h @@ -36,6 +36,7 @@ #pragma once #include +#include #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/test/unit/gemm/device/testbed_sanity.h b/test/unit/gemm/device/testbed_sanity.h index d7f63c3c..e3976024 100644 --- a/test/unit/gemm/device/testbed_sanity.h +++ b/test/unit/gemm/device/testbed_sanity.h @@ -33,7 +33,6 @@ */ #include -#include #include #include "../../common/cutlass_unit_test.h" diff --git a/test/unit/gemm/device/testbed_splitk.h b/test/unit/gemm/device/testbed_splitk.h index 8ad85a9e..fcc136c1 100644 --- a/test/unit/gemm/device/testbed_splitk.h +++ b/test/unit/gemm/device/testbed_splitk.h @@ -35,7 +35,6 @@ #pragma once #include -#include #include #include "../../common/cutlass_unit_test.h" diff --git a/test/unit/gemm/threadblock/mma_multistage_testbed_slicedk.h b/test/unit/gemm/threadblock/mma_multistage_testbed_slicedk.h index a9cf2c62..c8343f8f 100644 --- a/test/unit/gemm/threadblock/mma_multistage_testbed_slicedk.h +++ b/test/unit/gemm/threadblock/mma_multistage_testbed_slicedk.h @@ -35,6 +35,8 @@ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git 
a/test/unit/gemm/threadblock/mma_pipelined_testbed.h b/test/unit/gemm/threadblock/mma_pipelined_testbed.h index bdc0d873..c36e8030 100644 --- a/test/unit/gemm/threadblock/mma_pipelined_testbed.h +++ b/test/unit/gemm/threadblock/mma_pipelined_testbed.h @@ -34,6 +34,8 @@ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/gemm/threadblock/mma_pipelined_testbed_slicedk.h b/test/unit/gemm/threadblock/mma_pipelined_testbed_slicedk.h index d0e0e05a..1d509d5c 100644 --- a/test/unit/gemm/threadblock/mma_pipelined_testbed_slicedk.h +++ b/test/unit/gemm/threadblock/mma_pipelined_testbed_slicedk.h @@ -35,6 +35,8 @@ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/aligned_buffer.h" diff --git a/test/unit/gemm/threadblock/mma_planar_complex_testbed.h b/test/unit/gemm/threadblock/mma_planar_complex_testbed.h index 59539062..1d8ef51c 100644 --- a/test/unit/gemm/threadblock/mma_planar_complex_testbed.h +++ b/test/unit/gemm/threadblock/mma_planar_complex_testbed.h @@ -34,6 +34,8 @@ #pragma once +#include + #include "../../common/cutlass_unit_test.h" #include "cutlass/cutlass.h" diff --git a/tools/util/include/cutlass/util/distribution.h b/tools/util/include/cutlass/util/distribution.h index 4ebf58bb..773487ee 100644 --- a/tools/util/include/cutlass/util/distribution.h +++ b/tools/util/include/cutlass/util/distribution.h @@ -34,7 +34,7 @@ \brief This header contains a class to parametrize a statistical distribution function. */ -#include +#include namespace cutlass {