diff --git a/include/cutlass/complex.h b/include/cutlass/complex.h index 8be2c494..aeccd2db 100644 --- a/include/cutlass/complex.h +++ b/include/cutlass/complex.h @@ -30,11 +30,12 @@ **************************************************************************************************/ #pragma once +#include + #if defined(__CUDACC_RTC__) #include #else #include -#include #endif #include "cutlass/cutlass.h" @@ -435,10 +436,10 @@ CUTLASS_HOST_DEVICE complex conj(complex const &z) { /// Indentity transform for non-complex types template CUTLASS_HOST_DEVICE T conj(T const &z) { - static_assert( !std::is_same::value && - !std::is_same::value && - !std::is_same>::value && - !std::is_same>::value, "May not be a complex data type"); + static_assert( !platform::is_same::value && + !platform::is_same::value && + !platform::is_same>::value && + !platform::is_same>::value, "May not be a complex data type"); return z; } diff --git a/include/cutlass/conv/kernel/implicit_gemm_convolution.h b/include/cutlass/conv/kernel/implicit_gemm_convolution.h index 4c5aa9db..d3f1a19f 100644 --- a/include/cutlass/conv/kernel/implicit_gemm_convolution.h +++ b/include/cutlass/conv/kernel/implicit_gemm_convolution.h @@ -121,7 +121,7 @@ struct ImplicitGemmConvolution { // Conv2d row-major matrix C (KxRSC) // Conv3d row-major matrix C (KxTRSC) static int const kWgradCStrideIdx = - cutlass::platform::is_same::value ? 2 : 3; + platform::is_same::value ? 2 : 3; /// This chooses the appropriate stride element of the C tensor. static int const kTensorCStrideIdx = diff --git a/include/cutlass/conv/kernel/implicit_gemm_convolution_fusion.h b/include/cutlass/conv/kernel/implicit_gemm_convolution_fusion.h index 97bad541..d43521f1 100644 --- a/include/cutlass/conv/kernel/implicit_gemm_convolution_fusion.h +++ b/include/cutlass/conv/kernel/implicit_gemm_convolution_fusion.h @@ -123,7 +123,7 @@ struct ImplicitGemmConvolutionFusion { // Conv2d row-major matrix C (KxRSC) // Conv3d row-major matrix C (KxTRSC) static int const kWgradCStrideIdx = - cutlass::platform::is_same::value ? 2 : 3; + platform::is_same::value ? 2 : 3; /// This chooses the appropriate stride element of the C tensor. static int const kTensorCStrideIdx = diff --git a/include/cutlass/conv/kernel/implicit_gemm_convolution_strided_dgrad.h b/include/cutlass/conv/kernel/implicit_gemm_convolution_strided_dgrad.h index 62fd9b77..31958a42 100644 --- a/include/cutlass/conv/kernel/implicit_gemm_convolution_strided_dgrad.h +++ b/include/cutlass/conv/kernel/implicit_gemm_convolution_strided_dgrad.h @@ -121,20 +121,20 @@ struct ImplicitGemmConvolutionStridedDgrad { // Conv2d row-major matrix C (KxRSC) // Conv3d row-major matrix C (KxTRSC) static int const kWgradCStrideIdx = - cutlass::platform::is_same::value ? 2 : 3; + platform::is_same::value ? 2 : 3; /// This chooses the appropriate stride element of the C tensor. static int const kTensorCStrideIdx = (kConvolutionalOperator == conv::Operator::kWgrad ? kWgradCStrideIdx : 0); // Strided dgrad uses a specialized threadblock swizzle for functionality and performance - static_assert((std::is_same::value) || - (std::is_same>::value) || - (std::is_same>::value) || - (std::is_same>::value), "Needs ThreadblockSwizzle type specialized for strided dgrad"); diff --git a/include/cutlass/conv/kernel/implicit_gemm_convolution_with_fused_epilogue.h b/include/cutlass/conv/kernel/implicit_gemm_convolution_with_fused_epilogue.h index 1dc7db02..2ab47637 100644 --- a/include/cutlass/conv/kernel/implicit_gemm_convolution_with_fused_epilogue.h +++ b/include/cutlass/conv/kernel/implicit_gemm_convolution_with_fused_epilogue.h @@ -121,7 +121,7 @@ struct ImplicitGemmConvolutionWithFusedEpilogue { // Conv2d row-major matrix C (KxRSC) // Conv3d row-major matrix C (KxTRSC) static int const kWgradCStrideIdx = - cutlass::platform::is_same::value ? 2 : 3; + platform::is_same::value ? 2 : 3; /// This chooses the appropriate stride element of the C tensor. static int const kTensorCStrideIdx = diff --git a/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h b/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h index a7742c08..0c7c4a88 100644 --- a/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h +++ b/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h @@ -215,10 +215,10 @@ struct DefaultIteratorsTensorOp< InstructionShape, ThreadMap> { - static_assert(cutlass::platform::is_same::value || - cutlass::platform::is_same::value || - cutlass::platform::is_same::value || - cutlass::platform::is_same::value, + static_assert(platform::is_same::value || + platform::is_same::value || + platform::is_same::value || + platform::is_same::value, "ElementOutput needs to be 4 or 8 bit (unsigned) int."); static_assert((ElementsPerAccess == 16 || ElementsPerAccess == 8), diff --git a/include/cutlass/gemm/device/rank_2k.h b/include/cutlass/gemm/device/rank_2k.h index 3630406e..b10c5e6a 100644 --- a/include/cutlass/gemm/device/rank_2k.h +++ b/include/cutlass/gemm/device/rank_2k.h @@ -149,7 +149,7 @@ class Rank2K { static int const kUpdateRank = 2; // static asserts for rank 2k update kernel - static_assert(std::is_same::value, + static_assert(platform::is_same::value, "Rank 2K update operator support same layouts for operandA and B"); /// Define the kernel diff --git a/include/cutlass/gemm/device/symm.h b/include/cutlass/gemm/device/symm.h index 99bcd16a..6380223e 100755 --- a/include/cutlass/gemm/device/symm.h +++ b/include/cutlass/gemm/device/symm.h @@ -153,7 +153,7 @@ class Symm { static BlasMode const kBlasMode = BlasMode_; // static asserts for symm update kernel - static_assert(std::is_same::value, + static_assert(platform::is_same::value, "SYMM update operator support same layouts for operand A and B"); /// Define the kernel diff --git a/include/cutlass/gemm/kernel/default_gemm.h b/include/cutlass/gemm/kernel/default_gemm.h index 3b13bf5a..8b433d24 100644 --- a/include/cutlass/gemm/kernel/default_gemm.h +++ b/include/cutlass/gemm/kernel/default_gemm.h @@ -209,7 +209,7 @@ struct DefaultGemm::Epilogue; - using Epilogue = typename cutlass::platform::conditional::value, + using Epilogue = typename cutlass::platform::conditional::value, RegularEpilogue, Affine2Epilogue>::type; @@ -672,7 +672,7 @@ struct DefaultGemm< kEpilogueElementsPerAccess >::Epilogue; - using Epilogue = typename cutlass::platform::conditional::value, + using Epilogue = typename cutlass::platform::conditional::value, RegularEpilogue, Affine2Epilogue>::type; @@ -780,7 +780,7 @@ struct DefaultGemm::Epilogue; - using Epilogue = typename cutlass::platform::conditional::value, + using Epilogue = typename cutlass::platform::conditional::value, RegularEpilogue, Affine2Epilogue>::type; diff --git a/include/cutlass/gemm/kernel/default_gemm_grouped.h b/include/cutlass/gemm/kernel/default_gemm_grouped.h index d6cdd1dd..ec9cac32 100644 --- a/include/cutlass/gemm/kernel/default_gemm_grouped.h +++ b/include/cutlass/gemm/kernel/default_gemm_grouped.h @@ -183,7 +183,7 @@ struct DefaultGemmGrouped< > { // If true, we must construct a 'transposed-and-exchanged' Mma operator. - static bool const kInternalTranspose = std::is_same::value; + static bool const kInternalTranspose = platform::is_same::value; using MapArguments = kernel::detail::MapArguments< ElementA, @@ -307,7 +307,7 @@ struct DefaultGemmGrouped< > { // If true, we must construct a 'transposed-and-exchanged' Mma operator. - static bool const kInternalTranspose = std::is_same::value; + static bool const kInternalTranspose = platform::is_same::value; using MapArguments = kernel::detail::MapArguments< ElementA, diff --git a/include/cutlass/gemm/kernel/gemv.h b/include/cutlass/gemm/kernel/gemv.h index 2d45fbb5..d7f10a59 100644 --- a/include/cutlass/gemm/kernel/gemv.h +++ b/include/cutlass/gemm/kernel/gemv.h @@ -67,7 +67,7 @@ public: using LayoutA = layout::ColumnMajor; using TensorRefA = TensorRef; - static_assert(std::is_same::value, + static_assert(platform::is_same::value, "Only supported for column-major A matrix"); using ElementB = ElementB_; diff --git a/include/cutlass/gemm/threadblock/default_mma.h b/include/cutlass/gemm/threadblock/default_mma.h index d1ffc819..2fab97d7 100644 --- a/include/cutlass/gemm/threadblock/default_mma.h +++ b/include/cutlass/gemm/threadblock/default_mma.h @@ -632,8 +632,8 @@ struct DefaultMma::value; - static const bool transposeB = cutlass::platform::is_same< LayoutB, layout::RowMajor >::value; + static const bool transposeA = platform::is_same< LayoutA, layout::ColumnMajor >::value; + static const bool transposeB = platform::is_same< LayoutB, layout::RowMajor >::value; // Define the MmaCore components using MmaCore = typename cutlass::gemm::threadblock::DefaultMmaCore< diff --git a/include/cutlass/transform/thread/unaryOp.h b/include/cutlass/transform/thread/unaryOp.h index 77645da9..7696cf73 100644 --- a/include/cutlass/transform/thread/unaryOp.h +++ b/include/cutlass/transform/thread/unaryOp.h @@ -54,19 +54,19 @@ class UnaryOp static FragmentOut execute(FragmentIn &in) { static_assert(FragmentIn::kElements == FragmentOut::kElements, "Number of elements must match."); - static_assert(std::is_same::value || - std::is_same::value, + static_assert(platform::is_same::value || + platform::is_same::value, "Unary Operator not supported."); FragmentOut out; - if( std::is_same::value ) + if( platform::is_same::value ) { CUTLASS_PRAGMA_UNROLL for(int i=0; i < FragmentIn::kElements; ++i){ out[i] = static_cast(in[i]); } } - else if( std::is_same::value ) + else if( platform::is_same::value ) { for(int i=0; i < FragmentIn::kElements; ++i){ out[i] = conj(static_cast(in[i])); @@ -83,15 +83,15 @@ class UnaryOp CUTLASS_DEVICE static FragmentIn execute(FragmentIn &in) { - static_assert(std::is_same::value || - std::is_same::value, + static_assert(platform::is_same::value || + platform::is_same::value, "Unary Operator not supported."); - if( std::is_same::value ) + if( platform::is_same::value ) { return in; } - else if( std::is_same::value ) + else if( platform::is_same::value ) { for(int i=0; i < FragmentIn::kElements; ++i){ in[i] = conj(in[i]);