/*************************************************************************************************** * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ /*! \file \brief CUTLASS host-device template for complex numbers supporting all CUTLASS numeric types. */ #include #include #include "../common/cutlass_unit_test.h" #include "cutlass/complex.h" #include "cutlass/constants.h" #include "cutlass/numeric_conversion.h" #include "cutlass/tfloat32.h" #include ///////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, f64_to_f32_conversion) { cutlass::complex source = {1.5, -1.25}; cutlass::complex dest = cutlass::complex(source); // explicit conversion EXPECT_TRUE(source.real() == 1.5 && source.imag() == -1.25 && dest.real() == 1.5f && dest.imag() == -1.25f); } ///////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, f32_to_f64_conversion) { cutlass::complex source = {-1.5f, 1.25f}; cutlass::complex dest = source; // implicit conversion EXPECT_TRUE(source.real() == -1.5f && source.imag() == 1.25f && dest.real() == -1.5 && dest.imag() == 1.25); } ///////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, s32_to_f64_conversion) { cutlass::complex source = {-2, 1}; cutlass::complex dest = source; // implicit conversion EXPECT_TRUE(source.real() == -2 && source.imag() == 1 && dest.real() == -2 && dest.imag() == 1); } ///////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, f16_to_f32_conversion) { cutlass::complex source = {1.5_hf, -1.25_hf}; cutlass::complex dest = cutlass::complex(source); // explicit conversion EXPECT_TRUE(source.real() == 1.5_hf && source.imag() == -1.25_hf && dest.real() == 1.5f && dest.imag() == -1.25f); } //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, exp_f32) { cutlass::complex Z[] = { {1, 1}, {2 , cutlass::constants::pi()/2.0f }, {0.5f, cutlass::constants::pi() }, {0.25f, cutlass::constants::pi()*3/4.0f }, {0, 0}, }; cutlass::complex Expected[] = { {1.4686939399158851, 2.2873552871788423}, {4.524491950137825e-16, 7.38905609893065}, {-1.6487212707001282, 2.019101226849069e-16}, {-0.9079430793557842, 0.9079430793557843}, {1, 0} }; double tolerance = 0.00001; for (int i = 0; cutlass::real(Z[i]) != 0.0f; ++i) { double e_r = cutlass::real(Expected[i]); double e_i = cutlass::real(Expected[i]); cutlass::complex got = cutlass::exp(Z[i]); float g_r = cutlass::real(got); float g_i = cutlass::real(got); EXPECT_TRUE( std::abs(g_r - e_r) < tolerance && std::abs(g_i - e_i) < tolerance ) << "Expected(" << Expected[i] << "), Got(" << got << ")"; } } TEST(complex, absolute_value_real_and_imag) { { cutlass::complex z_d{3.0, 4.0}; auto abs_d = cutlass::abs(z_d); static_assert(std::is_same_v); EXPECT_EQ(abs_d, 5.0); auto real_d = cutlass::real(z_d); static_assert(std::is_same_v); EXPECT_EQ(real_d, 3.0); auto imag_d = cutlass::imag(z_d); static_assert(std::is_same_v); EXPECT_EQ(imag_d, 4.0); } { cutlass::complex z_f{3.0f, 4.0f}; auto abs_f = cutlass::abs(z_f); static_assert(std::is_same_v); EXPECT_EQ(abs_f, 5.0f); auto real_f = cutlass::real(z_f); static_assert(std::is_same_v); EXPECT_EQ(real_f, 3.0f); auto imag_f = cutlass::imag(z_f); static_assert(std::is_same_v); EXPECT_EQ(imag_f, 4.0f); } { cutlass::complex z_tf32{cutlass::tfloat32_t{3.0f}, cutlass::tfloat32_t{4.0f}}; auto abs_tf32 = cutlass::abs(z_tf32); static_assert(std::is_same_v); EXPECT_EQ(abs_tf32, cutlass::tfloat32_t{5.0f}); auto real_tf32 = cutlass::real(z_tf32); static_assert(std::is_same_v); EXPECT_EQ(real_tf32, cutlass::tfloat32_t{3.0f}); auto imag_tf32 = cutlass::imag(z_tf32); static_assert(std::is_same_v); EXPECT_EQ(imag_tf32, cutlass::tfloat32_t{4.0f}); } { cutlass::complex z_i{3, 4}; // sqrt(int) isn't a valid overload, so cutlass::abs isn't tested. auto real_i = cutlass::real(z_i); static_assert(std::is_same_v); EXPECT_EQ(real_i, 3); auto imag_i = cutlass::imag(z_i); static_assert(std::is_same_v); EXPECT_EQ(imag_i, 4); } { double x_d{3.0}; auto real_d = cutlass::real(x_d); static_assert(std::is_same_v); EXPECT_EQ(real_d, 3.0); auto imag_d = cutlass::imag(x_d); static_assert(std::is_same_v); EXPECT_EQ(imag_d, 0.0); } { float x_f{3.0f}; auto real_f = cutlass::real(x_f); static_assert(std::is_same_v); EXPECT_EQ(real_f, 3.0f); auto imag_f = cutlass::imag(x_f); static_assert(std::is_same_v); EXPECT_EQ(imag_f, 0.0f); } { cutlass::tfloat32_t x_tf32{3.0f}; auto real_tf32 = cutlass::real(x_tf32); static_assert(std::is_same_v); EXPECT_EQ(real_tf32, cutlass::tfloat32_t{3.0f}); auto imag_tf32 = cutlass::imag(x_tf32); static_assert(std::is_same_v); EXPECT_EQ(imag_tf32, cutlass::tfloat32_t{0.0f}); } { int x_i{3}; auto real_i = cutlass::real(x_i); static_assert(std::is_same_v); EXPECT_EQ(real_i, 3); auto imag_i = cutlass::imag(x_i); static_assert(std::is_same_v); EXPECT_EQ(imag_i, 0); } } // FakeReal and FakeComplex test whether cutlass::real and // cutlass::imag correctly handle user-defined non-complex // and complex number types. namespace test { // These classes have no conversions to or from arithmetic types, so // that the test can ensure that the implementation does not silently // convert to, say, float or int. class FakeReal { public: // cutlass::imag must be able to value-construct its noncomplex input. FakeReal() = default; static CUTLASS_HOST_DEVICE FakeReal make_FakeReal(int val) { return FakeReal{val}; } friend CUTLASS_HOST_DEVICE bool operator==(FakeReal lhs, FakeReal rhs) { return lhs.value_ == rhs.value_; } friend CUTLASS_HOST_DEVICE FakeReal operator-(FakeReal const& x) { return make_FakeReal(-x.value_); } private: CUTLASS_HOST_DEVICE FakeReal(int val) : value_(val) {} int value_ = 0; }; class FakeComplex { public: static CUTLASS_HOST_DEVICE FakeComplex make_FakeComplex(FakeReal re, FakeReal im) { return FakeComplex{re, im}; } // Existence of member functions real and imag tell // CUTLASS that FakeComplex is a complex number type. CUTLASS_HOST_DEVICE FakeReal real() const { return real_; } CUTLASS_HOST_DEVICE FakeReal imag() const { return imag_; } friend CUTLASS_HOST_DEVICE bool operator==(FakeComplex lhs, FakeComplex rhs) { return lhs.real_ == rhs.real_ && lhs.imag_ == rhs.imag_; } private: CUTLASS_HOST_DEVICE FakeComplex(FakeReal re, FakeReal im) : real_(re), imag_(im) {} FakeReal real_{}; FakeReal imag_{}; }; CUTLASS_HOST_DEVICE FakeComplex conj(FakeComplex const& z) { return FakeComplex::make_FakeComplex(z.real(), -z.imag()); } // Variant of FakeComplex that has a hidden friend conj instead of a // nonmember conj defined outside the class. class FakeComplexWithHiddenFriendConj { public: static CUTLASS_HOST_DEVICE FakeComplexWithHiddenFriendConj make_FakeComplexWithHiddenFriendConj(FakeReal re, FakeReal im) { return FakeComplexWithHiddenFriendConj{re, im}; } CUTLASS_HOST_DEVICE FakeReal real() const { return real_; } CUTLASS_HOST_DEVICE FakeReal imag() const { return imag_; } friend CUTLASS_HOST_DEVICE bool operator==(FakeComplexWithHiddenFriendConj lhs, FakeComplexWithHiddenFriendConj rhs) { return lhs.real_ == rhs.real_ && lhs.imag_ == rhs.imag_; } friend CUTLASS_HOST_DEVICE FakeComplexWithHiddenFriendConj conj(FakeComplexWithHiddenFriendConj const& z) { return FakeComplexWithHiddenFriendConj::make_FakeComplexWithHiddenFriendConj(z.real(), -z.imag()); } private: CUTLASS_HOST_DEVICE FakeComplexWithHiddenFriendConj(FakeReal re, FakeReal im) : real_(re), imag_(im) {} FakeReal real_{}; FakeReal imag_{}; }; } // namespace test TEST(complex, real_and_imag_with_custom_types) { using test::FakeReal; using test::FakeComplex; { FakeReal x = FakeReal::make_FakeReal(42); auto x_r = cutlass::real(x); static_assert(std::is_same_v); EXPECT_EQ(x_r, FakeReal::make_FakeReal(42)); auto x_i = cutlass::imag(x); static_assert(std::is_same_v); EXPECT_EQ(x_i, FakeReal::make_FakeReal(0)); } { FakeComplex z = FakeComplex::make_FakeComplex( FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(4)); auto z_r = cutlass::real(z); static_assert(std::is_same_v); EXPECT_EQ(z_r, FakeReal::make_FakeReal(3)); auto z_i = cutlass::imag(z); static_assert(std::is_same_v); EXPECT_EQ(z_i, FakeReal::make_FakeReal(4)); } } namespace test { template void conj_tester(T z, T z_c_expected, const char type_name[]) { // Use cutlass::conj just like std::swap (the "std::swap two-step"). using cutlass::conj; auto z_c = conj(z); static_assert(std::is_same_v); constexpr bool is_cuComplex = std::is_same_v || std::is_same_v; if constexpr (is_cuComplex) { EXPECT_EQ(z_c.x, z_c_expected.x); EXPECT_EQ(z_c.y, z_c_expected.y) << "conj failed for type " << type_name; } else { EXPECT_EQ(z_c, z_c_expected) << "conj failed for type " << type_name; } auto z_c2 = cutlass::conjugate{}(z); static_assert(std::is_same_v); if constexpr (is_cuComplex) { // cuFloatComplex and cuDoubleComplex don't report conj(z) as // being well-formed, probably because they are type aliases of // some kind. cutlass::conj works fine, though! static_assert(! cutlass::platform::is_arithmetic_v && (cutlass::detail::has_unqualified_conj_v || cutlass::detail::has_cutlass_conj_v)); EXPECT_EQ(z_c2.x, z_c_expected.x); EXPECT_EQ(z_c2.y, z_c_expected.y) << "conjugate failed for type " << type_name; } else { EXPECT_EQ(z_c2, z_c_expected) << "conjugate failed for type " << type_name; } } } // namespace test TEST(complex, conj_with_standard_arithmetic_types) { { double x = 42.0; double x_c_expected = 42.0; test::conj_tester(x, x_c_expected, "double"); } { float x = 42.0f; float x_c_expected = 42.0f; test::conj_tester(x, x_c_expected, "float"); } { int x = 42; int x_c_expected = 42; test::conj_tester(x, x_c_expected, "int"); } } TEST(complex, conj_with_cutlass_complex_types) { { cutlass::complex z{3.0, 4.0}; cutlass::complex z_c_expected{3.0, -4.0}; test::conj_tester(z, z_c_expected, "cutlass::complex"); } { cutlass::complex z{3.0f, 4.0f}; cutlass::complex z_c_expected{3.0f, -4.0f}; test::conj_tester(z, z_c_expected, "cutlass::complex"); } { cutlass::complex z{ cutlass::tfloat32_t{3.0f}, cutlass::tfloat32_t{4.0f}}; cutlass::complex z_c_expected{ cutlass::tfloat32_t{3.0f}, cutlass::tfloat32_t{-4.0f}}; test::conj_tester(z, z_c_expected, "cutlass::complex"); } } TEST(complex, conj_with_noncomplex_type_not_in_cutlass_namespace) { test::FakeReal x = test::FakeReal::make_FakeReal(42); test::FakeReal x_c_expected = test::FakeReal::make_FakeReal(42); test::conj_tester(x, x_c_expected, "test::FakeReal"); } TEST(complex, conj_with_noncomplex_type_in_cutlass_namespace) { cutlass::tfloat32_t x{42.0f}; cutlass::tfloat32_t x_c_expected{42.0f}; test::conj_tester(x, x_c_expected, "cutlass::tfloat32_t"); } TEST(complex, conj_with_complex_types_not_in_cutlass_namespace) { using test::FakeReal; // conj defined as nonmember outside the class { test::FakeComplex z = test::FakeComplex::make_FakeComplex( FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(4)); test::FakeComplex z_c_expected = test::FakeComplex::make_FakeComplex( FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(-4)); test::conj_tester(z, z_c_expected, "test::FakeComplex"); } // conj defined as hidden friend { test::FakeComplexWithHiddenFriendConj z = test::FakeComplexWithHiddenFriendConj::make_FakeComplexWithHiddenFriendConj( FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(4)); test::FakeComplexWithHiddenFriendConj z_c_expected = test::FakeComplexWithHiddenFriendConj::make_FakeComplexWithHiddenFriendConj( FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(-4)); test::conj_tester(z, z_c_expected, "test::FakeComplexWithHiddenFriendConj"); } } TEST(complex, conj_with_cuda_std_complex_types) { { cuda::std::complex z{3.0, 4.0}; cuda::std::complex z_c_expected{3.0, -4.0}; test::conj_tester(z, z_c_expected, "cuda::std::complex"); } { cuda::std::complex z{3.0f, 4.0f}; cuda::std::complex z_c_expected{3.0f, -4.0f}; test::conj_tester(z, z_c_expected, "cuda::std::complex"); } } TEST(complex, conj_with_cuComplex_types) { { cuDoubleComplex z = make_cuDoubleComplex(3.0, 4.0); cuDoubleComplex z_c_expected = make_cuDoubleComplex(3.0, -4.0); test::conj_tester(z, z_c_expected, "cuDoubleComplex"); } { cuFloatComplex z = make_cuFloatComplex(3.0f, 4.0f); cuFloatComplex z_c_expected = make_cuFloatComplex(3.0f, -4.0f); test::conj_tester(z, z_c_expected, "cuFloatComplex"); } } //////////////////////////////////////////////////////////////////////////////////////////////////// namespace test { /// Thorough testing for basic complex math operators. Uses std::complex as a reference. template struct ComplexOperators { ComplexOperators() { for (int ar = -N; ar <= N; ++ar) { for (int ai = -N; ai <= N; ++ai) { for (int br = -N; br <= N; ++br) { for (int bi = -N; bi <= N; ++bi) { cutlass::complex Ae(T(ar) / T(M), T(ai) / T(M)); cutlass::complex Be(T(br) / T(M), T(bi) / T(M)); std::complex Ar(T(ar) / T(M), T(ai) / T(M)); std::complex Br(T(br) / T(M), T(bi) / T(M)); cutlass::complex add_e = Ae + Be; cutlass::complex sub_e = Ae - Be; cutlass::complex mul_e = Ae * Be; std::complex add_r = (Ar + Br); std::complex sub_r = (Ar - Br); std::complex mul_r = (Ar * Br); EXPECT_EQ(real(add_e), real(add_r)); EXPECT_EQ(imag(add_e), imag(add_r)); EXPECT_EQ(real(sub_e), real(sub_r)); EXPECT_EQ(imag(sub_e), imag(sub_r)); EXPECT_EQ(real(mul_e), real(mul_r)); EXPECT_EQ(imag(mul_e), imag(mul_r)); if (!(br == 0 && bi == 0)) { cutlass::complex div_e = Ae / Be; std::complex div_r = Ar / Br; T const kRange = T(0.001); EXPECT_NEAR(real(div_e), real(div_r), kRange); EXPECT_NEAR(imag(div_e), imag(div_r), kRange); } } } } } } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, host_float) { test::ComplexOperators test; } //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(complex, host_double) { test::ComplexOperators test; } /////////////////////////////////////////////////////////////////////////////////////////////////