cutlass/test/unit/core/complex.cu
Vijay Thakkar be60a0b272
CUTLASS 3.5.1 (#1623)
* CUTLASS 3.5.1

* updates, optimizations, fixes
2024-07-29 08:46:24 -04:00

567 lines
18 KiB
Plaintext

/***************************************************************************************************
* Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief CUTLASS host-device template for complex numbers supporting all CUTLASS numeric types.
*/
#include <complex>
#include <cuda/std/complex>
#include "../common/cutlass_unit_test.h"
#include "cutlass/complex.h"
#include "cutlass/constants.h"
#include "cutlass/numeric_conversion.h"
#include "cutlass/tfloat32.h"
#include <type_traits>
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, f64_to_f32_conversion) {
cutlass::complex<double> source = {1.5, -1.25};
cutlass::complex<float> dest = cutlass::complex<float>(source); // explicit conversion
EXPECT_TRUE(source.real() == 1.5 && source.imag() == -1.25 &&
dest.real() == 1.5f && dest.imag() == -1.25f);
}
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, f32_to_f64_conversion) {
cutlass::complex<float> source = {-1.5f, 1.25f};
cutlass::complex<double> dest = source; // implicit conversion
EXPECT_TRUE(source.real() == -1.5f && source.imag() == 1.25f &&
dest.real() == -1.5 && dest.imag() == 1.25);
}
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, s32_to_f64_conversion) {
cutlass::complex<int> source = {-2, 1};
cutlass::complex<double> dest = source; // implicit conversion
EXPECT_TRUE(source.real() == -2 && source.imag() == 1 &&
dest.real() == -2 && dest.imag() == 1);
}
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, f16_to_f32_conversion) {
cutlass::complex<cutlass::half_t> source = {1.5_hf, -1.25_hf};
cutlass::complex<float> dest = cutlass::complex<float>(source); // explicit conversion
EXPECT_TRUE(source.real() == 1.5_hf && source.imag() == -1.25_hf &&
dest.real() == 1.5f && dest.imag() == -1.25f);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, exp_f32) {
cutlass::complex<float> Z[] = {
{1, 1},
{2 , cutlass::constants::pi<float>()/2.0f },
{0.5f, cutlass::constants::pi<float>() },
{0.25f, cutlass::constants::pi<float>()*3/4.0f },
{0, 0},
};
cutlass::complex<double> Expected[] = {
{1.4686939399158851, 2.2873552871788423},
{4.524491950137825e-16, 7.38905609893065},
{-1.6487212707001282, 2.019101226849069e-16},
{-0.9079430793557842, 0.9079430793557843},
{1, 0}
};
double tolerance = 0.00001;
for (int i = 0; cutlass::real(Z[i]) != 0.0f; ++i) {
double e_r = cutlass::real(Expected[i]);
double e_i = cutlass::real(Expected[i]);
cutlass::complex<float> got = cutlass::exp(Z[i]);
float g_r = cutlass::real(got);
float g_i = cutlass::real(got);
EXPECT_TRUE(
std::abs(g_r - e_r) < tolerance && std::abs(g_i - e_i) < tolerance
) << "Expected(" << Expected[i] << "), Got(" << got << ")";
}
}
TEST(complex, absolute_value_real_and_imag) {
{
cutlass::complex z_d{3.0, 4.0};
auto abs_d = cutlass::abs(z_d);
static_assert(std::is_same_v<decltype(abs_d), double>);
EXPECT_EQ(abs_d, 5.0);
auto real_d = cutlass::real(z_d);
static_assert(std::is_same_v<decltype(real_d), double>);
EXPECT_EQ(real_d, 3.0);
auto imag_d = cutlass::imag(z_d);
static_assert(std::is_same_v<decltype(imag_d), double>);
EXPECT_EQ(imag_d, 4.0);
}
{
cutlass::complex z_f{3.0f, 4.0f};
auto abs_f = cutlass::abs(z_f);
static_assert(std::is_same_v<decltype(abs_f), float>);
EXPECT_EQ(abs_f, 5.0f);
auto real_f = cutlass::real(z_f);
static_assert(std::is_same_v<decltype(real_f), float>);
EXPECT_EQ(real_f, 3.0f);
auto imag_f = cutlass::imag(z_f);
static_assert(std::is_same_v<decltype(imag_f), float>);
EXPECT_EQ(imag_f, 4.0f);
}
{
cutlass::complex z_tf32{cutlass::tfloat32_t{3.0f}, cutlass::tfloat32_t{4.0f}};
auto abs_tf32 = cutlass::abs(z_tf32);
static_assert(std::is_same_v<decltype(abs_tf32), cutlass::tfloat32_t>);
EXPECT_EQ(abs_tf32, cutlass::tfloat32_t{5.0f});
auto real_tf32 = cutlass::real(z_tf32);
static_assert(std::is_same_v<decltype(real_tf32), cutlass::tfloat32_t>);
EXPECT_EQ(real_tf32, cutlass::tfloat32_t{3.0f});
auto imag_tf32 = cutlass::imag(z_tf32);
static_assert(std::is_same_v<decltype(imag_tf32), cutlass::tfloat32_t>);
EXPECT_EQ(imag_tf32, cutlass::tfloat32_t{4.0f});
}
{
cutlass::complex z_i{3, 4};
// sqrt(int) isn't a valid overload, so cutlass::abs isn't tested.
auto real_i = cutlass::real(z_i);
static_assert(std::is_same_v<decltype(real_i), int>);
EXPECT_EQ(real_i, 3);
auto imag_i = cutlass::imag(z_i);
static_assert(std::is_same_v<decltype(imag_i), int>);
EXPECT_EQ(imag_i, 4);
}
{
double x_d{3.0};
auto real_d = cutlass::real(x_d);
static_assert(std::is_same_v<decltype(real_d), double>);
EXPECT_EQ(real_d, 3.0);
auto imag_d = cutlass::imag(x_d);
static_assert(std::is_same_v<decltype(imag_d), double>);
EXPECT_EQ(imag_d, 0.0);
}
{
float x_f{3.0f};
auto real_f = cutlass::real(x_f);
static_assert(std::is_same_v<decltype(real_f), float>);
EXPECT_EQ(real_f, 3.0f);
auto imag_f = cutlass::imag(x_f);
static_assert(std::is_same_v<decltype(imag_f), float>);
EXPECT_EQ(imag_f, 0.0f);
}
{
cutlass::tfloat32_t x_tf32{3.0f};
auto real_tf32 = cutlass::real(x_tf32);
static_assert(std::is_same_v<decltype(real_tf32), cutlass::tfloat32_t>);
EXPECT_EQ(real_tf32, cutlass::tfloat32_t{3.0f});
auto imag_tf32 = cutlass::imag(x_tf32);
static_assert(std::is_same_v<decltype(imag_tf32), cutlass::tfloat32_t>);
EXPECT_EQ(imag_tf32, cutlass::tfloat32_t{0.0f});
}
{
int x_i{3};
auto real_i = cutlass::real(x_i);
static_assert(std::is_same_v<decltype(real_i), int>);
EXPECT_EQ(real_i, 3);
auto imag_i = cutlass::imag(x_i);
static_assert(std::is_same_v<decltype(imag_i), int>);
EXPECT_EQ(imag_i, 0);
}
}
// FakeReal and FakeComplex test whether cutlass::real and
// cutlass::imag correctly handle user-defined non-complex
// and complex number types.
namespace test {
// These classes have no conversions to or from arithmetic types, so
// that the test can ensure that the implementation does not silently
// convert to, say, float or int.
class FakeReal {
public:
// cutlass::imag must be able to value-construct its noncomplex input.
FakeReal() = default;
static CUTLASS_HOST_DEVICE FakeReal make_FakeReal(int val) {
return FakeReal{val};
}
friend CUTLASS_HOST_DEVICE bool operator==(FakeReal lhs, FakeReal rhs) {
return lhs.value_ == rhs.value_;
}
friend CUTLASS_HOST_DEVICE FakeReal operator-(FakeReal const& x) {
return make_FakeReal(-x.value_);
}
private:
CUTLASS_HOST_DEVICE FakeReal(int val) : value_(val) {}
int value_ = 0;
};
class FakeComplex {
public:
static CUTLASS_HOST_DEVICE FakeComplex
make_FakeComplex(FakeReal re, FakeReal im) {
return FakeComplex{re, im};
}
// Existence of member functions real and imag tell
// CUTLASS that FakeComplex is a complex number type.
CUTLASS_HOST_DEVICE FakeReal real() const { return real_; }
CUTLASS_HOST_DEVICE FakeReal imag() const { return imag_; }
friend CUTLASS_HOST_DEVICE bool operator==(FakeComplex lhs, FakeComplex rhs) {
return lhs.real_ == rhs.real_ && lhs.imag_ == rhs.imag_;
}
private:
CUTLASS_HOST_DEVICE FakeComplex(FakeReal re, FakeReal im)
: real_(re), imag_(im)
{}
FakeReal real_{};
FakeReal imag_{};
};
CUTLASS_HOST_DEVICE FakeComplex conj(FakeComplex const& z) {
return FakeComplex::make_FakeComplex(z.real(), -z.imag());
}
// Variant of FakeComplex that has a hidden friend conj instead of a
// nonmember conj defined outside the class.
class FakeComplexWithHiddenFriendConj {
public:
static CUTLASS_HOST_DEVICE FakeComplexWithHiddenFriendConj
make_FakeComplexWithHiddenFriendConj(FakeReal re, FakeReal im) {
return FakeComplexWithHiddenFriendConj{re, im};
}
CUTLASS_HOST_DEVICE FakeReal real() const { return real_; }
CUTLASS_HOST_DEVICE FakeReal imag() const { return imag_; }
friend CUTLASS_HOST_DEVICE bool
operator==(FakeComplexWithHiddenFriendConj lhs,
FakeComplexWithHiddenFriendConj rhs)
{
return lhs.real_ == rhs.real_ && lhs.imag_ == rhs.imag_;
}
friend CUTLASS_HOST_DEVICE FakeComplexWithHiddenFriendConj
conj(FakeComplexWithHiddenFriendConj const& z) {
return FakeComplexWithHiddenFriendConj::make_FakeComplexWithHiddenFriendConj(z.real(), -z.imag());
}
private:
CUTLASS_HOST_DEVICE
FakeComplexWithHiddenFriendConj(FakeReal re, FakeReal im)
: real_(re), imag_(im)
{}
FakeReal real_{};
FakeReal imag_{};
};
} // namespace test
TEST(complex, real_and_imag_with_custom_types) {
using test::FakeReal;
using test::FakeComplex;
{
FakeReal x = FakeReal::make_FakeReal(42);
auto x_r = cutlass::real(x);
static_assert(std::is_same_v<decltype(x_r), FakeReal>);
EXPECT_EQ(x_r, FakeReal::make_FakeReal(42));
auto x_i = cutlass::imag(x);
static_assert(std::is_same_v<decltype(x_i), FakeReal>);
EXPECT_EQ(x_i, FakeReal::make_FakeReal(0));
}
{
FakeComplex z = FakeComplex::make_FakeComplex(
FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(4));
auto z_r = cutlass::real(z);
static_assert(std::is_same_v<decltype(z_r), FakeReal>);
EXPECT_EQ(z_r, FakeReal::make_FakeReal(3));
auto z_i = cutlass::imag(z);
static_assert(std::is_same_v<decltype(z_i), FakeReal>);
EXPECT_EQ(z_i, FakeReal::make_FakeReal(4));
}
}
namespace test {
template<class T>
void conj_tester(T z, T z_c_expected, const char type_name[]) {
// Use cutlass::conj just like std::swap (the "std::swap two-step").
using cutlass::conj;
auto z_c = conj(z);
static_assert(std::is_same_v<decltype(z_c), T>);
constexpr bool is_cuComplex = std::is_same_v<T, cuDoubleComplex> ||
std::is_same_v<T, cuFloatComplex>;
if constexpr (is_cuComplex) {
EXPECT_EQ(z_c.x, z_c_expected.x);
EXPECT_EQ(z_c.y, z_c_expected.y) << "conj failed for type " << type_name;
}
else {
EXPECT_EQ(z_c, z_c_expected) << "conj failed for type " << type_name;
}
auto z_c2 = cutlass::conjugate<T>{}(z);
static_assert(std::is_same_v<decltype(z_c2), T>);
if constexpr (is_cuComplex) {
// cuFloatComplex and cuDoubleComplex don't report conj(z) as
// being well-formed, probably because they are type aliases of
// some kind. cutlass::conj works fine, though!
static_assert(! cutlass::platform::is_arithmetic_v<T> &&
(cutlass::detail::has_unqualified_conj_v<T> ||
cutlass::detail::has_cutlass_conj_v<T>));
EXPECT_EQ(z_c2.x, z_c_expected.x);
EXPECT_EQ(z_c2.y, z_c_expected.y)
<< "conjugate failed for type " << type_name;
}
else {
EXPECT_EQ(z_c2, z_c_expected) << "conjugate failed for type " << type_name;
}
}
} // namespace test
TEST(complex, conj_with_standard_arithmetic_types) {
{
double x = 42.0;
double x_c_expected = 42.0;
test::conj_tester(x, x_c_expected, "double");
}
{
float x = 42.0f;
float x_c_expected = 42.0f;
test::conj_tester(x, x_c_expected, "float");
}
{
int x = 42;
int x_c_expected = 42;
test::conj_tester(x, x_c_expected, "int");
}
}
TEST(complex, conj_with_cutlass_complex_types) {
{
cutlass::complex<double> z{3.0, 4.0};
cutlass::complex<double> z_c_expected{3.0, -4.0};
test::conj_tester(z, z_c_expected, "cutlass::complex<double>");
}
{
cutlass::complex<float> z{3.0f, 4.0f};
cutlass::complex<float> z_c_expected{3.0f, -4.0f};
test::conj_tester(z, z_c_expected, "cutlass::complex<float>");
}
{
cutlass::complex<cutlass::tfloat32_t> z{
cutlass::tfloat32_t{3.0f}, cutlass::tfloat32_t{4.0f}};
cutlass::complex<cutlass::tfloat32_t> z_c_expected{
cutlass::tfloat32_t{3.0f}, cutlass::tfloat32_t{-4.0f}};
test::conj_tester(z, z_c_expected, "cutlass::complex<cutlass::tfloat32_t>");
}
}
TEST(complex, conj_with_noncomplex_type_not_in_cutlass_namespace) {
test::FakeReal x = test::FakeReal::make_FakeReal(42);
test::FakeReal x_c_expected = test::FakeReal::make_FakeReal(42);
test::conj_tester(x, x_c_expected, "test::FakeReal");
}
TEST(complex, conj_with_noncomplex_type_in_cutlass_namespace) {
cutlass::tfloat32_t x{42.0f};
cutlass::tfloat32_t x_c_expected{42.0f};
test::conj_tester(x, x_c_expected, "cutlass::tfloat32_t");
}
TEST(complex, conj_with_complex_types_not_in_cutlass_namespace) {
using test::FakeReal;
// conj defined as nonmember outside the class
{
test::FakeComplex z = test::FakeComplex::make_FakeComplex(
FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(4));
test::FakeComplex z_c_expected = test::FakeComplex::make_FakeComplex(
FakeReal::make_FakeReal(3), FakeReal::make_FakeReal(-4));
test::conj_tester(z, z_c_expected, "test::FakeComplex");
}
// conj defined as hidden friend
{
test::FakeComplexWithHiddenFriendConj z =
test::FakeComplexWithHiddenFriendConj::make_FakeComplexWithHiddenFriendConj(
FakeReal::make_FakeReal(3),
FakeReal::make_FakeReal(4));
test::FakeComplexWithHiddenFriendConj z_c_expected =
test::FakeComplexWithHiddenFriendConj::make_FakeComplexWithHiddenFriendConj(
FakeReal::make_FakeReal(3),
FakeReal::make_FakeReal(-4));
test::conj_tester(z, z_c_expected, "test::FakeComplexWithHiddenFriendConj");
}
}
TEST(complex, conj_with_cuda_std_complex_types) {
{
cuda::std::complex<double> z{3.0, 4.0};
cuda::std::complex<double> z_c_expected{3.0, -4.0};
test::conj_tester(z, z_c_expected, "cuda::std::complex<double>");
}
{
cuda::std::complex<float> z{3.0f, 4.0f};
cuda::std::complex<float> z_c_expected{3.0f, -4.0f};
test::conj_tester(z, z_c_expected, "cuda::std::complex<float>");
}
}
TEST(complex, conj_with_cuComplex_types) {
{
cuDoubleComplex z = make_cuDoubleComplex(3.0, 4.0);
cuDoubleComplex z_c_expected = make_cuDoubleComplex(3.0, -4.0);
test::conj_tester(z, z_c_expected, "cuDoubleComplex");
}
{
cuFloatComplex z = make_cuFloatComplex(3.0f, 4.0f);
cuFloatComplex z_c_expected = make_cuFloatComplex(3.0f, -4.0f);
test::conj_tester(z, z_c_expected, "cuFloatComplex");
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
namespace test {
/// Thorough testing for basic complex math operators. Uses std::complex as a reference.
template <typename T, int N, int M>
struct ComplexOperators {
ComplexOperators() {
for (int ar = -N; ar <= N; ++ar) {
for (int ai = -N; ai <= N; ++ai) {
for (int br = -N; br <= N; ++br) {
for (int bi = -N; bi <= N; ++bi) {
cutlass::complex<T> Ae(T(ar) / T(M), T(ai) / T(M));
cutlass::complex<T> Be(T(br) / T(M), T(bi) / T(M));
std::complex<T> Ar(T(ar) / T(M), T(ai) / T(M));
std::complex<T> Br(T(br) / T(M), T(bi) / T(M));
cutlass::complex<T> add_e = Ae + Be;
cutlass::complex<T> sub_e = Ae - Be;
cutlass::complex<T> mul_e = Ae * Be;
std::complex<T> add_r = (Ar + Br);
std::complex<T> sub_r = (Ar - Br);
std::complex<T> mul_r = (Ar * Br);
EXPECT_EQ(real(add_e), real(add_r));
EXPECT_EQ(imag(add_e), imag(add_r));
EXPECT_EQ(real(sub_e), real(sub_r));
EXPECT_EQ(imag(sub_e), imag(sub_r));
EXPECT_EQ(real(mul_e), real(mul_r));
EXPECT_EQ(imag(mul_e), imag(mul_r));
if (!(br == 0 && bi == 0)) {
cutlass::complex<T> div_e = Ae / Be;
std::complex<T> div_r = Ar / Br;
T const kRange = T(0.001);
EXPECT_NEAR(real(div_e), real(div_r), kRange);
EXPECT_NEAR(imag(div_e), imag(div_r), kRange);
}
}
}
}
}
}
};
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, host_float) {
test::ComplexOperators<float, 32, 8> test;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST(complex, host_double) {
test::ComplexOperators<double, 32, 8> test;
}
/////////////////////////////////////////////////////////////////////////////////////////////////