cutlass/include/cutlass/constants.h

1240 lines
47 KiB
C
Raw Normal View History

/***************************************************************************************************
2022-04-24 03:02:38 +08:00
* Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/* \file
\brief Boost-style constant definitions for floating-point types.
*/
#pragma once
#include "cutlass/cutlass.h"
#include "cutlass/numeric_types.h"
#include "cutlass/complex.h"
///////////////////////////////////////////////////////////////////////////////////
namespace cutlass {
namespace constants {
///////////////////////////////////////////////////////////////////////////////////
//
// Primary templates
//
/// Returns 1, the multiplicative identity element
template <typename T> CUTLASS_HOST_DEVICE T one();
/// Returns 0, the additive identity element
template <typename T> CUTLASS_HOST_DEVICE T zero();
/// Returns 2
template <typename T> CUTLASS_HOST_DEVICE T two();
/// Returns pi, approximately 3.141
template <typename T> CUTLASS_HOST_DEVICE T pi();
/// Returns 2 * pi
template <typename T> CUTLASS_HOST_DEVICE T two_pi();
/// Returns pi / 2
template <typename T> CUTLASS_HOST_DEVICE T half_pi();
/// Returns sqrt(pi)
template <typename T> CUTLASS_HOST_DEVICE T root_pi();
/// Returns sqrt(pi / 2)
template <typename T> CUTLASS_HOST_DEVICE T root_half_pi();
/// Returns sqrt(2 * pi)
template <typename T> CUTLASS_HOST_DEVICE T root_two_pi();
/// Returns sqrt(ln(4))
template <typename T> CUTLASS_HOST_DEVICE T root_ln_four();
/// Returns e, approximately 2.718...
template <typename T> CUTLASS_HOST_DEVICE T e();
/// Returns (1/2)
template <typename T> CUTLASS_HOST_DEVICE T half();
/// Returns sqrt(2), approximately 1.414...
template <typename T> CUTLASS_HOST_DEVICE T root_two();
/// Returns sqrt(2)/2, approximately 0.707...
template <typename T> CUTLASS_HOST_DEVICE T half_root_two();
/// Returns ln(2), approximately 0.693...
template <typename T> CUTLASS_HOST_DEVICE T ln_two();
/// Returns ln(ln(2)), approximately -0.3665...
template <typename T> CUTLASS_HOST_DEVICE T ln_ln_two();
/// Returns 1/3, approximately 0.333...
template <typename T> CUTLASS_HOST_DEVICE T third();
/// Returns 2/3, approximately 0.666...
template <typename T> CUTLASS_HOST_DEVICE T twothirds();
/// Returns pi - 3, approximately 0.1416...
template <typename T> CUTLASS_HOST_DEVICE T pi_minus_three();
/// Returns 4 - pi, approximately 0.858...
template <typename T> CUTLASS_HOST_DEVICE T four_minus_pi();
/////////////////////////////////////////////////////////////////////////////////////
// Specialization for double
/// Returns 1, the multiplicative identity element (specialization for double)
template <> CUTLASS_HOST_DEVICE double one<double>() {
uint64_t bits = 0x3ff0000000000000ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 1, the multiplicative identity element (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> one< complex<double> >() {
return complex<double>(one<double>(), double());
}
/// Returns 0, the additive identity element (specialization for double)
template <> CUTLASS_HOST_DEVICE double zero<double>() {
uint64_t bits = 0x0ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 0, the additive identity element (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> zero< complex<double> >() {
return complex<double>(zero<double>(), double());
}
/// Returns 2 (specialization for double)
template <> CUTLASS_HOST_DEVICE double two<double>() {
uint64_t bits = 0x4000000000000000ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 2 (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> two< complex<double> >() {
return complex<double>(two<double>(), double());
}
/// Returns pi, approximately 3.141 (specialization for double)
template <> CUTLASS_HOST_DEVICE double pi<double>() {
uint64_t bits = 0x400921fb54442d18ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns pi, approximately 3.141 (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> pi< complex<double> >() {
return complex<double>(pi<double>(), double());
}
/// Returns 2 * pi (specialization for double)
template <> CUTLASS_HOST_DEVICE double two_pi<double>() {
uint64_t bits = 0x401921fb54442d18ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 2 * pi (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> two_pi< complex<double> >() {
return complex<double>(two_pi<double>(), double());
}
/// Returns pi / 2 (specialization for double)
template <> CUTLASS_HOST_DEVICE double half_pi<double>() {
uint64_t bits = 0x3ff921fb54442d18ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns pi / 2 (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> half_pi< complex<double> >() {
return complex<double>(half_pi<double>(), double());
}
/// Returns sqrt(pi) (specialization for double)
template <> CUTLASS_HOST_DEVICE double root_pi<double>() {
uint64_t bits = 0x3ffc5bf891b4ef6aull;
return reinterpret_cast<double const &>(bits);
}
/// Returns sqrt(pi) (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> root_pi< complex<double> >() {
return complex<double>(root_pi<double>(), double());
}
/// Returns sqrt(pi / 2) (specialization for double)
template <> CUTLASS_HOST_DEVICE double root_half_pi<double>() {
uint64_t bits = 0x3ff40d931ff62705ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns sqrt(pi / 2) (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> root_half_pi< complex<double> >() {
return complex<double>(root_half_pi<double>(), double());
}
/// Returns sqrt(2 * pi) (specialization for double)
template <> CUTLASS_HOST_DEVICE double root_two_pi<double>() {
uint64_t bits = 0x40040d931ff62705ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns sqrt(2 * pi) (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> root_two_pi< complex<double> >() {
return complex<double>(root_two_pi<double>(), double());
}
/// Returns sqrt(ln(4)) (specialization for double)
template <> CUTLASS_HOST_DEVICE double root_ln_four<double>() {
uint64_t bits = 0x3ff2d6abe44afc43ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns sqrt(ln(4)) (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> root_ln_four< complex<double> >() {
return complex<double>(root_ln_four<double>(), double());
}
/// Returns e, approximately 2.718... (specialization for double)
template <> CUTLASS_HOST_DEVICE double e<double>() {
uint64_t bits = 0x4005bf0a8b145769ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns e, approximately 2.718... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> e< complex<double> >() {
return complex<double>(e<double>(), double());
}
/// Returns (1/2) (specialization for double)
template <> CUTLASS_HOST_DEVICE double half<double>() {
uint64_t bits = 0x3fe0000000000000ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns (1/2) (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> half< complex<double> >() {
return complex<double>(half<double>(), double());
}
/// Returns sqrt(2), approximately 1.414... (specialization for double)
template <> CUTLASS_HOST_DEVICE double root_two<double>() {
uint64_t bits = 0x3ff6a09e667f3bcdull;
return reinterpret_cast<double const &>(bits);
}
/// Returns sqrt(2), approximately 1.414... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> root_two< complex<double> >() {
return complex<double>(root_two<double>(), double());
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for double)
template <> CUTLASS_HOST_DEVICE double half_root_two<double>() {
uint64_t bits = 0x3fe6a09e667f3bcdull;
return reinterpret_cast<double const &>(bits);
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> half_root_two< complex<double> >() {
return complex<double>(half_root_two<double>(), double());
}
/// Returns ln(2), approximately 0.693... (specialization for double)
template <> CUTLASS_HOST_DEVICE double ln_two<double>() {
uint64_t bits = 0x3fe62e42fefa39efull;
return reinterpret_cast<double const &>(bits);
}
/// Returns ln(2), approximately 0.693... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> ln_two< complex<double> >() {
return complex<double>(ln_two<double>(), double());
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for double)
template <> CUTLASS_HOST_DEVICE double ln_ln_two<double>() {
uint64_t bits = 0xbfd774f29bdd6b9full;
return reinterpret_cast<double const &>(bits);
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> ln_ln_two< complex<double> >() {
return complex<double>(ln_ln_two<double>(), double());
}
/// Returns 1/3, approximately 0.333... (specialization for double)
template <> CUTLASS_HOST_DEVICE double third<double>() {
uint64_t bits = 0x3fd5555555555555ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 1/3, approximately 0.333... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> third< complex<double> >() {
return complex<double>(third<double>(), double());
}
/// Returns 2/3, approximately 0.666... (specialization for double)
template <> CUTLASS_HOST_DEVICE double twothirds<double>() {
uint64_t bits = 0x3fe5555555555555ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 2/3, approximately 0.666... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> twothirds< complex<double> >() {
return complex<double>(twothirds<double>(), double());
}
/// Returns pi - 3, approximately 0.1416... (specialization for double)
template <> CUTLASS_HOST_DEVICE double pi_minus_three<double>() {
uint64_t bits = 0x3fc21fb54442d180ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns pi - 3, approximately 0.1416... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> pi_minus_three< complex<double> >() {
return complex<double>(pi_minus_three<double>(), double());
}
/// Returns 4 - pi, approximately 0.858... (specialization for double)
template <> CUTLASS_HOST_DEVICE double four_minus_pi<double>() {
uint64_t bits = 0x3feb7812aeef4ba0ull;
return reinterpret_cast<double const &>(bits);
}
/// Returns 4 - pi, approximately 0.858... (specialization for complex<double>)
template <> CUTLASS_HOST_DEVICE complex<double> four_minus_pi< complex<double> >() {
return complex<double>(four_minus_pi<double>(), double());
}
/////////////////////////////////////////////////////////////////////////////////////
// Specialization for float
/// Returns 1, the multiplicative identity element (specialization for float)
template <> CUTLASS_HOST_DEVICE float one<float>() {
uint32_t bits = 0x3f800000u;
return reinterpret_cast<float const &>(bits);
}
/// Returns 1, the multiplicative identity element (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> one< complex<float> >() {
return complex<float>(one<float>(), float());
}
/// Returns 0, the additive identity element (specialization for float)
template <> CUTLASS_HOST_DEVICE float zero<float>() {
uint32_t bits = 0x0u;
return reinterpret_cast<float const &>(bits);
}
/// Returns 0, the additive identity element (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> zero< complex<float> >() {
return complex<float>(zero<float>(), float());
}
/// Returns 2 (specialization for float)
template <> CUTLASS_HOST_DEVICE float two<float>() {
uint32_t bits = 0x40000000u;
return reinterpret_cast<float const &>(bits);
}
/// Returns 2 (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> two< complex<float> >() {
return complex<float>(two<float>(), float());
}
/// Returns pi, approximately 3.141 (specialization for float)
template <> CUTLASS_HOST_DEVICE float pi<float>() {
uint32_t bits = 0x40490fdbu;
return reinterpret_cast<float const &>(bits);
}
/// Returns pi, approximately 3.141 (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> pi< complex<float> >() {
return complex<float>(pi<float>(), float());
}
/// Returns 2 * pi (specialization for float)
template <> CUTLASS_HOST_DEVICE float two_pi<float>() {
uint32_t bits = 0x40c90fdbu;
return reinterpret_cast<float const &>(bits);
}
/// Returns 2 * pi (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> two_pi< complex<float> >() {
return complex<float>(two_pi<float>(), float());
}
/// Returns pi / 2 (specialization for float)
template <> CUTLASS_HOST_DEVICE float half_pi<float>() {
uint32_t bits = 0x3fc90fdbu;
return reinterpret_cast<float const &>(bits);
}
/// Returns pi / 2 (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> half_pi< complex<float> >() {
return complex<float>(half_pi<float>(), float());
}
/// Returns sqrt(pi) (specialization for float)
template <> CUTLASS_HOST_DEVICE float root_pi<float>() {
uint32_t bits = 0x3fe2dfc5u;
return reinterpret_cast<float const &>(bits);
}
/// Returns sqrt(pi) (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> root_pi< complex<float> >() {
return complex<float>(root_pi<float>(), float());
}
/// Returns sqrt(pi / 2) (specialization for float)
template <> CUTLASS_HOST_DEVICE float root_half_pi<float>() {
uint32_t bits = 0x3fa06c99u;
return reinterpret_cast<float const &>(bits);
}
/// Returns sqrt(pi / 2) (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> root_half_pi< complex<float> >() {
return complex<float>(root_half_pi<float>(), float());
}
/// Returns sqrt(2 * pi) (specialization for float)
template <> CUTLASS_HOST_DEVICE float root_two_pi<float>() {
uint32_t bits = 0x40206c99u;
return reinterpret_cast<float const &>(bits);
}
/// Returns sqrt(2 * pi) (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> root_two_pi< complex<float> >() {
return complex<float>(root_two_pi<float>(), float());
}
/// Returns sqrt(ln(4)) (specialization for float)
template <> CUTLASS_HOST_DEVICE float root_ln_four<float>() {
uint32_t bits = 0x3f96b55fu;
return reinterpret_cast<float const &>(bits);
}
/// Returns sqrt(ln(4)) (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> root_ln_four< complex<float> >() {
return complex<float>(root_ln_four<float>(), float());
}
/// Returns e, approximately 2.718... (specialization for float)
template <> CUTLASS_HOST_DEVICE float e<float>() {
uint32_t bits = 0x402df854u;
return reinterpret_cast<float const &>(bits);
}
/// Returns e, approximately 2.718... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> e< complex<float> >() {
return complex<float>(e<float>(), float());
}
/// Returns (1/2) (specialization for float)
template <> CUTLASS_HOST_DEVICE float half<float>() {
uint32_t bits = 0x3f000000u;
return reinterpret_cast<float const &>(bits);
}
/// Returns (1/2) (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> half< complex<float> >() {
return complex<float>(half<float>(), float());
}
/// Returns sqrt(2), approximately 1.414... (specialization for float)
template <> CUTLASS_HOST_DEVICE float root_two<float>() {
uint32_t bits = 0x3fb504f3u;
return reinterpret_cast<float const &>(bits);
}
/// Returns sqrt(2), approximately 1.414... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> root_two< complex<float> >() {
return complex<float>(root_two<float>(), float());
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for float)
template <> CUTLASS_HOST_DEVICE float half_root_two<float>() {
uint32_t bits = 0x3f3504f3u;
return reinterpret_cast<float const &>(bits);
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> half_root_two< complex<float> >() {
return complex<float>(half_root_two<float>(), float());
}
/// Returns ln(2), approximately 0.693... (specialization for float)
template <> CUTLASS_HOST_DEVICE float ln_two<float>() {
uint32_t bits = 0x3f317218u;
return reinterpret_cast<float const &>(bits);
}
/// Returns ln(2), approximately 0.693... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> ln_two< complex<float> >() {
return complex<float>(ln_two<float>(), float());
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for float)
template <> CUTLASS_HOST_DEVICE float ln_ln_two<float>() {
uint32_t bits = 0xbebba795u;
return reinterpret_cast<float const &>(bits);
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> ln_ln_two< complex<float> >() {
return complex<float>(ln_ln_two<float>(), float());
}
/// Returns 1/3, approximately 0.333... (specialization for float)
template <> CUTLASS_HOST_DEVICE float third<float>() {
uint32_t bits = 0x3eaaaaabu;
return reinterpret_cast<float const &>(bits);
}
/// Returns 1/3, approximately 0.333... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> third< complex<float> >() {
return complex<float>(third<float>(), float());
}
/// Returns 2/3, approximately 0.666... (specialization for float)
template <> CUTLASS_HOST_DEVICE float twothirds<float>() {
uint32_t bits = 0x3f2aaaabu;
return reinterpret_cast<float const &>(bits);
}
/// Returns 2/3, approximately 0.666... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> twothirds< complex<float> >() {
return complex<float>(twothirds<float>(), float());
}
/// Returns pi - 3, approximately 0.1416... (specialization for float)
template <> CUTLASS_HOST_DEVICE float pi_minus_three<float>() {
uint32_t bits = 0x3e10fdaau;
return reinterpret_cast<float const &>(bits);
}
/// Returns pi - 3, approximately 0.1416... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> pi_minus_three< complex<float> >() {
return complex<float>(pi_minus_three<float>(), float());
}
/// Returns 4 - pi, approximately 0.858... (specialization for float)
template <> CUTLASS_HOST_DEVICE float four_minus_pi<float>() {
uint32_t bits = 0x3f5bc095u;
return reinterpret_cast<float const &>(bits);
}
/// Returns 4 - pi, approximately 0.858... (specialization for complex<float>)
template <> CUTLASS_HOST_DEVICE complex<float> four_minus_pi< complex<float> >() {
return complex<float>(four_minus_pi<float>(), float());
}
/////////////////////////////////////////////////////////////////////////////////////
// Specialization for tfloat32_t
/// Returns 1, the multiplicative identity element (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t one<tfloat32_t>() {
uint32_t bits = 0x3f801000u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 1, the multiplicative identity element (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> one< complex<tfloat32_t> >() {
return complex<tfloat32_t>(one<tfloat32_t>(), tfloat32_t());
}
/// Returns 0, the additive identity element (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t zero<tfloat32_t>() {
uint32_t bits = 0x1000u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 0, the additive identity element (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> zero< complex<tfloat32_t> >() {
return complex<tfloat32_t>(zero<tfloat32_t>(), tfloat32_t());
}
/// Returns 2 (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t two<tfloat32_t>() {
uint32_t bits = 0x40001000u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 2 (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> two< complex<tfloat32_t> >() {
return complex<tfloat32_t>(two<tfloat32_t>(), tfloat32_t());
}
/// Returns pi, approximately 3.141 (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t pi<tfloat32_t>() {
uint32_t bits = 0x40491fdbu;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns pi, approximately 3.141 (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(pi<tfloat32_t>(), tfloat32_t());
}
/// Returns 2 * pi (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t two_pi<tfloat32_t>() {
uint32_t bits = 0x40c91fdbu;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 2 * pi (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> two_pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(two_pi<tfloat32_t>(), tfloat32_t());
}
/// Returns pi / 2 (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t half_pi<tfloat32_t>() {
uint32_t bits = 0x3fc91fdbu;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns pi / 2 (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> half_pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(half_pi<tfloat32_t>(), tfloat32_t());
}
/// Returns sqrt(pi) (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t root_pi<tfloat32_t>() {
uint32_t bits = 0x3fe2efc5u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns sqrt(pi) (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> root_pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(root_pi<tfloat32_t>(), tfloat32_t());
}
/// Returns sqrt(pi / 2) (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t root_half_pi<tfloat32_t>() {
uint32_t bits = 0x3fa07c99u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns sqrt(pi / 2) (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> root_half_pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(root_half_pi<tfloat32_t>(), tfloat32_t());
}
/// Returns sqrt(2 * pi) (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t root_two_pi<tfloat32_t>() {
uint32_t bits = 0x40207c99u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns sqrt(2 * pi) (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> root_two_pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(root_two_pi<tfloat32_t>(), tfloat32_t());
}
/// Returns sqrt(ln(4)) (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t root_ln_four<tfloat32_t>() {
uint32_t bits = 0x3f96c55fu;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns sqrt(ln(4)) (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> root_ln_four< complex<tfloat32_t> >() {
return complex<tfloat32_t>(root_ln_four<tfloat32_t>(), tfloat32_t());
}
/// Returns e, approximately 2.718... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t e<tfloat32_t>() {
uint32_t bits = 0x402e0854u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns e, approximately 2.718... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> e< complex<tfloat32_t> >() {
return complex<tfloat32_t>(e<tfloat32_t>(), tfloat32_t());
}
/// Returns (1/2) (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t half<tfloat32_t>() {
uint32_t bits = 0x3f001000u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns (1/2) (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> half< complex<tfloat32_t> >() {
return complex<tfloat32_t>(half<tfloat32_t>(), tfloat32_t());
}
/// Returns sqrt(2), approximately 1.414... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t root_two<tfloat32_t>() {
uint32_t bits = 0x3fb514f3u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns sqrt(2), approximately 1.414... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> root_two< complex<tfloat32_t> >() {
return complex<tfloat32_t>(root_two<tfloat32_t>(), tfloat32_t());
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t half_root_two<tfloat32_t>() {
uint32_t bits = 0x3f3514f3u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> half_root_two< complex<tfloat32_t> >() {
return complex<tfloat32_t>(half_root_two<tfloat32_t>(), tfloat32_t());
}
/// Returns ln(2), approximately 0.693... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t ln_two<tfloat32_t>() {
uint32_t bits = 0x3f318218u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns ln(2), approximately 0.693... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> ln_two< complex<tfloat32_t> >() {
return complex<tfloat32_t>(ln_two<tfloat32_t>(), tfloat32_t());
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t ln_ln_two<tfloat32_t>() {
uint32_t bits = 0xbebbb795u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> ln_ln_two< complex<tfloat32_t> >() {
return complex<tfloat32_t>(ln_ln_two<tfloat32_t>(), tfloat32_t());
}
/// Returns 1/3, approximately 0.333... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t third<tfloat32_t>() {
uint32_t bits = 0x3eaabaabu;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 1/3, approximately 0.333... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> third< complex<tfloat32_t> >() {
return complex<tfloat32_t>(third<tfloat32_t>(), tfloat32_t());
}
/// Returns 2/3, approximately 0.666... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t twothirds<tfloat32_t>() {
uint32_t bits = 0x3f2abaabu;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 2/3, approximately 0.666... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> twothirds< complex<tfloat32_t> >() {
return complex<tfloat32_t>(twothirds<tfloat32_t>(), tfloat32_t());
}
/// Returns pi - 3, approximately 0.1416... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t pi_minus_three<tfloat32_t>() {
uint32_t bits = 0x3e110daau;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns pi - 3, approximately 0.1416... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> pi_minus_three< complex<tfloat32_t> >() {
return complex<tfloat32_t>(pi_minus_three<tfloat32_t>(), tfloat32_t());
}
/// Returns 4 - pi, approximately 0.858... (specialization for tfloat32_t)
template <> CUTLASS_HOST_DEVICE tfloat32_t four_minus_pi<tfloat32_t>() {
uint32_t bits = 0x3f5bd095u;
return reinterpret_cast<tfloat32_t const &>(bits);
}
/// Returns 4 - pi, approximately 0.858... (specialization for complex<tfloat32_t>)
template <> CUTLASS_HOST_DEVICE complex<tfloat32_t> four_minus_pi< complex<tfloat32_t> >() {
return complex<tfloat32_t>(four_minus_pi<tfloat32_t>(), tfloat32_t());
}
/////////////////////////////////////////////////////////////////////////////////////
// Specialization for half_t
/// Returns 1, the multiplicative identity element (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t one<half_t>() {
uint16_t bits = 0x3c00u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 1, the multiplicative identity element (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> one< complex<half_t> >() {
return complex<half_t>(one<half_t>(), half_t());
}
/// Returns 0, the additive identity element (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t zero<half_t>() {
uint16_t bits = 0x0u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 0, the additive identity element (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> zero< complex<half_t> >() {
return complex<half_t>(zero<half_t>(), half_t());
}
/// Returns 2 (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t two<half_t>() {
uint16_t bits = 0x4000u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 2 (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> two< complex<half_t> >() {
return complex<half_t>(two<half_t>(), half_t());
}
/// Returns pi, approximately 3.141 (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t pi<half_t>() {
uint16_t bits = 0x4248u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns pi, approximately 3.141 (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> pi< complex<half_t> >() {
return complex<half_t>(pi<half_t>(), half_t());
}
/// Returns 2 * pi (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t two_pi<half_t>() {
uint16_t bits = 0x4648u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 2 * pi (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> two_pi< complex<half_t> >() {
return complex<half_t>(two_pi<half_t>(), half_t());
}
/// Returns pi / 2 (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t half_pi<half_t>() {
uint16_t bits = 0x3e48u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns pi / 2 (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> half_pi< complex<half_t> >() {
return complex<half_t>(half_pi<half_t>(), half_t());
}
/// Returns sqrt(pi) (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t root_pi<half_t>() {
uint16_t bits = 0x3f17u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns sqrt(pi) (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> root_pi< complex<half_t> >() {
return complex<half_t>(root_pi<half_t>(), half_t());
}
/// Returns sqrt(pi / 2) (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t root_half_pi<half_t>() {
uint16_t bits = 0x3d03u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns sqrt(pi / 2) (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> root_half_pi< complex<half_t> >() {
return complex<half_t>(root_half_pi<half_t>(), half_t());
}
/// Returns sqrt(2 * pi) (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t root_two_pi<half_t>() {
uint16_t bits = 0x4103u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns sqrt(2 * pi) (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> root_two_pi< complex<half_t> >() {
return complex<half_t>(root_two_pi<half_t>(), half_t());
}
/// Returns sqrt(ln(4)) (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t root_ln_four<half_t>() {
uint16_t bits = 0x3cb6u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns sqrt(ln(4)) (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> root_ln_four< complex<half_t> >() {
return complex<half_t>(root_ln_four<half_t>(), half_t());
}
/// Returns e, approximately 2.718... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t e<half_t>() {
uint16_t bits = 0x4170u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns e, approximately 2.718... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> e< complex<half_t> >() {
return complex<half_t>(e<half_t>(), half_t());
}
/// Returns (1/2) (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t half<half_t>() {
uint16_t bits = 0x3800u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns (1/2) (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> half< complex<half_t> >() {
return complex<half_t>(half<half_t>(), half_t());
}
/// Returns sqrt(2), approximately 1.414... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t root_two<half_t>() {
uint16_t bits = 0x3da8u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns sqrt(2), approximately 1.414... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> root_two< complex<half_t> >() {
return complex<half_t>(root_two<half_t>(), half_t());
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t half_root_two<half_t>() {
uint16_t bits = 0x39a8u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> half_root_two< complex<half_t> >() {
return complex<half_t>(half_root_two<half_t>(), half_t());
}
/// Returns ln(2), approximately 0.693... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t ln_two<half_t>() {
uint16_t bits = 0x398cu;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns ln(2), approximately 0.693... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> ln_two< complex<half_t> >() {
return complex<half_t>(ln_two<half_t>(), half_t());
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t ln_ln_two<half_t>() {
uint16_t bits = 0xb5ddu;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> ln_ln_two< complex<half_t> >() {
return complex<half_t>(ln_ln_two<half_t>(), half_t());
}
/// Returns 1/3, approximately 0.333... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t third<half_t>() {
uint16_t bits = 0x3555u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 1/3, approximately 0.333... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> third< complex<half_t> >() {
return complex<half_t>(third<half_t>(), half_t());
}
/// Returns 2/3, approximately 0.666... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t twothirds<half_t>() {
uint16_t bits = 0x3955u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 2/3, approximately 0.666... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> twothirds< complex<half_t> >() {
return complex<half_t>(twothirds<half_t>(), half_t());
}
/// Returns pi - 3, approximately 0.1416... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t pi_minus_three<half_t>() {
uint16_t bits = 0x3088u;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns pi - 3, approximately 0.1416... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> pi_minus_three< complex<half_t> >() {
return complex<half_t>(pi_minus_three<half_t>(), half_t());
}
/// Returns 4 - pi, approximately 0.858... (specialization for half_t)
template <> CUTLASS_HOST_DEVICE half_t four_minus_pi<half_t>() {
uint16_t bits = 0x3adeu;
return reinterpret_cast<half_t const &>(bits);
}
/// Returns 4 - pi, approximately 0.858... (specialization for complex<half_t>)
template <> CUTLASS_HOST_DEVICE complex<half_t> four_minus_pi< complex<half_t> >() {
return complex<half_t>(four_minus_pi<half_t>(), half_t());
}
/////////////////////////////////////////////////////////////////////////////////////
// Specialization for bfloat16_t
/// Returns 1, the multiplicative identity element (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t one<bfloat16_t>() {
uint16_t bits = 0x3f80u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 1, the multiplicative identity element (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> one< complex<bfloat16_t> >() {
return complex<bfloat16_t>(one<bfloat16_t>(), bfloat16_t());
}
/// Returns 0, the additive identity element (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t zero<bfloat16_t>() {
uint16_t bits = 0x0u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 0, the additive identity element (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> zero< complex<bfloat16_t> >() {
return complex<bfloat16_t>(zero<bfloat16_t>(), bfloat16_t());
}
/// Returns 2 (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t two<bfloat16_t>() {
uint16_t bits = 0x4000u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 2 (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> two< complex<bfloat16_t> >() {
return complex<bfloat16_t>(two<bfloat16_t>(), bfloat16_t());
}
/// Returns pi, approximately 3.141 (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t pi<bfloat16_t>() {
uint16_t bits = 0x4049u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns pi, approximately 3.141 (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(pi<bfloat16_t>(), bfloat16_t());
}
/// Returns 2 * pi (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t two_pi<bfloat16_t>() {
uint16_t bits = 0x40c9u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 2 * pi (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> two_pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(two_pi<bfloat16_t>(), bfloat16_t());
}
/// Returns pi / 2 (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t half_pi<bfloat16_t>() {
uint16_t bits = 0x3fc9u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns pi / 2 (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> half_pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(half_pi<bfloat16_t>(), bfloat16_t());
}
/// Returns sqrt(pi) (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t root_pi<bfloat16_t>() {
uint16_t bits = 0x3fe3u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns sqrt(pi) (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> root_pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(root_pi<bfloat16_t>(), bfloat16_t());
}
/// Returns sqrt(pi / 2) (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t root_half_pi<bfloat16_t>() {
uint16_t bits = 0x3fa0u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns sqrt(pi / 2) (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> root_half_pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(root_half_pi<bfloat16_t>(), bfloat16_t());
}
/// Returns sqrt(2 * pi) (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t root_two_pi<bfloat16_t>() {
uint16_t bits = 0x4020u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns sqrt(2 * pi) (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> root_two_pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(root_two_pi<bfloat16_t>(), bfloat16_t());
}
/// Returns sqrt(ln(4)) (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t root_ln_four<bfloat16_t>() {
uint16_t bits = 0x3f97u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns sqrt(ln(4)) (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> root_ln_four< complex<bfloat16_t> >() {
return complex<bfloat16_t>(root_ln_four<bfloat16_t>(), bfloat16_t());
}
/// Returns e, approximately 2.718... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t e<bfloat16_t>() {
uint16_t bits = 0x402eu;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns e, approximately 2.718... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> e< complex<bfloat16_t> >() {
return complex<bfloat16_t>(e<bfloat16_t>(), bfloat16_t());
}
/// Returns (1/2) (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t half<bfloat16_t>() {
uint16_t bits = 0x3f00u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns (1/2) (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> half< complex<bfloat16_t> >() {
return complex<bfloat16_t>(half<bfloat16_t>(), bfloat16_t());
}
/// Returns sqrt(2), approximately 1.414... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t root_two<bfloat16_t>() {
uint16_t bits = 0x3fb5u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns sqrt(2), approximately 1.414... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> root_two< complex<bfloat16_t> >() {
return complex<bfloat16_t>(root_two<bfloat16_t>(), bfloat16_t());
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t half_root_two<bfloat16_t>() {
uint16_t bits = 0x3f35u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns sqrt(2)/2, approximately 0.707... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> half_root_two< complex<bfloat16_t> >() {
return complex<bfloat16_t>(half_root_two<bfloat16_t>(), bfloat16_t());
}
/// Returns ln(2), approximately 0.693... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t ln_two<bfloat16_t>() {
uint16_t bits = 0x3f31u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns ln(2), approximately 0.693... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> ln_two< complex<bfloat16_t> >() {
return complex<bfloat16_t>(ln_two<bfloat16_t>(), bfloat16_t());
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t ln_ln_two<bfloat16_t>() {
uint16_t bits = 0xbebcu;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns ln(ln(2)), approximately -0.3665... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> ln_ln_two< complex<bfloat16_t> >() {
return complex<bfloat16_t>(ln_ln_two<bfloat16_t>(), bfloat16_t());
}
/// Returns 1/3, approximately 0.333... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t third<bfloat16_t>() {
uint16_t bits = 0x3eabu;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 1/3, approximately 0.333... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> third< complex<bfloat16_t> >() {
return complex<bfloat16_t>(third<bfloat16_t>(), bfloat16_t());
}
/// Returns 2/3, approximately 0.666... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t twothirds<bfloat16_t>() {
uint16_t bits = 0x3f2bu;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 2/3, approximately 0.666... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> twothirds< complex<bfloat16_t> >() {
return complex<bfloat16_t>(twothirds<bfloat16_t>(), bfloat16_t());
}
/// Returns pi - 3, approximately 0.1416... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t pi_minus_three<bfloat16_t>() {
uint16_t bits = 0x3e11u;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns pi - 3, approximately 0.1416... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> pi_minus_three< complex<bfloat16_t> >() {
return complex<bfloat16_t>(pi_minus_three<bfloat16_t>(), bfloat16_t());
}
/// Returns 4 - pi, approximately 0.858... (specialization for bfloat16_t)
template <> CUTLASS_HOST_DEVICE bfloat16_t four_minus_pi<bfloat16_t>() {
uint16_t bits = 0x3f5cu;
return reinterpret_cast<bfloat16_t const &>(bits);
}
/// Returns 4 - pi, approximately 0.858... (specialization for complex<bfloat16_t>)
template <> CUTLASS_HOST_DEVICE complex<bfloat16_t> four_minus_pi< complex<bfloat16_t> >() {
return complex<bfloat16_t>(four_minus_pi<bfloat16_t>(), bfloat16_t());
}
///////////////////////////////////////////////////////////////////////////////////
} // namespace constants
} // namespace cutlass
///////////////////////////////////////////////////////////////////////////////////