cutlass/include/cutlass/epilogue/dispatch_policy.hpp
Vijay Thakkar be60a0b272
CUTLASS 3.5.1 (#1623)
* CUTLASS 3.5.1

* updates, optimizations, fixes
2024-07-29 08:46:24 -04:00

179 lines
6.5 KiB
C++

/***************************************************************************************************
* Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
#pragma once
#include "cutlass/numeric_conversion.h"
#include "cutlass/epilogue/thread/scale_type.h"
//////////////////////////////////////////////////////////////////////////////
namespace cutlass::epilogue {
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Builder Epilogue Schedules
//
//////////////////////////////////////////////////////////////////////////////
// Empty tag types, one per builder epilogue schedule. None of them carries
// state or members; only the identity of the type distinguishes one schedule
// from another.

// Schedules without the PtrArray prefix.
struct NoSmemWarpSpecialized {};
struct TmaWarpSpecialized {};
struct TmaWarpSpecializedCooperative {};

// PtrArray-prefixed schedules.
struct PtrArrayDefault {};
struct PtrArrayNoSmemWarpSpecialized {};
struct PtrArrayPlanarComplexNoSmemWarpSpecialized {};
struct PtrArrayTmaWarpSpecializedCooperative {};
// DEPRECATED schedules, will be removed in the next release
// Non-template base tags for the elementwise schedule templates. Each one
// inherits publicly (the default for a struct) from the schedule tag it
// extends, so it is convertible to that schedule type.
struct TmaWarpSpecializedElementwiseBase
  : TmaWarpSpecialized {};

struct TmaWarpSpecializedCooperativeElementwiseBase
  : TmaWarpSpecializedCooperative {};
// Elementwise-activation schedule derived from TmaWarpSpecializedElementwiseBase.
// Marked [[deprecated]]; the attribute message names the replacement.
//
// Template parameters:
//   ActivationFunctor_  single-parameter class template, re-exported as the
//                       member alias template ActivationFunctor
//   Scale_              re-exported as Scale (defaults to ScaleType::Default)
//   Round_              re-exported as Round (defaults to round_to_nearest)
template <
  template <class> class ActivationFunctor_,
  thread::ScaleType::Kind Scale_ = thread::ScaleType::Default,
  FloatRoundStyle Round_ = FloatRoundStyle::round_to_nearest
>
struct [[deprecated("Use TmaWarpSpecialized with fusion::LinCombEltAct instead")]]
TmaWarpSpecializedElementwise : TmaWarpSpecializedElementwiseBase {
  // Compile-time constants mirroring the non-type template arguments.
  constexpr static FloatRoundStyle Round = Round_;
  constexpr static thread::ScaleType::Kind Scale = Scale_;

  // Forwards to the activation functor template supplied by the user.
  template <class Element>
  using ActivationFunctor = ActivationFunctor_<Element>;
};
// Elementwise-activation schedule derived from
// TmaWarpSpecializedCooperativeElementwiseBase. Marked [[deprecated]]; the
// attribute message names the replacement.
//
// Template parameters:
//   ActivationFunctor_  single-parameter class template, re-exported as the
//                       member alias template ActivationFunctor
//   Scale_              re-exported as Scale (defaults to ScaleType::Default)
//   Round_              re-exported as Round (defaults to round_to_nearest)
template <
  template <class> class ActivationFunctor_,
  thread::ScaleType::Kind Scale_ = thread::ScaleType::Default,
  FloatRoundStyle Round_ = FloatRoundStyle::round_to_nearest
>
struct [[deprecated("Use TmaWarpSpecializedCooperative with fusion::LinCombEltAct instead")]]
TmaWarpSpecializedCooperativeElementwise : TmaWarpSpecializedCooperativeElementwiseBase {
  // Compile-time constants mirroring the non-type template arguments.
  constexpr static FloatRoundStyle Round = Round_;
  constexpr static thread::ScaleType::Kind Scale = Scale_;

  // Forwards to the activation functor template supplied by the user.
  template <class Element>
  using ActivationFunctor = ActivationFunctor_<Element>;
};
// Non-template base tags for the bias + elementwise schedule templates. Each
// one inherits publicly (the default for a struct) from the schedule tag it
// extends, so it is convertible to that schedule type.
struct TmaWarpSpecializedBiasElementwiseBase
  : TmaWarpSpecialized {};

struct TmaWarpSpecializedCooperativeBiasElementwiseBase
  : TmaWarpSpecializedCooperative {};
// Bias + elementwise schedule derived from TmaWarpSpecializedBiasElementwiseBase.
// Marked [[deprecated]]; the attribute message names the replacement.
//
// Every template argument is re-exported under the same name without the
// trailing underscore:
//   ActivationFunctor_ -> alias template ActivationFunctor
//   ElementT_          -> type alias ElementT
//   BiasOp_            -> alias template BiasOp
//   StoreT_            -> constant StoreT
//   ElementBias_       -> type alias ElementBias
template <
  template <class> class ActivationFunctor_,
  class ElementT_,
  template <class> class BiasOp_,
  bool StoreT_,
  class ElementBias_
>
struct [[deprecated("Use TmaWarpSpecialized with fusion::LinCombPerRowBiasEltActAux instead")]]
TmaWarpSpecializedBiasElementwise : TmaWarpSpecializedBiasElementwiseBase {
  // Plain type aliases.
  using ElementT = ElementT_;
  using ElementBias = ElementBias_;

  // Compile-time flag.
  constexpr static bool StoreT = StoreT_;

  // Alias templates forwarding to the supplied functor templates.
  template <class Element>
  using ActivationFunctor = ActivationFunctor_<Element>;

  template <class Element>
  using BiasOp = BiasOp_<Element>;
};
// Bias + elementwise schedule derived from
// TmaWarpSpecializedCooperativeBiasElementwiseBase. Marked [[deprecated]]; the
// attribute message names the replacement.
//
// Every template argument is re-exported under the same name without the
// trailing underscore:
//   ActivationFunctor_ -> alias template ActivationFunctor
//   ElementT_          -> type alias ElementT
//   BiasOp_            -> alias template BiasOp
//   StoreT_            -> constant StoreT
//   ElementBias_       -> type alias ElementBias
template <
  template <class> class ActivationFunctor_,
  class ElementT_,
  template <class> class BiasOp_,
  bool StoreT_,
  class ElementBias_
>
struct [[deprecated("Use TmaWarpSpecializedCooperative with fusion::LinCombPerRowBiasEltActAux instead")]]
TmaWarpSpecializedCooperativeBiasElementwise : TmaWarpSpecializedCooperativeBiasElementwiseBase {
  // Plain type aliases.
  using ElementT = ElementT_;
  using ElementBias = ElementBias_;

  // Compile-time flag.
  constexpr static bool StoreT = StoreT_;

  // Alias templates forwarding to the supplied functor templates.
  template <class Element>
  using ActivationFunctor = ActivationFunctor_<Element>;

  template <class Element>
  using BiasOp = BiasOp_<Element>;
};
//////////////////////////////////////////////////////////////////////////////
//
// Collective Dispatch Policies
//
//////////////////////////////////////////////////////////////////////////////
// Collective dispatch policy that holds no state: it only republishes its five
// template arguments as static compile-time constants with the same names
// minus the trailing underscore.
// NOTE(review): the names suggest stage counts for C and D, a fragment size,
// and two boolean switches; confirm the exact meaning against the code that
// consumes this policy.
template <
  int StagesC_,
  int StagesD_,
  int FragmentSize_,
  bool ReuseSmemC_,
  bool DelayTmaStore_
>
struct Sm90TmaWarpSpecialized {
  // Integer settings.
  static constexpr int StagesC = StagesC_;
  static constexpr int StagesD = StagesD_;
  static constexpr int FragmentSize = FragmentSize_;

  // Boolean settings.
  static constexpr bool ReuseSmemC = ReuseSmemC_;
  static constexpr bool DelayTmaStore = DelayTmaStore_;
};
// Collective dispatch policy that holds no state: it only republishes its five
// template arguments as static compile-time constants with the same names
// minus the trailing underscore. It exposes the same set of constants as
// Sm90TmaWarpSpecialized but is a distinct, unrelated type.
// NOTE(review): the names suggest stage counts for C and D, a fragment size,
// and two boolean switches; confirm the exact meaning against the code that
// consumes this policy.
template <
  int StagesC_,
  int StagesD_,
  int FragmentSize_,
  bool ReuseSmemC_,
  bool DelayTmaStore_
>
struct Sm90PtrArrayTmaWarpSpecialized {
  // Integer settings.
  static constexpr int StagesC = StagesC_;
  static constexpr int StagesD = StagesD_;
  static constexpr int FragmentSize = FragmentSize_;

  // Boolean settings.
  static constexpr bool ReuseSmemC = ReuseSmemC_;
  static constexpr bool DelayTmaStore = DelayTmaStore_;
};
// DEPRECATED policies, will be removed in the next release
// Stateless dispatch policy that republishes its three integer template
// arguments as static compile-time constants with the same names minus the
// trailing underscore. FragmentSize_ may be omitted, in which case it is 2.
// NOTE(review): this type carries no [[deprecated]] attribute, so using it
// produces no compiler warning.
template <
  int StagesC_,
  int StagesD_,
  int FragmentSize_ = 2
>
struct Sm90TmaWarpSpecializedBiasElementwise {
  static constexpr int StagesC = StagesC_;
  static constexpr int StagesD = StagesD_;
  static constexpr int FragmentSize = FragmentSize_;
};
//////////////////////////////////////////////////////////////////////////////
} // namespace cutlass::epilogue