| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | /***************************************************************************************************
 | 
					
						
							|  |  |  |  * Copyright (c) 2023 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | 
					
						
							|  |  |  |  * SPDX-License-Identifier: BSD-3-Clause | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Redistribution and use in source and binary forms, with or without | 
					
						
							|  |  |  |  * modification, are permitted provided that the following conditions are met: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * 1. Redistributions of source code must retain the above copyright notice, this | 
					
						
							|  |  |  |  * list of conditions and the following disclaimer. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * 2. Redistributions in binary form must reproduce the above copyright notice, | 
					
						
							|  |  |  |  * this list of conditions and the following disclaimer in the documentation | 
					
						
							|  |  |  |  * and/or other materials provided with the distribution. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * 3. Neither the name of the copyright holder nor the names of its | 
					
						
							|  |  |  |  * contributors may be used to endorse or promote products derived from | 
					
						
							|  |  |  |  * this software without specific prior written permission. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
					
						
							|  |  |  |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
					
						
							|  |  |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
					
						
							|  |  |  |  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | 
					
						
							|  |  |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 
					
						
							|  |  |  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | 
					
						
							|  |  |  |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | 
					
						
							|  |  |  |  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | 
					
						
							|  |  |  |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
					
						
							|  |  |  |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  **************************************************************************************************/ | 
					
						
							|  |  |  | #pragma once
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cute/config.hpp>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cute/arch/util.hpp>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cute/swizzle_layout.hpp>
 | 
					
						
							|  |  |  | #include <cute/tensor.hpp>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cute/pointer.hpp>
 | 
					
						
							|  |  |  | #include <cute/container/array.hpp>
 | 
					
						
							|  |  |  | #include <cute/numeric/int.hpp>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* This implements a swizzle pointer of the form
 | 
					
						
							|  |  |  |  *   InvolutionFn o PtrAdd | 
					
						
							|  |  |  |  * where the InvolutionFn need not be linear. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This differs subtly from swizzle_layout because the smem pointer is used | 
					
						
							|  |  |  |  * as the offset. That means that swizzle_layout will implement position-independent | 
					
						
							|  |  |  |  * swizzle layouts, while swizzle_ptr implements position-dependent swizzle tensors. | 
					
						
							|  |  |  |  * Arch chose to design hardware with position-dependent swizzles. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * For clarity: | 
					
						
							|  |  |  |  *   NormalLayout  : DeRef <- PtrAdd <- [Layout] | 
					
						
							|  |  |  |  *   ComposedLayout: DeRef <- PtrAdd <- [Swizzle <- OffsetAdd <- Layout] | 
					
						
							|  |  |  |  *   SwizzlePtr    : [DeRef <- Swizzle <- PtrAdd] <- Layout | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Furthermore, for known swizzles, this pointer attempts to decay itself | 
					
						
							|  |  |  |  *    to a normal-pointer with a new layout containing dynamic or static strides. | 
					
						
							|  |  |  |  * This is possible by determining the subdomain of the InvolutionFn | 
					
						
							|  |  |  |  *    that is identity and testing if the Layout's codomain is contained | 
					
						
							|  |  |  |  *    within it. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace cute | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, class Swizzle> | 
					
						
							|  |  |  | struct smem_ptr_swizzle | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-04-15 11:19:34 +08:00
										 |  |  |   static_assert(is_empty<Swizzle>::value, "Swizzle can't have state."); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-27 05:24:26 +08:00
										 |  |  |   static const uint32_t ElementsPerStoredItem = sizeof(T) * 8 / sizeof_bits_v<T>; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  |   CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  |   T* get() const | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     return ptr_; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   CUTE_HOST_DEVICE constexpr static | 
					
						
							|  |  |  |   Swizzle get_swizzle() | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     return {}; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   CUTE_HOST_DEVICE constexpr static | 
					
						
							|  |  |  |   T* apply_swizzle(T* ptr) | 
					
						
							|  |  |  |   { | 
					
						
							| 
									
										
										
										
											2023-04-15 11:19:34 +08:00
										 |  |  |     return reinterpret_cast<T*>(Swizzle::apply(reinterpret_cast<uintptr_t>(ptr))); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  |   T& operator*() const | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     return *apply_swizzle(get()); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   template <class Int> | 
					
						
							|  |  |  |   CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  |   T& operator[](Int const& i) const | 
					
						
							|  |  |  |   { | 
					
						
							| 
									
										
										
										
											2023-09-27 05:24:26 +08:00
										 |  |  |     static_assert(sizeof_bits_v<T> >= 8, "Use subbyte_iterator to access the element"); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  |     return *apply_swizzle(get() + i); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   template <class Int> | 
					
						
							|  |  |  |   CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  |   smem_ptr_swizzle operator+(Int const& i) const | 
					
						
							|  |  |  |   { | 
					
						
							| 
									
										
										
										
											2023-09-27 05:24:26 +08:00
										 |  |  |     return {ptr_ + i / ElementsPerStoredItem}; | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   T* ptr_; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, class S> | 
					
						
							|  |  |  | struct is_smem<smem_ptr_swizzle<T,S>> : true_type {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Make a swizzle pointer
 | 
					
						
							|  |  |  | template <class T, class Swizzle> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  | make_smem_ptr(T* ptr, Swizzle const&) | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | { | 
					
						
							|  |  |  |   return smem_ptr_swizzle<T,Swizzle>{ptr}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  | // Specialization for immediate decay
 | 
					
						
							|  |  |  | template <class T, int M, int S> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | make_smem_ptr(T* ptr, Swizzle<0,M,S> const&) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   return make_smem_ptr(ptr); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | // A model of a nullptr smem_ptr<T> with B == sizeof_bits<T>::value
 | 
					
						
							|  |  |  | // That represents an unset pointer. This is a placeholder type that is waiting for an smem_ptr
 | 
					
						
							|  |  |  | template <int Bits> | 
					
						
							|  |  |  | struct smem_ptr_flag_bits : Int<0> {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | using smem_ptr_flag = smem_ptr_flag_bits<1>; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // A flagged construction method to transform ComposedLayout
 | 
					
						
							|  |  |  | // Make a swizzle pointer tensor and check that the intended type size matches
 | 
					
						
							|  |  |  | template <class T, class Swizzle, int B, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | make_tensor(smem_ptr<T> const& ptr, | 
					
						
							|  |  |  |             ComposedLayout<Swizzle,smem_ptr_flag_bits<B>,Layout> const& layout) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   static_assert(B == sizeof_bits<T>::value, "Expected a B-bit pointer type."); | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  |   return make_tensor(make_smem_ptr(ptr.get(), layout.layout_a()), | 
					
						
							|  |  |  |                      layout.layout_b()); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // NOTE: To preserve smem_ptr_flag_bits under recast ops
 | 
					
						
							|  |  |  | template <int N, class Swizzle, int B, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | upcast(ComposedLayout<Swizzle,smem_ptr_flag_bits<B>,Layout> const& layout) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  |   return composition(layout.layout_a(), smem_ptr_flag_bits<B*N>{}, upcast<N>(layout.layout_b())); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <int N, class Swizzle, int B, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | downcast(ComposedLayout<Swizzle,smem_ptr_flag_bits<B>,Layout> const& layout) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  |   return composition(layout.layout_a(), smem_ptr_flag_bits<B/N>{}, downcast<N>(layout.layout_b())); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Recast
 | 
					
						
							|  |  |  | //   Swizzle operates on the pointer address, so it doesn't care about the type
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class NewT, class T, class Swizzle> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | recast(smem_ptr_swizzle<T,Swizzle> const& ptr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   return smem_ptr_swizzle<NewT,Swizzle>{recast<NewT>(ptr.ptr_)}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class NewT, class T, class Swizzle> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | recast(smem_ptr_swizzle<T const,Swizzle> const& ptr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   return smem_ptr_swizzle<NewT const,Swizzle>{recast<NewT const>(ptr.ptr_)}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  | template <class T, class Swizzle> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | T* | 
					
						
							|  |  |  | raw_pointer_cast(smem_ptr_swizzle<T,Swizzle> ptr) { | 
					
						
							|  |  |  |   return ptr.get(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | //
 | 
					
						
							| 
									
										
										
										
											2023-04-15 11:19:34 +08:00
										 |  |  | // Conversion with swizzle_layout
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, class Swizzle, int B, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | as_position_independent_swizzle_layout(ComposedLayout<Swizzle,smem_ptr_flag_bits<B>,Layout> const& layout) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  |   return composition(recast<uint_bit_t<8>,uint_bit_t<B>>(layout.layout_a()), Int<0>{}, layout.layout_b()); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, class Swizzle, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | as_position_independent_swizzle_tensor(Tensor<ViewEngine<smem_ptr_swizzle<T,Swizzle>>, Layout> const& tensor) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |   uint32_t address = cast_smem_ptr_to_uint(tensor.data().get()); | 
					
						
							|  |  |  |   uint32_t mask    = ((uint32_t(1) << Swizzle::num_base) - 1) & (Swizzle::swizzle_code); | 
					
						
							|  |  |  |   assert((address & mask) == 0);  // Alignment to the Base, Z, and Y of Swizzle
 | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   auto new_swizzle = recast<uint_bit_t<8>,uint_bit_t<sizeof_bits_v<T>>>(tensor.data().get_swizzle()); | 
					
						
							|  |  |  |   return make_tensor(make_smem_ptr(tensor.data().get()), composition(new_swizzle, Int<0>{}, tensor.layout())); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, class Swizzle, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | as_position_independent_swizzle_tensor(Tensor<ViewEngine<smem_ptr_swizzle<T,Swizzle>>, Layout>& tensor) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   { | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  |   [[maybe_unused]] uint32_t address = cast_smem_ptr_to_uint(tensor.data().get()); | 
					
						
							|  |  |  |   [[maybe_unused]] uint32_t mask    = ((uint32_t(1) << Swizzle::num_base) - 1) & (Swizzle::swizzle_code); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  |   assert((address & mask) == 0);  // Alignment to the Base, Z, and Y of Swizzle
 | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   auto new_swizzle = recast<uint_bit_t<8>,uint_bit_t<sizeof_bits_v<T>>>(tensor.data().get_swizzle()); | 
					
						
							|  |  |  |   return make_tensor(make_smem_ptr(tensor.data().get()), composition(new_swizzle, Int<0>{}, tensor.layout())); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, class Swizzle, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | as_position_independent_swizzle_tensor(Tensor<ViewEngine<smem_ptr_swizzle<T,Swizzle>>, Layout>&& tensor) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   return as_position_independent_swizzle_tensor(tensor); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  | // Pass through everything else
 | 
					
						
							|  |  |  | // Used if the tensor doesn't have a swizzled layout, e.g. Layout_MN_INTER_Atom, Layout_K_INTER_Atom
 | 
					
						
							|  |  |  | template <class Engine, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr  | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | as_position_independent_swizzle_tensor(Tensor<Engine, Layout> const& tensor) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   return tensor; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class Engine, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE constexpr | 
					
						
							|  |  |  | auto | 
					
						
							|  |  |  | as_position_independent_swizzle_tensor(Tensor<Engine, Layout>&& tensor) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   return tensor; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | // Print
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Capture and cast smem_ptr_flag Layouts to offset-0 layouts
 | 
					
						
							|  |  |  | template <class Swizzle, int B, class Layout> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | print_latex(ComposedLayout<Swizzle,smem_ptr_flag_bits<B>,Layout> const& layout) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-08-08 08:50:32 +08:00
										 |  |  |   auto new_swizzle = recast<uint_bit_t<8>,uint_bit_t<B>>(layout.layout_a()); | 
					
						
							|  |  |  |   print_latex(composition(new_swizzle, Int<0>{}, layout.layout_b())); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <int B> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE void print(smem_ptr_flag_bits<B> const& ptr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   printf("smem_ptr_%db(unset)", B); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <class T, int B, int M, int S> | 
					
						
							|  |  |  | CUTE_HOST_DEVICE void print(smem_ptr_swizzle<T,Swizzle<B,M,S>> const& ptr) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-09-27 05:24:26 +08:00
										 |  |  |   printf("smem_ptr_S<%d,%d,%d>_%db(%p)", B, M, S, int(sizeof_bits<T>::value), ptr.get()); | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-15 11:19:34 +08:00
										 |  |  | #if !defined(__CUDACC_RTC__)
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | template <class T, int B, int M, int S> | 
					
						
							|  |  |  | CUTE_HOST std::ostream& operator<<(std::ostream& os, smem_ptr_swizzle<T,Swizzle<B,M,S>> const&) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-09-27 05:24:26 +08:00
										 |  |  |   return os << "smem_ptr_S<" << B << "," << M << "," << S << ">_" << int(sizeof_bits<T>::value) << "b"; | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2023-04-15 11:19:34 +08:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2023-01-24 09:55:28 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | } // end namespace cute
 |