Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
zip_tile_iterator.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
30 #pragma once
31 
32 #include "cutlass/coord.h"
33 #include "cutlass/zip_tensor_ref.h"
34 #include "cutlass/zip_fragment.h"
35 
36 namespace cutlass {
37 
39 
41 template <typename First_, typename Second_>
43  public:
/// First iterator type (alias of the First_ template argument).
45  typedef First_ First;
46 
/// Second iterator type (alias of the Second_ template argument).
48  typedef Second_ Second;
49 
/// Parameter bundle holding one Params object for each of the two zipped iterators.
51  struct Params {
/// Parameters of the first iterator.
53  typename First::Params first;
54 
/// Parameters of the second iterator.
56  typename Second::Params second;
57 
/// Default constructor; both members are left default-initialized.
60  Params() {}
61 
/// Constructs the bundle by copying the two per-iterator parameter objects.
64  Params(typename First::Params const &_first, typename Second::Params const &_second)
65  : first(_first), second(_second) {}
66  };
67 
70 
/// Predicate vector type; taken from the first iterator only.
72  typedef typename First::PredicateVector PredicateVector;
73 
/// Index type; taken from the first iterator only.
75  typedef typename First::Index Index;
76 
/// Zipped tensor reference pairing the TensorRef types of both iterators.
78  typedef ZipTensorRef<
79  typename First::TensorRef,
80  typename Second::TensorRef> TensorRef;
81 
82  //
83  // Data members
84  //
85 
88 
91 
92  //
93  // Methods
94  //
95 
97  CUTLASS_DEVICE
99 
/// Constructs both iterators from their respective members of _params. The same
/// threadblock_offset (default make_Coord(0, 0, 0)) is passed to each of them.
101  CUTLASS_DEVICE
102  ZipTileIterator(Params const &_params, Coord<3> const &threadblock_offset = make_Coord(0, 0, 0))
103  : first(_params.first, threadblock_offset), second(_params.second, threadblock_offset) {}
104 
/// Constructs a zip iterator by copying two existing iterator instances.
106  CUTLASS_DEVICE
107  ZipTileIterator(First const &_first, Second const &_second) : first(_first), second(_second) {}
108 
/// Constructs both iterators from a zipped tensor reference: `first` is built from
/// ref.first and `second` from ref.second.
110  CUTLASS_DEVICE
111  ZipTileIterator(TensorRef const &ref) : first(ref.first), second(ref.second) {}
112 
/// Constructs each iterator from its own member of _params together with its own
/// member of the zipped tensor reference `ref`.
114  CUTLASS_DEVICE
115  ZipTileIterator(Params const &_params, TensorRef const &ref):
116  first(_params.first, ref.first), second(_params.second, ref.second) {}
117 
118  //
119  // Predicate initialization
120  //
121 
/// Initializes predicates from tensor bounds and an optional block offset
/// (default make_Coord(0, 0, 0)).
/// NOTE: the call is forwarded to the first iterator only; `second` is not involved.
123  template <
125  typename PredicateIterator>
126  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
127  Coord<3> const &bounds,
128  Coord<3> const &block_offset = make_Coord(0,
129  0,
130  0)) {
131  first.initialize_predicates(predicate_it, bounds, block_offset);
132  }
133 
/// Initializes predicates using a caller-supplied predicate functor and a block offset
/// (no default for block_offset in this overload).
/// NOTE: the call is forwarded to the first iterator only; `second` is not involved.
135  template <
137  typename PredicateIterator,
139  typename PredicateFunctor>
140  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
141  PredicateFunctor const &functor,
142  Coord<3> const &block_offset) {
143  first.initialize_predicates(predicate_it, functor, block_offset);
144  }
145 
146  //
147  // No predicates
148  //
149 
/// Loads a zipped fragment and post-increments, without predicates: fragment.first is
/// handled by the first iterator, fragment.second by the second iterator.
151  template <typename Fragment>
152  CUTLASS_DEVICE void load_post_increment(Fragment &fragment) {
153  first.load_post_increment(fragment.first);
154  second.load_post_increment(fragment.second);
155  }
156 
/// Loads a zipped fragment and post-increments, without predicates. The same Coord<4>
/// offset is forwarded unchanged to both underlying iterators.
158  template <typename Fragment>
159  CUTLASS_DEVICE void load_post_increment(Fragment &fragment,
160  Coord<4> const &offset) {
161  first.load_post_increment(fragment.first, offset);
162  second.load_post_increment(fragment.second, offset);
163  }
164 
/// Loads a zipped fragment without predicates (const: calls only load() on each iterator).
/// fragment.first is filled by the first iterator, fragment.second by the second.
166  template <typename Fragment>
167  CUTLASS_DEVICE void load(Fragment &fragment) const {
168  first.load(fragment.first);
169  second.load(fragment.second);
170  }
171 
/// Loads a zipped fragment without predicates; the same Coord<4> offset is forwarded
/// unchanged to both underlying iterators.
173  template <typename Fragment>
174  CUTLASS_DEVICE void load(Fragment &fragment,
175  Coord<4> const &offset) const {
176  first.load(fragment.first, offset);
177  second.load(fragment.second, offset);
178  }
179 
/// Stores a zipped fragment and post-increments, without predicates: fragment.first goes
/// through the first iterator, fragment.second through the second iterator.
181  template <typename Fragment>
182  CUTLASS_DEVICE void store_post_increment(Fragment const &fragment) {
183  first.store_post_increment(fragment.first);
184  second.store_post_increment(fragment.second);
185  }
186 
/// Stores a zipped fragment and post-increments, without predicates. The same Coord<4>
/// offset is forwarded unchanged to both underlying iterators.
188  template <typename Fragment>
189  CUTLASS_DEVICE void store_post_increment(Fragment const &fragment,
190  Coord<4> const &offset) {
191  first.store_post_increment(fragment.first, offset);
192  second.store_post_increment(fragment.second, offset);
193  }
194 
/// Stores a zipped fragment without predicates (const: calls only store() on each iterator).
/// fragment.first goes through the first iterator, fragment.second through the second.
196  template <typename Fragment>
197  CUTLASS_DEVICE void store(Fragment const &fragment) const {
198  first.store(fragment.first);
199  second.store(fragment.second);
200  }
201 
/// Stores a zipped fragment without predicates; the same Coord<4> offset is forwarded
/// unchanged to both underlying iterators.
203  template <typename Fragment>
204  CUTLASS_DEVICE void store(Fragment const &fragment,
205  Coord<4> const &offset) const {
206  first.store(fragment.first, offset);
207  second.store(fragment.second, offset);
208  }
209 
210  //
211  // With predication
212  //
213 
/// Loads a zipped fragment and post-increments, using predicates. pred_it is taken by
/// value and the same value is handed to both underlying iterators.
215  template <typename Fragment, typename PredicateIterator>
216  CUTLASS_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) {
217  first.load_post_increment(fragment.first, pred_it);
218  second.load_post_increment(fragment.second, pred_it);
219  }
220 
/// Loads a zipped fragment using predicates. pred_it is taken by value and the same
/// value is handed to both underlying iterators.
222  template <typename Fragment, typename PredicateIterator>
223  CUTLASS_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const {
224  first.load(fragment.first, pred_it);
225  second.load(fragment.second, pred_it);
226  }
227 
/// Stores a zipped fragment and post-increments, using predicates. pred_it is taken by
/// value and the same value is handed to both underlying iterators.
229  template <typename Fragment, typename PredicateIterator>
230  CUTLASS_DEVICE void store_post_increment(Fragment const &fragment, PredicateIterator pred_it) {
231  first.store_post_increment(fragment.first, pred_it);
232  second.store_post_increment(fragment.second, pred_it);
233  }
234 
/// Stores a zipped fragment using predicates. pred_it is taken by value and the same
/// value is handed to both underlying iterators.
236  template <typename Fragment, typename PredicateIterator>
237  CUTLASS_DEVICE void store(Fragment const &fragment, PredicateIterator pred_it) const {
238  first.store(fragment.first, pred_it);
239  second.store(fragment.second, pred_it);
240  }
241 
242  //
243  // Advances the iterators
244  //
245 
/// Advances both iterators by calling increment(count) on each (count defaults to 1).
/// Returns a reference to this zip iterator so calls can be chained.
247  CUTLASS_DEVICE ZipTileIterator &increment(int count = 1) {
248  first.increment(count);
249  second.increment(count);
250  return *this;
251  }
252 
/// Pre-increment: same as increment() with its default count of 1.
254  CUTLASS_DEVICE ZipTileIterator &operator++() { return increment(); }
255 
/// Advances both iterators by `count`; same as increment(count).
256  CUTLASS_DEVICE ZipTileIterator &operator+=(int count) { return increment(count); }
257 
/// Adds the same Coord<3> offset to both underlying iterators via their own operator+=.
/// Returns a reference to this zip iterator.
259  CUTLASS_DEVICE ZipTileIterator &operator+=(Coord<3> const &offset) {
260  first += offset;
261  second += offset;
262  return *this;
263  }
264 
/// Moves both iterators backwards by calling decrement(count) on each (count defaults to 1).
/// Returns a reference to this zip iterator so calls can be chained.
266  CUTLASS_DEVICE ZipTileIterator &decrement(int count = 1) {
267  first.decrement(count);
268  second.decrement(count);
269  return *this;
270  }
271 
/// Pre-decrement: same as decrement() with its default count of 1.
273  CUTLASS_DEVICE ZipTileIterator &operator--() { return decrement(); }
274 
/// Moves both iterators backwards by `count`; same as decrement(count).
276  CUTLASS_DEVICE ZipTileIterator &operator-=(int count) { return decrement(count); }
277 
/// Calls add_pointer_offset() on both iterators with the same `offset` value.
/// NOTE(review): Index is First::Index; the value is passed as-is to `second` as well —
/// confirm the two iterators interpret the offset in compatible units.
279  CUTLASS_DEVICE void add_pointer_offset(Index offset) {
280  first.add_pointer_offset(offset);
281  second.add_pointer_offset(offset);
282  }
283 };
284 
286 
287 } // namespace cutlass
ZipFragment< typename First::Fragment, typename Second::Fragment > Fragment
Fragment type.
Definition: zip_tile_iterator.h:69
Second::Params second
Parameters of second iterator.
Definition: zip_tile_iterator.h:56
First_ First
First iterator type.
Definition: zip_tile_iterator.h:45
Definition: convert.h:33
Definition: zip_tensor_ref.h:38
First::Index Index
Index type.
Definition: zip_tile_iterator.h:75
CUTLASS_DEVICE ZipTileIterator(Params const &_params, Coord< 3 > const &threadblock_offset=make_Coord(0, 0, 0))
Constructs a zip iterator from params.
Definition: zip_tile_iterator.h:102
CUTLASS_DEVICE void store_post_increment(Fragment const &fragment, Coord< 4 > const &offset)
Stores a fragment and increments without predicates.
Definition: zip_tile_iterator.h:189
CUTLASS_DEVICE void load_post_increment(Fragment &fragment, Coord< 4 > const &offset)
Loads a fragment and increments without predicates.
Definition: zip_tile_iterator.h:159
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
CUTLASS_DEVICE void store(Fragment const &fragment) const
Stores a fragment without predicates.
Definition: zip_tile_iterator.h:197
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:318
CUTLASS_HOST_DEVICE Params(typename First::Params const &_first, typename Second::Params const &_second)
Constructs a parameters object.
Definition: zip_tile_iterator.h:64
CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
Initializes a predicate vector using a RegularTilePredicateFunctor.
Definition: zip_tile_iterator.h:126
CUTLASS_HOST_DEVICE Params()
Constructs a parameters object.
Definition: zip_tile_iterator.h:60
A template defining Fragment Concept.
Definition: zip_fragment.h:46
CUTLASS_DEVICE ZipTileIterator & operator+=(Coord< 3 > const &offset)
Adds a vector offset to the underlying iterators.
Definition: zip_tile_iterator.h:259
CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, PredicateFunctor const &functor, Coord< 3 > const &block_offset)
Initializes a predicate vector using an arbitrary predicate functor.
Definition: zip_tile_iterator.h:140
First::Params first
Parameters of first iterator.
Definition: zip_tile_iterator.h:53
CUTLASS_DEVICE void load_post_increment(Fragment &fragment)
Loads a fragment and increments without predicates.
Definition: zip_tile_iterator.h:152
CUTLASS_DEVICE ZipTileIterator(Params const &_params, TensorRef const &ref)
Constructs a zip iterator from params and a zipped tensor reference.
Definition: zip_tile_iterator.h:115
CUTLASS_DEVICE ZipTileIterator & operator+=(int count)
Definition: zip_tile_iterator.h:256
CUTLASS_DEVICE void store(Fragment const &fragment, PredicateIterator pred_it) const
Stores a fragment with predicates.
Definition: zip_tile_iterator.h:237
ZipTensorRef< typename First::TensorRef, typename Second::TensorRef > TensorRef
Tensor reference.
Definition: zip_tile_iterator.h:80
CUTLASS_DEVICE ZipTileIterator & operator-=(int count)
Decrements to previous tile.
Definition: zip_tile_iterator.h:276
CUTLASS_DEVICE void store_post_increment(Fragment const &fragment)
Stores a fragment and increments without predicates.
Definition: zip_tile_iterator.h:182
Models a pair of fragments.
First first
First fragment object.
Definition: zip_fragment.h:61
CUTLASS_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it)
Loads a fragment and increments, using predicates.
Definition: zip_tile_iterator.h:216
Defines a structure containing a pair of TensorRef-like objects.
Second_ Second
Second iterator type.
Definition: zip_tile_iterator.h:48
CUTLASS_DEVICE ZipTileIterator(First const &_first, Second const &_second)
Constructs a zip iterator from iterator instances.
Definition: zip_tile_iterator.h:107
CUTLASS_DEVICE ZipTileIterator & operator++()
Increments to next tile.
Definition: zip_tile_iterator.h:254
Constructs an iterator from a pair of iterators.
Definition: zip_tile_iterator.h:42
Second second
Second fragment object.
Definition: zip_fragment.h:64
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
CUTLASS_DEVICE ZipTileIterator()
Default constructor.
Definition: zip_tile_iterator.h:98
First first
First iterator.
Definition: zip_tile_iterator.h:87
Params object.
Definition: zip_tile_iterator.h:51
Second second
Second iterator.
Definition: zip_tile_iterator.h:90
CUTLASS_DEVICE void store(Fragment const &fragment, Coord< 4 > const &offset) const
Stores a fragment without predicates.
Definition: zip_tile_iterator.h:204
CUTLASS_DEVICE void load(Fragment &fragment) const
Loads a fragment without predicates.
Definition: zip_tile_iterator.h:167
First::PredicateVector PredicateVector
Predicate vector.
Definition: zip_tile_iterator.h:72
CUTLASS_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const
Loads a fragment with predicates.
Definition: zip_tile_iterator.h:223
CUTLASS_DEVICE ZipTileIterator & decrement(int count=1)
Decrements both iterators to a previous tile.
Definition: zip_tile_iterator.h:266
CUTLASS_DEVICE void add_pointer_offset(Index offset)
Adds an offset to both iterators.
Definition: zip_tile_iterator.h:279
CUTLASS_DEVICE ZipTileIterator & operator--()
Decrements to previous tile.
Definition: zip_tile_iterator.h:273
CUTLASS_DEVICE ZipTileIterator(TensorRef const &ref)
Constructs a zip iterator from a zipped tensor reference.
Definition: zip_tile_iterator.h:111
CUTLASS_DEVICE ZipTileIterator & increment(int count=1)
Increments store iterator to next tile.
Definition: zip_tile_iterator.h:247
CUTLASS_DEVICE void store_post_increment(Fragment const &fragment, PredicateIterator pred_it)
Stores a fragment and increments, using predicates.
Definition: zip_tile_iterator.h:230
CUTLASS_DEVICE void load(Fragment &fragment, Coord< 4 > const &offset) const
Loads a fragment without predicates.
Definition: zip_tile_iterator.h:174