From ddd8f9cf4126dbd73b451d7cdd17aab7242fda53 Mon Sep 17 00:00:00 2001 From: TonyZhao <35594984+SpringWave1@users.noreply.github.com> Date: Thu, 5 May 2022 01:36:05 +0800 Subject: [PATCH] update float < int32_t * 4 (#488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 赵俊涛 --- /* Review note, placed in the commentary area after the three-dash separator (patch tooling is expected to ignore text here; confirm before applying). Summary of the hunk below: it adds a DefaultIteratorsTensorOp struct to default_epilogue_tensor_op.h whose WarpTileIterator is cutlass::epilogue::warp::TileIteratorTensorOp over WarpShape, InstructionShape, int32_t and layout::RowMajor, whose SharedLoadIterator is cutlass::epilogue::threadblock::SharedLoadIterator over ThreadMap and int32_t, and whose kFragmentsPerIteration is 1. NOTE(review): this copy looks lossy. The added struct shows no template parameter list or specialization arguments even though its body names WarpShape, InstructionShape and ThreadMap, and the hunk header and diffstat announce 24 added lines while only 17 '+' entries are present. Presumably angle-bracket spans were stripped during extraction (the Co-authored-by trailer has no address either). TODO: regenerate from commit ddd8f9cf with git format-patch instead of repairing this text by hand. */ .../threadblock/default_epilogue_tensor_op.h | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h b/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h index 0c7c4a88..46f23e1b 100644 --- a/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h +++ b/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h @@ -158,6 +158,30 @@ struct DefaultIteratorsTensorOp +struct DefaultIteratorsTensorOp { + + using WarpTileIterator = cutlass::epilogue::warp::TileIteratorTensorOp< + WarpShape, + InstructionShape, + int32_t, + layout::RowMajor + >; + + using SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< + ThreadMap, + int32_t + >; + + static int const kFragmentsPerIteration = 1; +}; + /// Partial specialization for half <= float x 8 epilogues avoids shared memory bank conflicts. template < typename ThreadblockShape,