diff --git a/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h b/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h index 0c7c4a88..46f23e1b 100644 --- a/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h +++ b/include/cutlass/epilogue/threadblock/default_epilogue_tensor_op.h @@ -158,6 +158,30 @@ struct DefaultIteratorsTensorOp +struct DefaultIteratorsTensorOp { + + using WarpTileIterator = cutlass::epilogue::warp::TileIteratorTensorOp< + WarpShape, + InstructionShape, + int32_t, + layout::RowMajor + >; + + using SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< + ThreadMap, + int32_t + >; + + static int const kFragmentsPerIteration = 1; +}; + /// Partial specialization for half <= float x 8 epilogues avoids shared memory bank conflicts. template < typename ThreadblockShape,