From c3881d097ea8bd4ce5274977c209655c4b5ed561 Mon Sep 17 00:00:00 2001 From: Pei Sun Date: Sat, 4 Jun 2022 20:04:00 -0700 Subject: [PATCH] Fix a comment about LDSM layout. (#514) Co-authored-by: peisun1115 --- include/cutlass/gemm/warp/mma_tensor_op_tile_iterator.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cutlass/gemm/warp/mma_tensor_op_tile_iterator.h b/include/cutlass/gemm/warp/mma_tensor_op_tile_iterator.h index ebce4ed3..890504c7 100644 --- a/include/cutlass/gemm/warp/mma_tensor_op_tile_iterator.h +++ b/include/cutlass/gemm/warp/mma_tensor_op_tile_iterator.h @@ -260,8 +260,8 @@ public: else if (Policy::LdsmShape::kContiguous == 2 && kOperand == Operand::kA) { // Matrix multiply 16816 A - // Q0 Q2 - // Q1 Q3 + // Q0 Q1 + // Q2 Q3 partition_contiguous_idx = ((lane_in_quad_pair >> 2) ^ (i >> 1)); access_contiguous_idx = (((quad_pair & 1) + ((i & 1) << 1)) ^ lane_in_quad); @@ -269,8 +269,8 @@ public: } else if (Policy::LdsmShape::kContiguous == 2 && kOperand == Operand::kB) { // Matrix multiply 16816 B - // Q0 Q1 - // Q2 Q3 + // Q0 Q2 + // Q1 Q3 partition_contiguous_idx = ((lane_in_quad_pair >> 2) ^ (i >> 1)); access_contiguous_idx = ((quad_quad + ((i & 1) << 1)) ^ lane_in_quad); access_strided_idx = lane_in_quad_quad;