<trclass="memdesc:aa97b0e7f369f927c4db6d683dbb7f53b"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The scalar for A. <ahref="#aa97b0e7f369f927c4db6d683dbb7f53b">More...</a><br/></td></tr>
<trclass="memdesc:a389c0980e978463d3cc126342d8413c6"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The scalar for B. <ahref="#a389c0980e978463d3cc126342d8413c6">More...</a><br/></td></tr>
<trclass="memdesc:a30d9cfbbede42166e4e1964145dfe05d"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The scalar for C. <ahref="#a30d9cfbbede42166e4e1964145dfe05d">More...</a><br/></td></tr>
<trclass="memdesc:a7ad231b16f05a58900dca8ff0e0e7bfa"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The scalar for D. <ahref="#a7ad231b16f05a58900dca8ff0e0e7bfa">More...</a><br/></td></tr>
<trclass="memdesc:aedb95febe4a0b2943e233c95c36a22cd"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The functor to do D = A*B + C. <ahref="#aedb95febe4a0b2943e233c95c36a22cd">More...</a><br/></td></tr>
<trclass="memdesc:ad83132bd03f8bd844487d1a9da908c8b"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The shape of the instruction. <ahref="#ad83132bd03f8bd844487d1a9da908c8b">More...</a><br/></td></tr>
<trclass="memdesc:a9b987cfb25a32e671a47cb6376a361f3"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of warps. <ahref="#a9b987cfb25a32e671a47cb6376a361f3">More...</a><br/></td></tr>
<trclass="memitem:aa549183981095a3e604974e909a7d396"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#aa549183981095a3e604974e909a7d396">kWarpSize</a> = cutlass::kWarpSize</td></tr>
<trclass="memdesc:aa549183981095a3e604974e909a7d396"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The default warp size (32 threads per warp). <ahref="#aa549183981095a3e604974e909a7d396">More...</a><br/></td></tr>
<trclass="memitem:acf461f0ba3067cc5d66a04f0a176308f"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#acf461f0ba3067cc5d66a04f0a176308f">kThreads</a> = <aclass="el"href="structcutlass_1_1ShapeCount.html">ShapeCount</a><<aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a9b987cfb25a32e671a47cb6376a361f3">Warps</a>>::kCount * <aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#aa549183981095a3e604974e909a7d396">kWarpSize</a></td></tr>
<trclass="memdesc:acf461f0ba3067cc5d66a04f0a176308f"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of threads. <ahref="#acf461f0ba3067cc5d66a04f0a176308f">More...</a><br/></td></tr>
<trclass="memitem:a64e7d66e0646c7044e754b3b6b91e761"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a64e7d66e0646c7044e754b3b6b91e761">kScalarsPerLdgA</a> = kScalarsPerLdgA_</td></tr>
<trclass="memdesc:a64e7d66e0646c7044e754b3b6b91e761"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of scalars per LDG/STS/LDS for A. <ahref="#a64e7d66e0646c7044e754b3b6b91e761">More...</a><br/></td></tr>
<trclass="memitem:ab2b6167b0165ed544254dc87c2a7db8f"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#ab2b6167b0165ed544254dc87c2a7db8f">kScalarsPerStsA</a> = kScalarsPerStsA_</td></tr>
<trclass="memitem:a82295105d7ccbcce057b4c57632a644b"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a82295105d7ccbcce057b4c57632a644b">kScalarsPerLdsA</a> = kScalarsPerLdsA_</td></tr>
<trclass="memitem:aff7409a9d2666159435d3b9db16443bc"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#aff7409a9d2666159435d3b9db16443bc">kScalarsPerLdgB</a> = kScalarsPerLdgB_</td></tr>
<trclass="memdesc:aff7409a9d2666159435d3b9db16443bc"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of scalars per LDG/STS/LDS for B. <ahref="#aff7409a9d2666159435d3b9db16443bc">More...</a><br/></td></tr>
<trclass="memitem:a2a424063136c56c5ca6345496485afce"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a2a424063136c56c5ca6345496485afce">kScalarsPerStsB</a> = kScalarsPerStsB_</td></tr>
<trclass="memitem:a9ccd14b44a22dfdfeefabe1e643da65e"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a9ccd14b44a22dfdfeefabe1e643da65e">kScalarsPerLdsB</a> = kScalarsPerLdsB_</td></tr>
<trclass="memitem:a9c3985d8de485d76a5d72c91c3e3aaff"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a9c3985d8de485d76a5d72c91c3e3aaff">kScalarsPerLdgC</a> = kScalarsPerLdgCAndStgD_</td></tr>
<trclass="memdesc:a9c3985d8de485d76a5d72c91c3e3aaff"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of scalars per LDG for C. <ahref="#a9c3985d8de485d76a5d72c91c3e3aaff">More...</a><br/></td></tr>
<trclass="memitem:ade9d3c2aeb0e4d73e04a5e9a06ce5203"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#ade9d3c2aeb0e4d73e04a5e9a06ce5203">kScalarsPerStgD</a> = kScalarsPerLdgCAndStgD_</td></tr>
<trclass="memdesc:ade9d3c2aeb0e4d73e04a5e9a06ce5203"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of scalars per STS/LDS/STG for D. <ahref="#ade9d3c2aeb0e4d73e04a5e9a06ce5203">More...</a><br/></td></tr>
<trclass="memitem:a1e93d0163c0d150d33f4093b4a1ec87f"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a1e93d0163c0d150d33f4093b4a1ec87f">kScalarsPerStsD</a> = kScalarsPerStsD_</td></tr>
<trclass="memitem:a26942561aa111089b3ba0f12cf233951"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a26942561aa111089b3ba0f12cf233951">kScalarsPerLdsD</a> = kScalarsPerLdsD_</td></tr>
<trclass="memitem:a60ab48db1b87fb6063d194247bc055f5"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a60ab48db1b87fb6063d194247bc055f5">kAccumulatorsPerLdsA</a> = <aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#a82295105d7ccbcce057b4c57632a644b">kScalarsPerLdsA</a> / InstructionShape::kD</td></tr>
<trclass="memdesc:a60ab48db1b87fb6063d194247bc055f5"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of accumulators that are going to be fed from one LDS A/B. <ahref="#a60ab48db1b87fb6063d194247bc055f5">More...</a><br/></td></tr>
<trclass="memitem:ab26d76515e394118bfc530d6a81b9508"><tdclass="memItemLeft"align="right"valign="top">static int const </td><tdclass="memItemRight"valign="bottom"><aclass="el"href="structcutlass_1_1gemm_1_1GemmConfig.html#ab26d76515e394118bfc530d6a81b9508">kStages</a> = kStages_</td></tr>
<trclass="memdesc:ab26d76515e394118bfc530d6a81b9508"><tdclass="mdescLeft"> </td><tdclass="mdescRight">The number of stages in shared memory used to implement double-, triple-, or deeper multi-buffering. <ahref="#ab26d76515e394118bfc530d6a81b9508">More...</a><br/></td></tr>
<trclass="memdesc:a64c1e0d794c1bdfa8b2f3a53e387155c"><tdclass="mdescLeft"> </td><tdclass="mdescRight">If true, mainloop is instantiated twice. The first instantiation contains no predicate. <ahref="#a64c1e0d794c1bdfa8b2f3a53e387155c">More...</a><br/></td></tr>
<trclass="memdesc:a600746def904eeca46d7f92a4b223b46"><tdclass="mdescLeft"> </td><tdclass="mdescRight">If true, residue is computed in the prologue. <ahref="#a600746def904eeca46d7f92a4b223b46">More...</a><br/></td></tr>
<trclass="memdesc:a54e252cedbfee75e7c4ac2f7fe5ce45b"><tdclass="mdescLeft"> </td><tdclass="mdescRight">If true, kernel is launched with launch bounds specified. <ahref="#a54e252cedbfee75e7c4ac2f7fe5ce45b">More...</a><br/></td></tr>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>
template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_, bool kResidueSeparate_ = false, bool kResidueInProlog_ = false, bool kLaunchBounds_ = true></div>