208 lines
25 KiB
HTML
208 lines
25 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
|
<meta name="generator" content="Doxygen 1.8.14"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
|
<title>Cutlass: gemm Directory Reference</title>
|
|
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="jquery.js"></script>
|
|
<script type="text/javascript" src="dynsections.js"></script>
|
|
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="search/searchdata.js"></script>
|
|
<script type="text/javascript" src="search/search.js"></script>
|
|
<script type="text/x-mathjax-config">
|
|
MathJax.Hub.Config({
|
|
extensions: ["tex2jax.js"],
|
|
jax: ["input/TeX","output/HTML-CSS"],
|
|
});
|
|
</script><script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js"></script>
|
|
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
|
</head>
|
|
<body>
|
|
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
|
<div id="titlearea">
|
|
<table cellspacing="0" cellpadding="0">
|
|
<tbody>
|
|
<tr style="height: 56px;">
|
|
<td id="projectalign" style="padding-left: 0.5em;">
|
|
<div id="projectname">Cutlass
|
|
</div>
|
|
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<!-- end header part -->
|
|
<!-- Generated by Doxygen 1.8.14 -->
|
|
<script type="text/javascript">
|
|
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */
|
|
var searchBox = new SearchBox("searchBox", "search",false,'Search');
|
|
/* @license-end */
|
|
</script>
|
|
<script type="text/javascript" src="menudata.js"></script>
|
|
<script type="text/javascript" src="menu.js"></script>
|
|
<script type="text/javascript">
|
|
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */
|
|
$(function() {
|
|
initMenu('',true,false,'search.php','Search');
|
|
$(document).ready(function() { init_search(); });
|
|
});
|
|
/* @license-end */</script>
|
|
<div id="main-nav"></div>
|
|
<!-- window showing the filter options -->
|
|
<div id="MSearchSelectWindow"
|
|
onmouseover="return searchBox.OnSearchSelectShow()"
|
|
onmouseout="return searchBox.OnSearchSelectHide()"
|
|
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
|
</div>
|
|
|
|
<!-- iframe showing the search results (closed by default) -->
|
|
<div id="MSearchResultsWindow">
|
|
<iframe src="javascript:void(0)" frameborder="0"
|
|
name="MSearchResults" id="MSearchResults">
|
|
</iframe>
|
|
</div>
|
|
|
|
<div id="nav-path" class="navpath">
|
|
<ul>
|
|
<li class="navelem"><a class="el" href="dir_1417ee5ebebc309c36b7962f26a92c39.html">cutlass</a></li><li class="navelem"><a class="el" href="dir_18d6a367a3982a494d65599933fc67a3.html">gemm</a></li> </ul>
|
|
</div>
|
|
</div><!-- top -->
|
|
<div class="header">
|
|
<div class="headertitle">
|
|
<div class="title">gemm Directory Reference</div> </div>
|
|
</div><!--header-->
|
|
<div class="contents">
|
|
<table class="memberdecls">
|
|
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="files"></a>
|
|
Files</h2></td></tr>
|
|
<tr class="memitem:clear__accumulators_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="clear__accumulators_8h.html">clear_accumulators.h</a> <a href="clear__accumulators_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:clear__accumulators_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines abstractions for efficiently clearing accumulator tiles. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:device__gemm_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="device__gemm_8h.html">device_gemm.h</a> <a href="device__gemm_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:device__gemm_8h"><td class="mdescLeft"> </td><td class="mdescRight">Device-level GEMM implemented by more than one kernel. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:device__gemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="device__gemm__traits_8h.html">device_gemm_traits.h</a> <a href="device__gemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:dgemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="dgemm__traits_8h.html">dgemm_traits.h</a> <a href="dgemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:dgemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural traits of double-precision GEMM. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:fp16__sgemm__multiply__add_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="fp16__sgemm__multiply__add_8h.html">fp16_sgemm_multiply_add.h</a> <a href="fp16__sgemm__multiply__add_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:fp16__sgemm__multiply__add_8h"><td class="mdescLeft"> </td><td class="mdescRight">Template implementing matrix multiply-add operations on fragments. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:fp16__sgemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="fp16__sgemm__traits_8h.html">fp16_sgemm_traits.h</a> <a href="fp16__sgemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:fp16__sgemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of single-precision GEMM where any number of the input/output could be fp16 or fp32. The accumulator type stays in fp32. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm_8h.html">gemm.h</a> <a href="gemm_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements a software-pipelined efficient GEMM. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__config_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__config_8h.html">gemm_config.h</a> <a href="gemm__config_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__config_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines properties of GEMM computation that impose some constraints on caller. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__coord_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__coord_8h.html">gemm_coord.h</a> <a href="gemm__coord_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__coord_8h"><td class="mdescLeft"> </td><td class="mdescRight">GemmCoord is a structure derived from <a class="el" href="structcutlass_1_1Coord.html">Coord&lt;4&gt;</a> that specifies a location within the coordinate system of a GEMM problem. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__desc_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__desc_8h.html">gemm_desc.h</a> <a href="gemm__desc_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__desc_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements a software-pipelined efficient GEMM. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__epilogue_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__epilogue_8h.html">gemm_epilogue.h</a> <a href="gemm__epilogue_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__epilogue_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__epilogue__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__epilogue__traits_8h.html">gemm_epilogue_traits.h</a> <a href="gemm__epilogue__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__epilogue__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of the GEMM epilogue. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__global__stream_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__global__stream_8h.html">gemm_global_stream.h</a> <a href="gemm__global__stream_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__global__stream_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements efficient loading of the thread block-level tile from global memory and storing to shared memory. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__global__tile_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__global__tile_8h.html">gemm_global_tile.h</a> <a href="gemm__global__tile_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__global__tile_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines iterators for efficiently loading and storing to global memory. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__operand_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__operand_8h.html">gemm_operand.h</a> <a href="gemm__operand_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__operand_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__shared__stream_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__shared__stream_8h.html">gemm_shared_stream.h</a> <a href="gemm__shared__stream_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__shared__stream_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__shared__tile_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__shared__tile_8h.html">gemm_shared_tile.h</a> <a href="gemm__shared__tile_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__shared__tile_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines iterators for efficiently loading and storing tiles to and from shared memory. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__stream__pair_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__stream__pair_8h.html">gemm_stream_pair.h</a> <a href="gemm__stream__pair_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__stream__pair_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines a pair of GEMM tile streams. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm__traits_8h.html">gemm_traits.h</a> <a href="gemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of complete GEMM computation. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:hgemm__global__tile_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="hgemm__global__tile_8h.html">hgemm_global_tile.h</a> <a href="hgemm__global__tile_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:hgemm__global__tile_8h"><td class="mdescLeft"> </td><td class="mdescRight">Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:hgemm__multiply__add_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="hgemm__multiply__add_8h.html">hgemm_multiply_add.h</a> <a href="hgemm__multiply__add_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:hgemm__multiply__add_8h"><td class="mdescLeft"> </td><td class="mdescRight">Specialization implementing multiply-add operation on half-precision floating point fragments. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:hgemm__swizzle_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="hgemm__swizzle_8h.html">hgemm_swizzle.h</a> <a href="hgemm__swizzle_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:hgemm__swizzle_8h"><td class="mdescLeft"> </td><td class="mdescRight">Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:hgemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="hgemm__traits_8h.html">hgemm_traits.h</a> <a href="hgemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:hgemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of half-precision GEMM computation. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:igemm__epilogue_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="igemm__epilogue_8h.html">igemm_epilogue.h</a> <a href="igemm__epilogue_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:igemm__epilogue_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:igemm__global__tile_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="igemm__global__tile_8h.html">igemm_global_tile.h</a> <a href="igemm__global__tile_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:igemm__global__tile_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:igemm__multiply__add_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="igemm__multiply__add_8h.html">igemm_multiply_add.h</a> <a href="igemm__multiply__add_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:igemm__multiply__add_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:igemm__swizzle_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="igemm__swizzle_8h.html">igemm_swizzle.h</a> <a href="igemm__swizzle_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:igemm__swizzle_8h"><td class="mdescLeft"> </td><td class="mdescRight">Transposes a fragment of data containing packed 8-bit integer elements. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:igemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="igemm__traits_8h.html">igemm_traits.h</a> <a href="igemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:igemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:linear__scaling_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="linear__scaling_8h.html">linear_scaling.h</a> <a href="linear__scaling_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:linear__scaling_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements the BLAS linear scaling function alpha*AB + beta*C. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:linear__scaling__device__ptr_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="linear__scaling__device__ptr_8h.html">linear_scaling_device_ptr.h</a> <a href="linear__scaling__device__ptr_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:linear__scaling__device__ptr_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements the BLAS linear scaling function alpha*AB + beta*C. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:scalar__or__pointer_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="scalar__or__pointer_8h.html">scalar_or_pointer.h</a> <a href="scalar__or__pointer_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:scalar__or__pointer_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements the BLAS linear scaling function alpha*AB + beta*C. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:sgemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="sgemm__traits_8h.html">sgemm_traits.h</a> <a href="sgemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:sgemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of single-precision GEMM. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:thread__multiply__add_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="thread__multiply__add_8h.html">thread_multiply_add.h</a> <a href="thread__multiply__add_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:thread__multiply__add_8h"><td class="mdescLeft"> </td><td class="mdescRight">Template implementing matrix multiply-add operations on fragments. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:gemm_2threadblock__swizzle_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="gemm_2threadblock__swizzle_8h.html">gemm/threadblock_swizzle.h</a> <a href="gemm_2threadblock__swizzle_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:gemm_2threadblock__swizzle_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines functors for mapping blockIdx to partitions of the GEMM computation. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:wmma__gemm__epilogue__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="wmma__gemm__epilogue__traits_8h.html">wmma_gemm_epilogue_traits.h</a> <a href="wmma__gemm__epilogue__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:wmma__gemm__epilogue__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of WMMA GEMM's epilogue phase. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:wmma__gemm__global__tile_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="wmma__gemm__global__tile_8h.html">wmma_gemm_global_tile.h</a> <a href="wmma__gemm__global__tile_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:wmma__gemm__global__tile_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines tile iterator traits for loading thread block-level tile from global memory. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:wmma__gemm__multiply__add_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="wmma__gemm__multiply__add_8h.html">wmma_gemm_multiply_add.h</a> <a href="wmma__gemm__multiply__add_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:wmma__gemm__multiply__add_8h"><td class="mdescLeft"> </td><td class="mdescRight">Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:wmma__gemm__shared__tile_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="wmma__gemm__shared__tile_8h.html">wmma_gemm_shared_tile.h</a> <a href="wmma__gemm__shared__tile_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:wmma__gemm__shared__tile_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:wmma__gemm__traits_8h"><td class="memItemLeft" align="right" valign="top">file  </td><td class="memItemRight" valign="bottom"><a class="el" href="wmma__gemm__traits_8h.html">wmma_gemm_traits.h</a> <a href="wmma__gemm__traits_8h_source.html">[code]</a></td></tr>
|
|
<tr class="memdesc:wmma__gemm__traits_8h"><td class="mdescLeft"> </td><td class="mdescRight">Defines structural properties of GEMM targeting WMMA API in CUDA. <br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
</table>
|
|
</div><!-- contents -->
|
|
<!-- start footer part -->
|
|
<hr class="footer"/><address class="footer"><small>
|
|
Generated on Fri Oct 26 2018 14:53:40 for Cutlass by  <a href="http://www.doxygen.org/index.html">
|
|
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
|
</a> 1.8.14
|
|
</small></address>
|
|
</body>
|
|
</html>
|