cutlass/docs/files.html

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<!-- lang / xml:lang declare the page's natural language (English) for assistive technology. -->
<head>
<!-- Document metadata: encoding, IE compatibility mode, generator tag and viewport. -->
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.14"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>Cutlass: File List</title>
<!-- Stylesheets and scripts below are referenced relative to this page; jquery.js is
     loaded ahead of the other scripts. -->
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<!-- MathJax configuration: tex2jax extension, TeX input, HTML-CSS output. -->
<script type="text/x-mathjax-config">
  MathJax.Hub.Config({
    extensions: ["tex2jax.js"],
    jax: ["input/TeX","output/HTML-CSS"],
});
</script>
<!-- MathJax is fetched over HTTPS from cdnjs: the former cdn.mathjax.org host has been
     retired, and a plain http: script is blocked as mixed content when this page is
     served over HTTPS. async is written in its full form so the markup stays well-formed XML. -->
<script type="text/javascript" async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<!-- Title area: project name and one-line project brief. -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <td id="projectalign" style="padding-left: 0.5em;">
   <div id="projectname">Cutlass
   </div>
   <div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
  </td>
 </tr>
 </tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.14 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
// Global search box object; the inline mouse/key handlers on #MSearchSelectWindow call its methods.
// SearchBox is not defined in this page (presumably provided by search/search.js; confirm).
var searchBox = new SearchBox("searchBox", "search",false,'Search');
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
// On DOM ready: call initMenu() first, then register init_search() through a second ready handler.
// Neither function is defined in this page (presumably they come from menu.js and search/search.js; confirm).
$(function() {
  initMenu('',true,false,'search.php','Search');
  $(document).ready(function() { init_search(); });
});
/* @license-end */</script>
<!-- Empty placeholder in the static markup; presumably filled in by initMenu() (confirm against menu.js). -->
<div id="main-nav"></div>
</div><!-- top -->
<!-- window showing the filter options; it is empty in the static markup, and its
     mouse and key events are forwarded to the global searchBox object created in the
     first inline script of this page -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>

<!-- iframe showing the search results (closed by default); the title attribute gives
     the frame an accessible name for assistive technology -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
        name="MSearchResults" id="MSearchResults" title="Search results">
</iframe>
</div>

<!-- Page header: the title is exposed as a level-1 heading through ARIA so assistive
     technology can find it, while the div and its class stay unchanged for styling. -->
<div class="header">
  <div class="headertitle">
<div class="title" role="heading" aria-level="1">File List</div>  </div>
</div><!--header-->
<div class="contents">
<div class="textblock">Here is a list of all files with brief descriptions:</div><div class="directory">
<table class="directory">
<tr id="row_0_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="batched__reduction_8h_source.html"><span class="icondoc"></span></a><a class="el" href="batched__reduction_8h.html" target="_self">batched_reduction.h</a></td><td class="desc">Implements a software-pipelined efficient batched reduction. D = alpha * Reduction(A) + beta * C </td></tr>
<tr id="row_1_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="batched__reduction__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="batched__reduction__traits_8h.html" target="_self">batched_reduction_traits.h</a></td><td class="desc">Defines structural properties of complete batched reduction. D = alpha * Reduction(A) + beta * C </td></tr>
<tr id="row_2_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="clear__accumulators_8h_source.html"><span class="icondoc"></span></a><a class="el" href="clear__accumulators_8h.html" target="_self">clear_accumulators.h</a></td><td class="desc">Defines abstractions for efficiently clearing accumulator tiles </td></tr>
<tr id="row_3_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="complex_8h_source.html"><span class="icondoc"></span></a><a class="el" href="complex_8h.html" target="_self">complex.h</a></td><td class="desc"></td></tr>
<tr id="row_4_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="convert_8h_source.html"><span class="icondoc"></span></a><a class="el" href="convert_8h.html" target="_self">convert.h</a></td><td class="desc">Defines conversion operations among Fragments of different base type </td></tr>
<tr id="row_5_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="coord_8h_source.html"><span class="icondoc"></span></a><a class="el" href="coord_8h.html" target="_self">coord.h</a></td><td class="desc">A Coord is a coordinate of arbitrary rank into a tensor or matrix </td></tr>
<tr id="row_6_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="core__io_8h_source.html"><span class="icondoc"></span></a><a class="el" href="core__io_8h.html" target="_self">core_io.h</a></td><td class="desc">Helpers for printing cutlass/core objects </td></tr>
<tr id="row_7_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="cutlass_8h_source.html"><span class="icondoc"></span></a><a class="el" href="cutlass_8h.html" target="_self">cutlass.h</a></td><td class="desc">Basic include for CUTLASS macros </td></tr>
<tr id="row_8_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="cutlass__math_8h_source.html"><span class="icondoc"></span></a><a class="el" href="cutlass__math_8h.html" target="_self">cutlass_math.h</a></td><td class="desc">Math utilities </td></tr>
<tr id="row_9_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="debug_8h_source.html"><span class="icondoc"></span></a><a class="el" href="debug_8h.html" target="_self">debug.h</a></td><td class="desc">Debugging and logging functionality </td></tr>
<tr id="row_10_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device__gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device__gemm_8h.html" target="_self">device_gemm.h</a></td><td class="desc">Device-level GEMM implemented by more than one kernel </td></tr>
<tr id="row_11_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device__gemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device__gemm__traits_8h.html" target="_self">device_gemm_traits.h</a></td><td class="desc"></td></tr>
<tr id="row_12_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="dgemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="dgemm__traits_8h.html" target="_self">dgemm_traits.h</a></td><td class="desc">Defines structural traits of double-precision GEMM </td></tr>
<tr id="row_13_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fp16__sgemm__multiply__add_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fp16__sgemm__multiply__add_8h.html" target="_self">fp16_sgemm_multiply_add.h</a></td><td class="desc">Template implementing matrix multiply-add operations on fragments </td></tr>
<tr id="row_14_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fp16__sgemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fp16__sgemm__traits_8h.html" target="_self">fp16_sgemm_traits.h</a></td><td class="desc">Defines structural properties of single-precision GEMM where any number of the input/output could be fp16 or fp32. The accumulator type stays in fp32 </td></tr>
<tr id="row_15_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment_8h.html" target="_self">fragment.h</a></td><td class="desc">Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers </td></tr>
<tr id="row_16_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment__multiply__add_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment__multiply__add_8h.html" target="_self">fragment_multiply_add.h</a></td><td class="desc">Defines multiply-add operations on fragments within a thread </td></tr>
<tr id="row_17_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_8h.html" target="_self">gemm.h</a></td><td class="desc">Implements a software-pipelined efficient GEMM </td></tr>
<tr id="row_18_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__config_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__config_8h.html" target="_self">gemm_config.h</a></td><td class="desc">Defines properties of GEMM computation that impose some constraints on caller </td></tr>
<tr id="row_19_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__coord_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__coord_8h.html" target="_self">gemm_coord.h</a></td><td class="desc">GemmCoord is a structure derived from <a class="el" href="structcutlass_1_1Coord.html">Coord&lt;4&gt;</a> that specifies a location within the coordinate system of a GEMM problem </td></tr>
<tr id="row_20_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__desc_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__desc_8h.html" target="_self">gemm_desc.h</a></td><td class="desc">Implements a software-pipelined efficient GEMM </td></tr>
<tr id="row_21_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__epilogue_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__epilogue_8h.html" target="_self">gemm_epilogue.h</a></td><td class="desc">Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product </td></tr>
<tr id="row_22_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__epilogue__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__epilogue__traits_8h.html" target="_self">gemm_epilogue_traits.h</a></td><td class="desc">Defines structural properties of the GEMM epilogue </td></tr>
<tr id="row_23_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__global__stream_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__global__stream_8h.html" target="_self">gemm_global_stream.h</a></td><td class="desc">Implements efficient loading of the thread block-level tile from global memory and storing to shared memory </td></tr>
<tr id="row_24_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__global__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__global__tile_8h.html" target="_self">gemm_global_tile.h</a></td><td class="desc">Defines iterators for efficiently loading and storing to global memory </td></tr>
<tr id="row_25_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__operand_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__operand_8h.html" target="_self">gemm_operand.h</a></td><td class="desc">Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory </td></tr>
<tr id="row_26_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__shared__stream_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__shared__stream_8h.html" target="_self">gemm_shared_stream.h</a></td><td class="desc">Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline </td></tr>
<tr id="row_27_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__shared__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__shared__tile_8h.html" target="_self">gemm_shared_tile.h</a></td><td class="desc">Defines iterators for efficiently loading and storing tiles to and from shared memory </td></tr>
<tr id="row_28_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__stream__pair_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__stream__pair_8h.html" target="_self">gemm_stream_pair.h</a></td><td class="desc">Defines a pair of GEMM tile streams </td></tr>
<tr id="row_29_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__traits_8h.html" target="_self">gemm_traits.h</a></td><td class="desc">Defines structural properties of complete GEMM computation </td></tr>
<tr id="row_30_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="hgemm__global__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="hgemm__global__tile_8h.html" target="_self">hgemm_global_tile.h</a></td><td class="desc">Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits </td></tr>
<tr id="row_31_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="hgemm__multiply__add_8h_source.html"><span class="icondoc"></span></a><a class="el" href="hgemm__multiply__add_8h.html" target="_self">hgemm_multiply_add.h</a></td><td class="desc">Specialization implementing multiply-add operation on half-precision floating point fragments </td></tr>
<tr id="row_32_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="hgemm__swizzle_8h_source.html"><span class="icondoc"></span></a><a class="el" href="hgemm__swizzle_8h.html" target="_self">hgemm_swizzle.h</a></td><td class="desc">Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands </td></tr>
<tr id="row_33_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="hgemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="hgemm__traits_8h.html" target="_self">hgemm_traits.h</a></td><td class="desc">Defines structural properties of half-precision GEMM computation </td></tr>
<tr id="row_34_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="igemm__epilogue_8h_source.html"><span class="icondoc"></span></a><a class="el" href="igemm__epilogue_8h.html" target="_self">igemm_epilogue.h</a></td><td class="desc">Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats </td></tr>
<tr id="row_35_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="igemm__global__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="igemm__global__tile_8h.html" target="_self">igemm_global_tile.h</a></td><td class="desc">Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory </td></tr>
<tr id="row_36_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="igemm__multiply__add_8h_source.html"><span class="icondoc"></span></a><a class="el" href="igemm__multiply__add_8h.html" target="_self">igemm_multiply_add.h</a></td><td class="desc">Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction </td></tr>
<tr id="row_37_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="igemm__swizzle_8h_source.html"><span class="icondoc"></span></a><a class="el" href="igemm__swizzle_8h.html" target="_self">igemm_swizzle.h</a></td><td class="desc">Transposes a fragment of data containing packed 8-bit integer elements </td></tr>
<tr id="row_38_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="igemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="igemm__traits_8h.html" target="_self">igemm_traits.h</a></td><td class="desc">Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary </td></tr>
<tr id="row_39_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="iterator__access_8h_source.html"><span class="icondoc"></span></a><a class="el" href="iterator__access_8h.html" target="_self">iterator_access.h</a></td><td class="desc">Free functions for loading and storing to implementations of tile iterator concepts </td></tr>
<tr id="row_40_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="kernel__launch_8h_source.html"><span class="icondoc"></span></a><a class="el" href="kernel__launch_8h.html" target="_self">kernel_launch.h</a></td><td class="desc">Defines structures and helpers to launch CUDA kernels within CUTLASS </td></tr>
<tr id="row_41_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="linear__scaling_8h_source.html"><span class="icondoc"></span></a><a class="el" href="linear__scaling_8h.html" target="_self">linear_scaling.h</a></td><td class="desc">Implements the BLAS linear scaling function alpha*AB + beta*C </td></tr>
<tr id="row_42_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="linear__scaling__device__ptr_8h_source.html"><span class="icondoc"></span></a><a class="el" href="linear__scaling__device__ptr_8h.html" target="_self">linear_scaling_device_ptr.h</a></td><td class="desc">Implements the BLAS linear scaling function alpha*AB + beta*C </td></tr>
<tr id="row_43_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="load__store_8h_source.html"><span class="icondoc"></span></a><a class="el" href="load__store_8h.html" target="_self">load_store.h</a></td><td class="desc">Defines abstractions for efficiently loading and storing vectors to memory </td></tr>
<tr id="row_44_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="matrix__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="matrix__traits_8h.html" target="_self">matrix_traits.h</a></td><td class="desc">Defines properties of matrices used to denote layout and operands to GEMM kernels </td></tr>
<tr id="row_45_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="numeric__types_8h_source.html"><span class="icondoc"></span></a><a class="el" href="numeric__types_8h.html" target="_self">numeric_types.h</a></td><td class="desc"></td></tr>
<tr id="row_46_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="pair_8h_source.html"><span class="icondoc"></span></a><a class="el" href="pair_8h.html" target="_self">pair.h</a></td><td class="desc">Defines a pair&lt;&gt; </td></tr>
<tr id="row_47_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="performance__tuning_8h_source.html"><span class="icondoc"></span></a><a class="el" href="performance__tuning_8h.html" target="_self">performance_tuning.h</a></td><td class="desc"></td></tr>
<tr id="row_48_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="platform_8h_source.html"><span class="icondoc"></span></a><a class="el" href="platform_8h.html" target="_self">platform.h</a></td><td class="desc">C++ features that may be otherwise unimplemented for CUDA device functions </td></tr>
<tr id="row_49_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="predicate__vector_8h_source.html"><span class="icondoc"></span></a><a class="el" href="predicate__vector_8h.html" target="_self">predicate_vector.h</a></td><td class="desc">Defines container classes and iterators for managing a statically sized vector of boolean predicates </td></tr>
<tr id="row_50_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reshape__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reshape__tile_8h.html" target="_self">reshape_tile.h</a></td><td class="desc">Defines a type for restructuring a tile </td></tr>
<tr id="row_51_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="scalar__or__pointer_8h_source.html"><span class="icondoc"></span></a><a class="el" href="scalar__or__pointer_8h.html" target="_self">scalar_or_pointer.h</a></td><td class="desc">Implements the BLAS linear scaling function alpha*AB + beta*C </td></tr>
<tr id="row_52_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="sgemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="sgemm__traits_8h.html" target="_self">sgemm_traits.h</a></td><td class="desc">Defines structural properties of single-precision GEMM </td></tr>
<tr id="row_53_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="shape_8h_source.html"><span class="icondoc"></span></a><a class="el" href="shape_8h.html" target="_self">shape.h</a></td><td class="desc">Defines Shape implementing the Layout concept for representing a 4D hypercube of objects </td></tr>
<tr id="row_54_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__ref_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__ref_8h.html" target="_self">tensor_ref.h</a></td><td class="desc">Defines a structure containing strides, bounds, and a pointer to tensor data </td></tr>
<tr id="row_55_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__ref__collection_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__ref__collection_8h.html" target="_self">tensor_ref_collection.h</a></td><td class="desc">Introduces TensorRefCollection concept and defines TensorRefBatch and TensorRefArray </td></tr>
<tr id="row_56_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__view_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__view_8h.html" target="_self">tensor_view.h</a></td><td class="desc">Defines a structure containing strides and a pointer to tensor data </td></tr>
<tr id="row_57_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="thread__multiply__add_8h_source.html"><span class="icondoc"></span></a><a class="el" href="thread__multiply__add_8h.html" target="_self">thread_multiply_add.h</a></td><td class="desc">Template implementing matrix multiply-add operations on fragments </td></tr>
<tr id="row_58_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2threadblock__swizzle_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2threadblock__swizzle_8h.html" target="_self">gemm/threadblock_swizzle.h</a></td><td class="desc">Defines functors for mapping blockIdx to partitions of the GEMM computation </td></tr>
<tr id="row_59_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reduction_2threadblock__swizzle_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reduction_2threadblock__swizzle_8h.html" target="_self">reduction/threadblock_swizzle.h</a></td><td class="desc">Defines functors for mapping blockIdx to partitions of the batched reduction computation </td></tr>
<tr id="row_60_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__allocation_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__allocation_8h.html" target="_self">tile_allocation.h</a></td><td class="desc">Defines a fragment based on a Shape&lt;&gt; template </td></tr>
<tr id="row_61_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__coord_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__coord_8h.html" target="_self">tile_coord.h</a></td><td class="desc">Defines a coordinate used for the CUTLASS 4-D tile structure </td></tr>
<tr id="row_62_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__iterator_8h.html" target="_self">tile_iterator.h</a></td><td class="desc">Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently </td></tr>
<tr id="row_63_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__stream_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__stream_8h.html" target="_self">tile_stream.h</a></td><td class="desc">Implements the tile stream concept, composing an iterator with a transformation. Offers split-phase semantics, separating the initiation of an asynchronous memory operation with a fence forcing it to complete </td></tr>
<tr id="row_64_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__traits__standard_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__traits__standard_8h.html" target="_self">tile_traits_standard.h</a></td><td class="desc">Defines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance </td></tr>
<tr id="row_65_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="vector_8h_source.html"><span class="icondoc"></span></a><a class="el" href="vector_8h.html" target="_self">vector.h</a></td><td class="desc">Defines a 1D vector of elements held in the registers of each thread </td></tr>
<tr id="row_66_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__gemm__epilogue__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__gemm__epilogue__traits_8h.html" target="_self">wmma_gemm_epilogue_traits.h</a></td><td class="desc">Defines structural properties of WMMA GEMM's epilogue phase </td></tr>
<tr id="row_67_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__gemm__global__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__gemm__global__tile_8h.html" target="_self">wmma_gemm_global_tile.h</a></td><td class="desc">Defines tile iterator traits for loading thread block-level tile from global memory </td></tr>
<tr id="row_68_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__gemm__multiply__add_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__gemm__multiply__add_8h.html" target="_self">wmma_gemm_multiply_add.h</a></td><td class="desc">Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API </td></tr>
<tr id="row_69_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__gemm__shared__tile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__gemm__shared__tile_8h.html" target="_self">wmma_gemm_shared_tile.h</a></td><td class="desc">Defines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM </td></tr>
<tr id="row_70_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__gemm__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__gemm__traits_8h.html" target="_self">wmma_gemm_traits.h</a></td><td class="desc">Defines structural properties of GEMM targeting WMMA API in CUDA </td></tr>
<tr id="row_71_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__matrix_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__matrix_8h.html" target="_self">wmma_matrix.h</a></td><td class="desc">Abstractions for loading and storing matrices using the CUDA WMMA API </td></tr>
<tr id="row_72_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="zip__fragment_8h_source.html"><span class="icondoc"></span></a><a class="el" href="zip__fragment_8h.html" target="_self">zip_fragment.h</a></td><td class="desc">Models a pair of fragments </td></tr>
<tr id="row_73_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="zip__tensor__ref_8h_source.html"><span class="icondoc"></span></a><a class="el" href="zip__tensor__ref_8h.html" target="_self">zip_tensor_ref.h</a></td><td class="desc">Defines a structure containing a pair of TensorRef-like objects </td></tr>
<tr id="row_74_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="zip__tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="zip__tile__iterator_8h.html" target="_self">zip_tile_iterator.h</a></td><td class="desc">Constructs an iterator that owns two tile iterator instances </td></tr>
</table>
</div><!-- directory -->
</div><!-- contents -->
<!-- start footer part -->
<!-- Generator credit: build timestamp, link to the Doxygen site (over HTTPS) and Doxygen version. -->
<hr class="footer"/><address class="footer"><small>
Generated on Fri Oct 26 2018 14:53:41 for Cutlass by &#160;<a href="https://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.14
</small></address>
</body>
</html>