529 lines
		
	
	
		
			39 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
		
		
			
		
	
	
			529 lines
		
	
	
		
			39 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
|   | <!doctype html> | |||
|  | <html class="no-js" lang="en"> | |||
|  |   <head><meta charset="utf-8"/> | |||
|  |     <meta name="viewport" content="width=device-width,initial-scale=1"/> | |||
|  |     <meta name="color-scheme" content="light dark"><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> | |||
|  | <link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Installation" href="install.html" /> | |||
|  |         <link rel="canonical" href="docs/index.html" /> | |||
|  | 
 | |||
|  |     <!-- Generated with Sphinx 6.1.3 and Furo 2023.03.27 --> | |||
|  |         <title>CUTLASS Python</title> | |||
|  |       <link rel="stylesheet" type="text/css" href="_static/pygments.css" /> | |||
|  |     <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=fad236701ea90a88636c2a8c73b44ae642ed2a53" /> | |||
|  |     <link rel="stylesheet" type="text/css" href="_static/copybutton.css" /> | |||
|  |     <link rel="stylesheet" type="text/css" href="_static/tabs.css" /> | |||
|  |     <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" /> | |||
|  |      | |||
|  |      | |||
|  | 
 | |||
|  | 
 | |||
|  | <style> | |||
|  |   body { | |||
|  |     --color-code-background: #eeffcc; | |||
|  |   --color-code-foreground: black; | |||
|  |   --color-brand-primary: #76B900; | |||
|  |   --color-brand-content: #76B900; | |||
|  |    | |||
|  |   } | |||
|  |   @media not print { | |||
|  |     body[data-theme="dark"] { | |||
|  |       --color-code-background: #272822; | |||
|  |   --color-code-foreground: #f8f8f2; | |||
|  |   --color-brand-primary: #76B900; | |||
|  |   --color-brand-content: #76B900; | |||
|  |    | |||
|  |     } | |||
|  |     @media (prefers-color-scheme: dark) { | |||
|  |       body:not([data-theme="light"]) { | |||
|  |         --color-code-background: #272822; | |||
|  |   --color-code-foreground: #f8f8f2; | |||
|  |   --color-brand-primary: #76B900; | |||
|  |   --color-brand-content: #76B900; | |||
|  |    | |||
|  |       } | |||
|  |     } | |||
|  |   } | |||
|  | </style></head> | |||
|  |   <body> | |||
|  |      | |||
|  |     <script> | |||
|  |       document.body.dataset.theme = localStorage.getItem("theme") || "auto"; | |||
|  |     </script> | |||
|  |      | |||
|  | 
 | |||
|  | <svg xmlns="http://www.w3.org/2000/svg" style="display: none;"> | |||
|  |   <symbol id="svg-toc" viewBox="0 0 24 24"> | |||
|  |     <title>Contents</title> | |||
|  |     <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024"> | |||
|  |       <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/> | |||
|  |     </svg> | |||
|  |   </symbol> | |||
|  |   <symbol id="svg-menu" viewBox="0 0 24 24"> | |||
|  |     <title>Menu</title> | |||
|  |     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |||
|  |       stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu"> | |||
|  |       <line x1="3" y1="12" x2="21" y2="12"></line> | |||
|  |       <line x1="3" y1="6" x2="21" y2="6"></line> | |||
|  |       <line x1="3" y1="18" x2="21" y2="18"></line> | |||
|  |     </svg> | |||
|  |   </symbol> | |||
|  |   <symbol id="svg-arrow-right" viewBox="0 0 24 24"> | |||
|  |     <title>Expand</title> | |||
|  |     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |||
|  |       stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right"> | |||
|  |       <polyline points="9 18 15 12 9 6"></polyline> | |||
|  |     </svg> | |||
|  |   </symbol> | |||
|  |   <symbol id="svg-sun" viewBox="0 0 24 24"> | |||
|  |     <title>Light mode</title> | |||
|  |     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |||
|  |       stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun"> | |||
|  |       <circle cx="12" cy="12" r="5"></circle> | |||
|  |       <line x1="12" y1="1" x2="12" y2="3"></line> | |||
|  |       <line x1="12" y1="21" x2="12" y2="23"></line> | |||
|  |       <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line> | |||
|  |       <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line> | |||
|  |       <line x1="1" y1="12" x2="3" y2="12"></line> | |||
|  |       <line x1="21" y1="12" x2="23" y2="12"></line> | |||
|  |       <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line> | |||
|  |       <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line> | |||
|  |     </svg> | |||
|  |   </symbol> | |||
|  |   <symbol id="svg-moon" viewBox="0 0 24 24"> | |||
|  |     <title>Dark mode</title> | |||
|  |     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |||
|  |       stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon"> | |||
|  |       <path stroke="none" d="M0 0h24v24H0z" fill="none" /> | |||
|  |       <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" /> | |||
|  |     </svg> | |||
|  |   </symbol> | |||
|  |   <symbol id="svg-sun-half" viewBox="0 0 24 24"> | |||
|  |     <title>Auto light/dark mode</title> | |||
|  |     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |||
|  |       stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow"> | |||
|  |       <path stroke="none" d="M0 0h24v24H0z" fill="none"/> | |||
|  |       <circle cx="12" cy="12" r="9" /> | |||
|  |       <path d="M13 12h5" /> | |||
|  |       <path d="M13 15h4" /> | |||
|  |       <path d="M13 18h1" /> | |||
|  |       <path d="M13 9h4" /> | |||
|  |       <path d="M13 6h1" /> | |||
|  |     </svg> | |||
|  |   </symbol> | |||
|  | </svg> | |||
|  | 
 | |||
|  | <input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation"> | |||
|  | <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc"> | |||
|  | <label class="overlay sidebar-overlay" for="__navigation"> | |||
|  |   <div class="visually-hidden">Hide navigation sidebar</div> | |||
|  | </label> | |||
|  | <label class="overlay toc-overlay" for="__toc"> | |||
|  |   <div class="visually-hidden">Hide table of contents sidebar</div> | |||
|  | </label> | |||
|  | 
 | |||
|  | 
 | |||
|  | 
 | |||
|  | <div class="page"> | |||
|  |   <header class="mobile-header"> | |||
|  |     <div class="header-left"> | |||
|  |       <label class="nav-overlay-icon" for="__navigation"> | |||
|  |         <div class="visually-hidden">Toggle site navigation sidebar</div> | |||
|  |         <i class="icon"><svg><use href="#svg-menu"></use></svg></i> | |||
|  |       </label> | |||
|  |     </div> | |||
|  |     <div class="header-center"> | |||
|  |       <a href="#"><div class="brand">CUTLASS Python</div></a> | |||
|  |     </div> | |||
|  |     <div class="header-right"> | |||
|  |       <div class="theme-toggle-container theme-toggle-header"> | |||
|  |         <button class="theme-toggle"> | |||
|  |           <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div> | |||
|  |           <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg> | |||
|  |           <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg> | |||
|  |           <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg> | |||
|  |         </button> | |||
|  |       </div> | |||
|  |       <label class="toc-overlay-icon toc-header-icon" for="__toc"> | |||
|  |         <div class="visually-hidden">Toggle table of contents sidebar</div> | |||
|  |         <i class="icon"><svg><use href="#svg-toc"></use></svg></i> | |||
|  |       </label> | |||
|  |     </div> | |||
|  |   </header> | |||
|  |   <aside class="sidebar-drawer"> | |||
|  |     <div class="sidebar-container"> | |||
|  |        | |||
|  |       <div class="sidebar-sticky"><a class="sidebar-brand" href="#"> | |||
|  |    | |||
|  |   <div class="sidebar-logo-container"> | |||
|  |     <img class="sidebar-logo only-light" src="_static/cutlass-logo-small.png" alt="Light Logo"/> | |||
|  |     <img class="sidebar-logo only-dark" src="_static/cutlass-logo-small.png" alt="Dark Logo"/> | |||
|  |   </div> | |||
|  |    | |||
|  |   <span class="sidebar-brand-text">CUTLASS Python</span> | |||
|  |    | |||
|  | </a><form class="sidebar-search-container" method="get" action="search.html" role="search"> | |||
|  |   <input class="sidebar-search" placeholder="Search" name="q" aria-label="Search"> | |||
|  |   <input type="hidden" name="check_keywords" value="yes"> | |||
|  |   <input type="hidden" name="area" value="default"> | |||
|  | </form> | |||
|  | <div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree"> | |||
|  |   <ul class="current"> | |||
|  | <li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Home</a></li> | |||
|  | </ul> | |||
|  | <p class="caption" role="heading"><span class="caption-text">Getting Started:</span></p> | |||
|  | <ul> | |||
|  | <li class="toctree-l1"><a class="reference internal" href="install.html">Installation</a></li> | |||
|  | <li class="toctree-l1"><a class="reference internal" href="externals/00_basic_gemm.html">Getting Started</a></li> | |||
|  | <li class="toctree-l1"><a class="reference internal" href="contribute.html">Contributing</a></li> | |||
|  | </ul> | |||
|  | <p class="caption" role="heading"><span class="caption-text">Python Documentation:</span></p> | |||
|  | <ul> | |||
|  | <li class="toctree-l1 has-children"><a class="reference internal" href="modules.html">CUTLASS Python API</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul> | |||
|  | <li class="toctree-l2 has-children"><a class="reference internal" href="cutlass.html">CUTLASS</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul> | |||
|  | <li class="toctree-l3"><a class="reference internal" href="cutlass.emit.html">Emitters</a></li> | |||
|  | <li class="toctree-l3"><a class="reference internal" href="cutlass.op.html">Operations</a></li> | |||
|  | <li class="toctree-l3"><a class="reference internal" href="cutlass.utils.html">Utilities</a></li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | </ul> | |||
|  | <p class="caption" role="heading"><span class="caption-text">Examples and Tutorials:</span></p> | |||
|  | <ul> | |||
|  | <li class="toctree-l1 has-children"><a class="reference internal" href="examples.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul> | |||
|  | <li class="toctree-l2"><a class="reference internal" href="externals/00_basic_gemm.html">Basic GEMM</a></li> | |||
|  | <li class="toctree-l2"><a class="reference internal" href="externals/01_epilogue.html">Epilogue</a></li> | |||
|  | <li class="toctree-l2"><a class="reference internal" href="externals/02_pytorch_extension_grouped_gemm.html">PyTorch Extension</a></li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | </ul> | |||
|  | <p class="caption" role="heading"><span class="caption-text">Reference:</span></p> | |||
|  | <ul> | |||
|  | <li class="toctree-l1"><a class="reference external" href="https://github.com/NVIDIA/cutlass">GitHub</a></li> | |||
|  | </ul> | |||
|  | 
 | |||
|  | </div> | |||
|  | </div> | |||
|  | 
 | |||
|  |       </div> | |||
|  |        | |||
|  |     </div> | |||
|  |   </aside> | |||
|  |   <div class="main"> | |||
|  |     <div class="content"> | |||
|  |       <div class="article-container"> | |||
|  |         <a href="#" class="back-to-top muted-link"> | |||
|  |           <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"> | |||
|  |             <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path> | |||
|  |           </svg> | |||
|  |           <span>Back to top</span> | |||
|  |         </a> | |||
|  |         <div class="content-icon-container"> | |||
|  |            | |||
|  | <div class="theme-toggle-container theme-toggle-content"> | |||
|  |             <button class="theme-toggle"> | |||
|  |               <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div> | |||
|  |               <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg> | |||
|  |               <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg> | |||
|  |               <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg> | |||
|  |             </button> | |||
|  |           </div> | |||
|  |           <label class="toc-overlay-icon toc-content-icon" for="__toc"> | |||
|  |             <div class="visually-hidden">Toggle table of contents sidebar</div> | |||
|  |             <i class="icon"><svg><use href="#svg-toc"></use></svg></i> | |||
|  |           </label> | |||
|  |         </div> | |||
|  |         <article role="main"> | |||
|  |           <section id="cutlass-python-interface"> | |||
|  | <h1>CUTLASS Python Interface<a class="headerlink" href="#cutlass-python-interface" title="Permalink to this heading">#</a></h1> | |||
|  | <p>The CUTLASS Python interface enables one to compile and run CUTLASS operations from within Python.</p> | |||
|  | <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">cutlass</span> | |||
|  | <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> | |||
|  | 
 | |||
|  | <span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float16</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |||
|  | <span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">,</span> <span class="n">C</span><span class="p">,</span> <span class="n">D</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">((</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">4096</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]</span> | |||
|  | <span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">,</span> <span class="n">C</span><span class="p">,</span> <span class="n">D</span><span class="p">)</span> | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | <p><strong>NOTE:</strong> The CUTLASS Python interface is currently an experimental release. The API may change in the future. | |||
|  | We welcome feedback from the community.</p> | |||
|  | <section id="overview"> | |||
|  | <h2>Overview<a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h2> | |||
|  | <p>The CUTLASS Python interface aims to provide an ease-of-use interface for using CUTLASS via Python. Toward this goal, | |||
|  | the CUTLASS Python interface attempts to:</p> | |||
|  | <ul class="simple"> | |||
|  | <li><p>Present high-level interfaces for operators that require only a few parameters</p></li> | |||
|  | <li><p>Select sensible default configurations for an operator given the parameters that have been specified</p></li> | |||
|  | <li><p>Enumerate configurations for users that are known to work in a given setting</p></li> | |||
|  | <li><p>Reduce the occurrence of C++ compile-time errors in favor of descriptive Python exceptions</p></li> | |||
|  | <li><p>Make it easy to export CUTLASS kernels to framework extensions (e.g., PyTorch CUDA extensions)</p></li> | |||
|  | </ul> | |||
|  | <section id="non-goals"> | |||
|  | <h3>Non-goals<a class="headerlink" href="#non-goals" title="Permalink to this heading">#</a></h3> | |||
|  | <p>The CUTLASS Python interface is not intended to:</p> | |||
|  | <p><strong>Select optimal kernel configurations.</strong> | |||
|  | As an ease-of-use interface, the default selections for operator parameters made by the CUTLASS Python interface may | |||
|  | not achieve the highest possible performance in all scenarios. Users wishing to achieve the highest performance possible | |||
|  | should consider profiling different combinations of configuration parameters, or use a library such as <a class="reference external" href="https://developer.nvidia.com/cublas">cuBLAS</a> | |||
|  | that contains heuristics for selecting kernels.</p> | |||
|  | <p><strong>Act as a fast container for CUTLASS kernels.</strong> | |||
|  | The CUTLASS Python interface does not strive to minimize overhead in its Python functions surrounding the running of a kernel. | |||
|  | Those wishing to deploy a CUTLASS kernel should consider either using the C++ emitted by the Python interface directly, or using | |||
|  | one of the CUTLASS emitters for automatically creating a framework extension for the kernel (e.g., a PyTorch CUDA extension).</p> | |||
|  | <p><strong>Act as a Python-to-CUDA-kernel JIT compilation engine.</strong> | |||
|  | The CUTLASS Python interface intends to enable one to use CUTLASS via Python. It can be used by frameworks for JIT compiling | |||
|  | Python to CUDA kernels, but does not set out to be such a framework.</p> | |||
|  | </section> | |||
|  | <section id="comparison-to-pycutlass"> | |||
|  | <h3>Comparison to PyCUTLASS<a class="headerlink" href="#comparison-to-pycutlass" title="Permalink to this heading">#</a></h3> | |||
|  | <p>The CUTLASS Python interface builds atop CUTLASS’s <a class="reference external" href="https://github.com/NVIDIA/cutlass/tree/v3.0.0/tools/library/scripts/pycutlass">PyCUTLASS</a> library. PyCUTLASS enables | |||
|  | one to declare, compile, and run GEMMs, convolutions, and grouped GEMM operators with nearly the same configuration | |||
|  | space as CUTLASS’s C++ interface. While this flexibility enables one to achieve similar levels of functionality | |||
|  | as available in CUTLASS’s C++ interface, it comes with the burden of needing to specify many configuration parameters | |||
|  | to operators – similar to what one must do in specifying template parameters to operations in CUTLASS’s C++ interface.</p> | |||
|  | <p>In contrast, the CUTLASS Python interface aims to provide a higher-level API for declaring, emitting, and compiling | |||
|  | kernels that does not require exhaustively defining template parameters.</p> | |||
|  | <section id="transitioning-from-pycutlass"> | |||
|  | <h4>Transitioning from PyCUTLASS<a class="headerlink" href="#transitioning-from-pycutlass" title="Permalink to this heading">#</a></h4> | |||
|  | <p>At present, existing PyCUTLASS functionality remains available via the CUTLASS Python interface. One can | |||
|  | continue to use PyCUTLASS by replacing references to the PyCUTLASS <code class="docutils literal notranslate"><span class="pre">cutlass</span></code> module with <code class="docutils literal notranslate"><span class="pre">cutlass_bindings</span></code> | |||
|  | and the PyCUTLASS <code class="docutils literal notranslate"><span class="pre">pycutlass</span></code> module with <code class="docutils literal notranslate"><span class="pre">cutlass.backend</span></code>.</p> | |||
|  | <p>For example, the following code using PyCUTLASS:</p> | |||
|  | <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pycutlass</span> | |||
|  | <span class="kn">import</span> <span class="nn">cutlass</span> | |||
|  | 
 | |||
|  | <span class="n">math_inst</span> <span class="o">=</span> <span class="n">pycutlass</span><span class="o">.</span><span class="n">MathInstruction</span><span class="p">(</span> | |||
|  |     <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> | |||
|  |     <span class="n">cutlass</span><span class="o">.</span><span class="n">OpClass</span><span class="o">.</span><span class="n">Simt</span><span class="p">,</span> <span class="n">pycutlass</span><span class="o">.</span><span class="n">MathOperation</span><span class="o">.</span><span class="n">multiply_add</span> | |||
|  | <span class="p">)</span> | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | <p>can work with the Python interface via:</p> | |||
|  | <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">cutlass.backend</span> <span class="k">as</span> <span class="nn">pycutlass</span> | |||
|  | <span class="kn">import</span> <span class="nn">cutlass_bindings</span> | |||
|  | 
 | |||
|  | <span class="n">math_inst</span> <span class="o">=</span> <span class="n">pycutlass</span><span class="o">.</span><span class="n">MathInstruction</span><span class="p">(</span> | |||
|  |     <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">cutlass_bindings</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">cutlass_bindings</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">cutlass_bindings</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> | |||
|  |     <span class="n">cutlass_bindings</span><span class="o">.</span><span class="n">OpClass</span><span class="o">.</span><span class="n">Simt</span><span class="p">,</span> <span class="n">pycutlass</span><span class="o">.</span><span class="n">MathOperation</span><span class="o">.</span><span class="n">multiply_add</span> | |||
|  | <span class="p">)</span> | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | <p><strong>NOTE:</strong> Backwards compatibility of <code class="docutils literal notranslate"><span class="pre">cutlass.backend</span></code> with <code class="docutils literal notranslate"><span class="pre">pycutlass</span></code> will not be maintained moving forward.</p> | |||
|  | </section> | |||
|  | </section> | |||
|  | </section> | |||
|  | <section id="current-functionality"> | |||
|  | <h2>Current functionality<a class="headerlink" href="#current-functionality" title="Permalink to this heading">#</a></h2> | |||
|  | <p>The CUTLASS Python interface currently supports the following operations:</p> | |||
|  | <ul class="simple"> | |||
|  | <li><p>GEMMs</p></li> | |||
|  | <li><p>GEMMs with fused elementwise epilogues (e.g., ReLU) (for pre-SM90 kernels)</p></li> | |||
|  | <li><p>Stream K swizzling (for pre-SM90 kernels)</p></li> | |||
|  | <li><p>Grouped GEMM (for pre-SM90 kernels)</p></li> | |||
|  | </ul> | |||
|  | </section> | |||
|  | <section id="getting-started"> | |||
|  | <h2>Getting started<a class="headerlink" href="#getting-started" title="Permalink to this heading">#</a></h2> | |||
|  | <p>We recommend using the CUTLASS Python interface via one of the Docker images located in the <code class="docutils literal notranslate"><span class="pre">docker</span></code> directory.</p> | |||
|  | <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>build<span class="w"> </span>-t<span class="w"> </span>cutlass-cuda12.1:latest<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile-cuda12.1-pytorch<span class="w"> </span>. | |||
|  | docker<span class="w"> </span>run<span class="w"> </span>--gpus<span class="w"> </span>all<span class="w"> </span>-it<span class="w"> </span>--rm<span class="w"> </span>cutlass-cuda12.1:latest | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | <p>The CUTLASS Python interface has been tested with CUDA 11.8, 12.0, and 12.1 on Python 3.8.10 and 3.9.7.</p> | |||
|  | <section id="optional-environment-variables"> | |||
|  | <h3>Optional environment variables<a class="headerlink" href="#optional-environment-variables" title="Permalink to this heading">#</a></h3> | |||
|  | <p>Prior to installing the CUTLASS Python interface, one may optionally set the following environment variables:</p> | |||
|  | <ul class="simple"> | |||
|  | <li><p><code class="docutils literal notranslate"><span class="pre">CUTLASS_PATH</span></code>: the path to the cloned CUTLASS repository</p></li> | |||
|  | <li><p><code class="docutils literal notranslate"><span class="pre">CUDA_INSTALL_PATH</span></code>: the path to the installation of CUDA</p></li> | |||
|  | </ul> | |||
|  | <p>If these environment variables are not set, the installation process will infer them to be the following:</p> | |||
|  | <ul class="simple"> | |||
|  | <li><p><code class="docutils literal notranslate"><span class="pre">CUTLASS_PATH</span></code>: one directory level above the current directory (i.e., <code class="docutils literal notranslate"><span class="pre">$(pwd)/..</span></code>)</p></li> | |||
|  | <li><p><code class="docutils literal notranslate"><span class="pre">CUDA_INSTALL_PATH</span></code>: the directory holding <code class="docutils literal notranslate"><span class="pre">/bin/nvcc</span></code> for the first version of <code class="docutils literal notranslate"><span class="pre">nvcc</span></code> on <code class="docutils literal notranslate"><span class="pre">$PATH</span></code> (i.e., <code class="docutils literal notranslate"><span class="pre">which</span> <span class="pre">nvcc</span> <span class="pre">|</span> <span class="pre">awk</span> <span class="pre">-F'/bin/nvcc'</span> <span class="pre">'{print</span> <span class="pre">$1}'</span></code>)</p></li> | |||
|  | </ul> | |||
|  | <p><strong>NOTE:</strong> The version of <code class="docutils literal notranslate"><span class="pre">cuda-python</span></code> installed must match the CUDA version in <code class="docutils literal notranslate"><span class="pre">CUDA_INSTALL_PATH</span></code>.</p> | |||
|  | </section> | |||
|  | <section id="installation"> | |||
|  | <h3>Installation<a class="headerlink" href="#installation" title="Permalink to this heading">#</a></h3> | |||
|  | <p>The CUTLASS Python interface can currently be installed via:</p> | |||
|  | <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>setup.py<span class="w"> </span>develop<span class="w"> </span>--user | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | <p>This will allow changes to the Python interface source to be reflected when using the Python interface.</p> | |||
|  | <p>We plan to add support for installing via <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">setup.py</span> <span class="pre">install</span></code> in a future release.</p> | |||
|  | </section> | |||
|  | </section> | |||
|  | <section id="examples"> | |||
|  | <h2>Examples<a class="headerlink" href="#examples" title="Permalink to this heading">#</a></h2> | |||
|  | <p>Jupyter notebook examples of using the CUTLASS Python interface are located in <code class="docutils literal notranslate"><span class="pre">examples/python</span></code>.</p> | |||
|  | <p>To launch these notebooks from this directory, run:</p> | |||
|  | <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>jupyter-lab<span class="w"> </span>../examples/python | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | </section> | |||
|  | <section id="building-documentation"> | |||
|  | <h2>Building documentation<a class="headerlink" href="#building-documentation" title="Permalink to this heading">#</a></h2> | |||
|  | <p>The CUTLASS Python interface uses <a class="reference external" href="https://www.sphinx-doc.org/en/master/">Sphinx</a> for documentation.</p> | |||
|  | <p>Building the documentation requires additional packages. These can be installed via:</p> | |||
|  | <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>sudo<span class="w"> </span>apt-get<span class="w"> </span>install<span class="w"> </span>pandoc | |||
|  | pip<span class="w"> </span>install<span class="w"> </span>--upgrade<span class="w"> </span>Sphinx<span class="w"> </span>furo<span class="w"> </span>pandoc<span class="w"> </span>myst-parser<span class="w"> </span>sphinx-copybutton<span class="w"> </span>nbsphinx<span class="w"> </span>nbsphinx-link<span class="w"> </span>sphinx-inline-tabs | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | <p>To build documentation, you must first have installed the CUTLASS Python interface via the | |||
|  | <a class="reference internal" href="install.html#installation"><span class="std std-ref">installation instructions</span></a>.</p> | |||
|  | <p>Documentation can then be built via the following commands:</p> | |||
|  | <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>sphinx-apidoc<span class="w"> </span>-o<span class="w"> </span>docs_src/source/<span class="w"> </span>cutlass/<span class="w"> </span>cutlass/backend* | |||
|  | <span class="nb">cd</span><span class="w"> </span>docs_src | |||
|  | make<span class="w"> </span>html | |||
|  | mv<span class="w"> </span>_build/*<span class="w"> </span>../docs | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | </section> | |||
|  | </section> | |||
|  | <section id="copyright"> | |||
|  | <h1>Copyright<a class="headerlink" href="#copyright" title="Permalink to this heading">#</a></h1> | |||
|  | <p>Copyright (c) 2023 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |||
|  | SPDX-License-Identifier: BSD-3-Clause</p> | |||
|  | <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>  <span class="n">Redistribution</span> <span class="ow">and</span> <span class="n">use</span> <span class="ow">in</span> <span class="n">source</span> <span class="ow">and</span> <span class="n">binary</span> <span class="n">forms</span><span class="p">,</span> <span class="k">with</span> <span class="ow">or</span> <span class="n">without</span> | |||
|  |   <span class="n">modification</span><span class="p">,</span> <span class="n">are</span> <span class="n">permitted</span> <span class="n">provided</span> <span class="n">that</span> <span class="n">the</span> <span class="n">following</span> <span class="n">conditions</span> <span class="n">are</span> <span class="n">met</span><span class="p">:</span> | |||
|  | 
 | |||
|  |   <span class="mf">1.</span> <span class="n">Redistributions</span> <span class="n">of</span> <span class="n">source</span> <span class="n">code</span> <span class="n">must</span> <span class="n">retain</span> <span class="n">the</span> <span class="n">above</span> <span class="n">copyright</span> <span class="n">notice</span><span class="p">,</span> <span class="n">this</span> | |||
|  |   <span class="nb">list</span> <span class="n">of</span> <span class="n">conditions</span> <span class="ow">and</span> <span class="n">the</span> <span class="n">following</span> <span class="n">disclaimer</span><span class="o">.</span> | |||
|  | 
 | |||
|  |   <span class="mf">2.</span> <span class="n">Redistributions</span> <span class="ow">in</span> <span class="n">binary</span> <span class="n">form</span> <span class="n">must</span> <span class="n">reproduce</span> <span class="n">the</span> <span class="n">above</span> <span class="n">copyright</span> <span class="n">notice</span><span class="p">,</span> | |||
|  |   <span class="n">this</span> <span class="nb">list</span> <span class="n">of</span> <span class="n">conditions</span> <span class="ow">and</span> <span class="n">the</span> <span class="n">following</span> <span class="n">disclaimer</span> <span class="ow">in</span> <span class="n">the</span> <span class="n">documentation</span> | |||
|  |   <span class="ow">and</span><span class="o">/</span><span class="ow">or</span> <span class="n">other</span> <span class="n">materials</span> <span class="n">provided</span> <span class="k">with</span> <span class="n">the</span> <span class="n">distribution</span><span class="o">.</span> | |||
|  | 
 | |||
|  |   <span class="mf">3.</span> <span class="n">Neither</span> <span class="n">the</span> <span class="n">name</span> <span class="n">of</span> <span class="n">the</span> <span class="n">copyright</span> <span class="n">holder</span> <span class="n">nor</span> <span class="n">the</span> <span class="n">names</span> <span class="n">of</span> <span class="n">its</span> | |||
|  |   <span class="n">contributors</span> <span class="n">may</span> <span class="n">be</span> <span class="n">used</span> <span class="n">to</span> <span class="n">endorse</span> <span class="ow">or</span> <span class="n">promote</span> <span class="n">products</span> <span class="n">derived</span> <span class="kn">from</span> | |||
|  |   <span class="nn">this</span> <span class="n">software</span> <span class="n">without</span> <span class="n">specific</span> <span class="n">prior</span> <span class="n">written</span> <span class="n">permission</span><span class="o">.</span> | |||
|  | 
 | |||
|  |   <span class="n">THIS</span> <span class="n">SOFTWARE</span> <span class="n">IS</span> <span class="n">PROVIDED</span> <span class="n">BY</span> <span class="n">THE</span> <span class="n">COPYRIGHT</span> <span class="n">HOLDERS</span> <span class="n">AND</span> <span class="n">CONTRIBUTORS</span> <span class="s2">"AS IS"</span> | |||
|  |   <span class="n">AND</span> <span class="n">ANY</span> <span class="n">EXPRESS</span> <span class="n">OR</span> <span class="n">IMPLIED</span> <span class="n">WARRANTIES</span><span class="p">,</span> <span class="n">INCLUDING</span><span class="p">,</span> <span class="n">BUT</span> <span class="n">NOT</span> <span class="n">LIMITED</span> <span class="n">TO</span><span class="p">,</span> <span class="n">THE</span> | |||
|  |   <span class="n">IMPLIED</span> <span class="n">WARRANTIES</span> <span class="n">OF</span> <span class="n">MERCHANTABILITY</span> <span class="n">AND</span> <span class="n">FITNESS</span> <span class="n">FOR</span> <span class="n">A</span> <span class="n">PARTICULAR</span> <span class="n">PURPOSE</span> <span class="n">ARE</span> | |||
|  |   <span class="n">DISCLAIMED</span><span class="o">.</span> <span class="n">IN</span> <span class="n">NO</span> <span class="n">EVENT</span> <span class="n">SHALL</span> <span class="n">THE</span> <span class="n">COPYRIGHT</span> <span class="n">HOLDER</span> <span class="n">OR</span> <span class="n">CONTRIBUTORS</span> <span class="n">BE</span> <span class="n">LIABLE</span> | |||
|  |   <span class="n">FOR</span> <span class="n">ANY</span> <span class="n">DIRECT</span><span class="p">,</span> <span class="n">INDIRECT</span><span class="p">,</span> <span class="n">INCIDENTAL</span><span class="p">,</span> <span class="n">SPECIAL</span><span class="p">,</span> <span class="n">EXEMPLARY</span><span class="p">,</span> <span class="n">OR</span> <span class="n">CONSEQUENTIAL</span> | |||
|  |   <span class="n">DAMAGES</span> <span class="p">(</span><span class="n">INCLUDING</span><span class="p">,</span> <span class="n">BUT</span> <span class="n">NOT</span> <span class="n">LIMITED</span> <span class="n">TO</span><span class="p">,</span> <span class="n">PROCUREMENT</span> <span class="n">OF</span> <span class="n">SUBSTITUTE</span> <span class="n">GOODS</span> <span class="n">OR</span> | |||
|  |   <span class="n">SERVICES</span><span class="p">;</span> <span class="n">LOSS</span> <span class="n">OF</span> <span class="n">USE</span><span class="p">,</span> <span class="n">DATA</span><span class="p">,</span> <span class="n">OR</span> <span class="n">PROFITS</span><span class="p">;</span> <span class="n">OR</span> <span class="n">BUSINESS</span> <span class="n">INTERRUPTION</span><span class="p">)</span> <span class="n">HOWEVER</span> | |||
|  |   <span class="n">CAUSED</span> <span class="n">AND</span> <span class="n">ON</span> <span class="n">ANY</span> <span class="n">THEORY</span> <span class="n">OF</span> <span class="n">LIABILITY</span><span class="p">,</span> <span class="n">WHETHER</span> <span class="n">IN</span> <span class="n">CONTRACT</span><span class="p">,</span> <span class="n">STRICT</span> <span class="n">LIABILITY</span><span class="p">,</span> | |||
|  |   <span class="n">OR</span> <span class="n">TORT</span> <span class="p">(</span><span class="n">INCLUDING</span> <span class="n">NEGLIGENCE</span> <span class="n">OR</span> <span class="n">OTHERWISE</span><span class="p">)</span> <span class="n">ARISING</span> <span class="n">IN</span> <span class="n">ANY</span> <span class="n">WAY</span> <span class="n">OUT</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">USE</span> | |||
|  |   <span class="n">OF</span> <span class="n">THIS</span> <span class="n">SOFTWARE</span><span class="p">,</span> <span class="n">EVEN</span> <span class="n">IF</span> <span class="n">ADVISED</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">POSSIBILITY</span> <span class="n">OF</span> <span class="n">SUCH</span> <span class="n">DAMAGE</span><span class="o">.</span> | |||
|  | </pre></div> | |||
|  | </div> | |||
|  | </section> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <div class="toctree-wrapper compound"> | |||
|  | </div> | |||
|  | <section id="indices-and-tables"> | |||
|  | <h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Permalink to this heading">#</a></h1> | |||
|  | <ul class="simple"> | |||
|  | <li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li> | |||
|  | <li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li> | |||
|  | <li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li> | |||
|  | </ul> | |||
|  | </section> | |||
|  | 
 | |||
|  |         </article> | |||
|  |       </div> | |||
|  |       <footer> | |||
|  |          | |||
|  |         <div class="related-pages"> | |||
|  |           <a class="next-page" href="install.html"> | |||
|  |               <div class="page-info"> | |||
|  |                 <div class="context"> | |||
|  |                   <span>Next</span> | |||
|  |                 </div> | |||
|  |                 <div class="title">Installation</div> | |||
|  |               </div> | |||
|  |               <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg> | |||
|  |             </a> | |||
|  |            | |||
|  |         </div> | |||
|  |         <div class="bottom-of-page"> | |||
|  |           <div class="left-details"> | |||
|  |             <div class="copyright"> | |||
|  |                 Copyright © 2023, NVIDIA | |||
|  |             </div> | |||
|  |             Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s | |||
|  |              | |||
|  |             <a href="https://github.com/pradyunsg/furo">Furo</a> | |||
|  |              | |||
|  |           </div> | |||
|  |           <div class="right-details"> | |||
|  |             <div class="icons"> | |||
|  |               <a class="muted-link " href="https://github.com/NVIDIA/cutlass" aria-label="GitHub"> | |||
|  |                 <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16"> | |||
|  |                     <path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path> | |||
|  |                 </svg> | |||
|  |             </a> | |||
|  |                | |||
|  |             </div> | |||
|  |           </div> | |||
|  |         </div> | |||
|  |          | |||
|  |       </footer> | |||
|  |     </div> | |||
|  |     <aside class="toc-drawer"> | |||
|  |        | |||
|  |        | |||
|  |       <div class="toc-sticky toc-scroll"> | |||
|  |         <div class="toc-title-container"> | |||
|  |           <span class="toc-title"> | |||
|  |             On this page | |||
|  |           </span> | |||
|  |         </div> | |||
|  |         <div class="toc-tree-container"> | |||
|  |           <div class="toc-tree"> | |||
|  |             <ul> | |||
|  | <li><a class="reference internal" href="#">CUTLASS Python Interface</a><ul> | |||
|  | <li><a class="reference internal" href="#overview">Overview</a><ul> | |||
|  | <li><a class="reference internal" href="#non-goals">Non-goals</a></li> | |||
|  | <li><a class="reference internal" href="#comparison-to-pycutlass">Comparison to PyCUTLASS</a><ul> | |||
|  | <li><a class="reference internal" href="#transitioning-from-pycutlass">Transitioning from PyCUTLASS</a></li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | <li><a class="reference internal" href="#current-functionality">Current functionality</a></li> | |||
|  | <li><a class="reference internal" href="#getting-started">Getting started</a><ul> | |||
|  | <li><a class="reference internal" href="#optional-environment-variables">Optional environment variables</a></li> | |||
|  | <li><a class="reference internal" href="#installation">Installation</a></li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | <li><a class="reference internal" href="#examples">Examples</a></li> | |||
|  | <li><a class="reference internal" href="#building-documentation">Building documentation</a></li> | |||
|  | </ul> | |||
|  | </li> | |||
|  | <li><a class="reference internal" href="#copyright">Copyright</a></li> | |||
|  | <li><a class="reference internal" href="#indices-and-tables">Indices and tables</a></li> | |||
|  | </ul> | |||
|  | 
 | |||
|  |           </div> | |||
|  |         </div> | |||
|  |       </div> | |||
|  |        | |||
|  |        | |||
|  |     </aside> | |||
|  |   </div> | |||
|  | </div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script> | |||
|  |     <script src="_static/doctools.js"></script> | |||
|  |     <script src="_static/sphinx_highlight.js"></script> | |||
|  |     <script src="_static/scripts/furo.js"></script> | |||
|  |     <script src="_static/clipboard.min.js"></script> | |||
|  |     <script src="_static/copybutton.js"></script> | |||
|  |     <script src="_static/tabs.js"></script> | |||
|  |     <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> | |||
|  |     </body> | |||
|  | </html> |