Updated README and CHANGELOG.

2018-09-19 20:42:51 -07:00 · 2018-09-19 20:42:51 -07:00 · 77d1e0ca81
commit 77d1e0ca81
parent d7137f9c0a
5 changed files with 5 additions and 14 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -3,7 +3,7 @@

 ## 1.1.0 (2018-09-19)
  * Turing Features
-    * WMMA GEMM targeting TensorCores - INT8, INT4, INT1
+    * WMMA GEMM targeting TensorCores - INT8, INT4, 1-bit
  * Batched Strided GEMM
  * Threadblock rasterization strategies
    * Improved performance for adverse problem sizes and data layouts
--- a/README.md
+++ b/README.md
@ -33,7 +33,7 @@ We describe the structure of an efficient GEMM in our talk at the
 * [Examples](examples/)
  * Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM
 * Turing Features
-  * [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, INT1
+  * [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, 1-bit
 * [Batched Strided GEMM](tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu)
 * [Threadblock rasterization strategies](tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu)
  * Improved performance for adverse problem sizes and data layouts
--- a/cutlass/cutlass.h
+++ b/cutlass/cutlass.h
@ -32,8 +32,8 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////

 #define CUTLASS_MAJOR 1
-#define CUTLASS_MINOR 0
-#define CUTLASS_PATCH 1
+#define CUTLASS_MINOR 1
+#define CUTLASS_PATCH 0
 #define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)

 #ifdef __NVCC__
--- a/media/images/cutlass-performance-plot.png
+++ b/media/images/cutlass-performance-plot.png
--- a/tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu
+++ b/tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu
@ -34,7 +34,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nn) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
                                     cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
      SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
 }

@ -44,7 +43,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nn) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
    cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
 }

@ -54,7 +52,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x384x192x2_nn) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
    cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(127/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
 }

@ -64,7 +61,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x388x190x2_nn) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
    cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(127/*m*/, 388/*n*/, 190/*k*/, 2 /*batch_size*/);
 }

@ -74,7 +70,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nt) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
    cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
 }

@ -84,7 +79,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nt) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
    cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
 }

@ -96,7 +90,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tn) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
    cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
 }

@ -106,7 +99,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tn) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
    cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
 }

@ -118,7 +110,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tt) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
    cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
 }

@ -128,8 +119,8 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tt) {
  typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
    cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
    SgemmTraits;
-  //think about using run_gemm directly
  run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
+