Updated README and CHANGELOG.

This commit is contained in:
akerr 2018-09-19 20:42:51 -07:00
parent d7137f9c0a
commit 77d1e0ca81
5 changed files with 5 additions and 14 deletions

View File

@@ -3,7 +3,7 @@
## 1.1.0 (2018-09-19)
* Turing Features
-* WMMA GEMM targeting TensorCores - INT8, INT4, INT1
+* WMMA GEMM targeting TensorCores - INT8, INT4, 1-bit
* Batched Strided GEMM
* Threadblock rasterization strategies
* Improved performance for adverse problem sizes and data layouts

View File

@@ -33,7 +33,7 @@ We describe the structure of an efficient GEMM in our talk at the
* [Examples](examples/)
* Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM
* Turing Features
-* [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, INT1
+* [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, 1-bit
* [Batched Strided GEMM](tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu)
* [Threadblock rasterization strategies](tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu)
* Improved performance for adverse problem sizes and data layouts

View File

@@ -32,8 +32,8 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
#define CUTLASS_MAJOR 1
-#define CUTLASS_MINOR 0
-#define CUTLASS_PATCH 1
+#define CUTLASS_MINOR 1
+#define CUTLASS_PATCH 0
#define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
#ifdef __NVCC__

Binary file not shown.

Before

Width:  |  Height:  |  Size: 112 KiB

After

Width:  |  Height:  |  Size: 89 KiB

View File

@@ -34,7 +34,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nn) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
}
@@ -44,7 +43,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nn) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
}
@@ -54,7 +52,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x384x192x2_nn) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
}
@@ -64,7 +61,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x388x190x2_nn) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 388/*n*/, 190/*k*/, 2 /*batch_size*/);
}
@@ -74,7 +70,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nt) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
}
@@ -84,7 +79,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nt) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
}
@@ -96,7 +90,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tn) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
}
@@ -106,7 +99,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tn) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
}
@@ -118,7 +110,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tt) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
}
@@ -128,8 +119,8 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tt) {
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
SgemmTraits;
-//think about using run_gemm directly
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
}
////////////////////////////////////////////////////////////////////////////////////////////////////