Updated README and CHANGELOG.
This commit is contained in:
parent
d7137f9c0a
commit
77d1e0ca81
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
## 1.1.0 (2018-09-19)
|
## 1.1.0 (2018-09-19)
|
||||||
* Turing Features
|
* Turing Features
|
||||||
* WMMA GEMM targeting TensorCores - INT8, INT4, INT1
|
* WMMA GEMM targeting TensorCores - INT8, INT4, 1-bit
|
||||||
* Batched Strided GEMM
|
* Batched Strided GEMM
|
||||||
* Threadblock rasterization strategies
|
* Threadblock rasterization strategies
|
||||||
* Improved performance for adverse problem sizes and data layouts
|
* Improved performance for adverse problem sizes and data layouts
|
||||||
|
@@ -33,7 +33,7 @@ We describe the structure of an efficient GEMM in our talk at the
|
|||||||
* [Examples](examples/)
|
* [Examples](examples/)
|
||||||
* Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM
|
* Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM
|
||||||
* Turing Features
|
* Turing Features
|
||||||
* [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, INT1
|
* [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, 1-bit
|
||||||
* [Batched Strided GEMM](tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu)
|
* [Batched Strided GEMM](tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu)
|
||||||
* [Threadblock rasterization strategies](tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu)
|
* [Threadblock rasterization strategies](tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu)
|
||||||
* Improved performance for adverse problem sizes and data layouts
|
* Improved performance for adverse problem sizes and data layouts
|
||||||
|
@@ -32,8 +32,8 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define CUTLASS_MAJOR 1
|
#define CUTLASS_MAJOR 1
|
||||||
#define CUTLASS_MINOR 0
|
#define CUTLASS_MINOR 1
|
||||||
#define CUTLASS_PATCH 1
|
#define CUTLASS_PATCH 0
|
||||||
#define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
|
#define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
|
||||||
|
|
||||||
#ifdef __NVCC__
|
#ifdef __NVCC__
|
||||||
|
Binary file not shown.
Before: Size 112 KiB (width and height not shown). After: Size 89 KiB (width and height not shown).
@@ -34,7 +34,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nn) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -44,7 +43,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nn) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -54,7 +52,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x384x192x2_nn) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,7 +61,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x388x190x2_nn) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 388/*n*/, 190/*k*/, 2 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 388/*n*/, 190/*k*/, 2 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,7 +70,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nt) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,7 +79,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nt) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,7 +90,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tn) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -106,7 +99,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tn) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -118,7 +110,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tt) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,8 +119,8 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tt) {
|
|||||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||||
SgemmTraits;
|
SgemmTraits;
|
||||||
//think about using run_gemm directly
|
|
||||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user