Updated README and CHANGELOG.
This commit is contained in:
parent
d7137f9c0a
commit
77d1e0ca81
@@ -3,7 +3,7 @@
|
||||
|
||||
## 1.1.0 (2018-09-19)
|
||||
* Turing Features
|
||||
* WMMA GEMM targeting TensorCores - INT8, INT4, INT1
|
||||
* WMMA GEMM targeting TensorCores - INT8, INT4, 1-bit
|
||||
* Batched Strided GEMM
|
||||
* Threadblock rasterization strategies
|
||||
* Improved performance for adverse problem sizes and data layouts
|
||||
|
@@ -33,7 +33,7 @@ We describe the structure of an efficient GEMM in our talk at the
|
||||
* [Examples](examples/)
|
||||
* Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM
|
||||
* Turing Features
|
||||
* [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, INT1
|
||||
* [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, 1-bit
|
||||
* [Batched Strided GEMM](tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu)
|
||||
* [Threadblock rasterization strategies](tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu)
|
||||
* Improved performance for adverse problem sizes and data layouts
|
||||
|
@@ -32,8 +32,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CUTLASS_MAJOR 1
|
||||
#define CUTLASS_MINOR 0
|
||||
#define CUTLASS_PATCH 1
|
||||
#define CUTLASS_MINOR 1
|
||||
#define CUTLASS_PATCH 0
|
||||
#define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
|
||||
|
||||
#ifdef __NVCC__
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 112 KiB After Width: | Height: | Size: 89 KiB |
@@ -34,7 +34,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nn) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -44,7 +43,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nn) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -54,7 +52,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x384x192x2_nn) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -64,7 +61,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_127x388x190x2_nn) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(127/*m*/, 388/*n*/, 190/*k*/, 2 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -74,7 +70,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nt) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -84,7 +79,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nt) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kColumnMajor,
|
||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -96,7 +90,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tn) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -106,7 +99,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tn) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||
cutlass::MatrixLayout::kColumnMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -118,7 +110,6 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tt) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/);
|
||||
}
|
||||
|
||||
@@ -128,8 +119,8 @@ TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tt) {
|
||||
typedef cutlass::gemm::SgemmTraits<cutlass::MatrixLayout::kRowMajor,
|
||||
cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> >
|
||||
SgemmTraits;
|
||||
//think about using run_gemm directly
|
||||
run_batched_strided_gemm<SgemmTraits>(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user