From 26556d7206bcd0cc1814bff6930c178bb3cc229d Mon Sep 17 00:00:00 2001
From: Manikandan Ananth
Date: Wed, 7 Apr 2021 13:32:55 -0700
Subject: [PATCH] fix a broken sparse gemm example. found by the community.

---
 .../ampere_sparse_tensorop_gemm.cu            | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/examples/15_ampere_sparse_tensorop_gemm/ampere_sparse_tensorop_gemm.cu b/examples/15_ampere_sparse_tensorop_gemm/ampere_sparse_tensorop_gemm.cu
index c88a889b..f8c300f2 100644
--- a/examples/15_ampere_sparse_tensorop_gemm/ampere_sparse_tensorop_gemm.cu
+++ b/examples/15_ampere_sparse_tensorop_gemm/ampere_sparse_tensorop_gemm.cu
@@ -111,7 +111,8 @@ using Gemm = cutlass::gemm::device::SparseGemm<ElementInputA,
 
 // Data type and layout of meta data matrix E can be inferred from template Gemm.
 using ElementInputE = typename Gemm::ElementE;
-using LayoutInputE = typename Gemm::LayoutE;
+using LayoutInputE = cutlass::layout::RowMajor;
+using ReorderedLayoutInputE = typename Gemm::LayoutE;
 
 // Below property is defined in include/cutlass/arch/sp80.h
 constexpr int kSparse = Gemm::kSparse;
@@ -152,27 +153,27 @@ int run() {
   cutlass::HostTensor<ElementInputE, LayoutInputE> tensor_e(
       cutlass::make_Coord(problem_size.m(), problem_size.k() / kSparse / kElementsPerElementE));
   // Same size as the above.  The above one needs to be reordered and stored in this one.
-  cutlass::HostTensor<ElementInputE, LayoutInputE> tensor_e_reordered(
+  cutlass::HostTensor<ElementInputE, ReorderedLayoutInputE> tensor_e_reordered(
       cutlass::make_Coord(problem_size.m(), problem_size.k() / kSparse / kElementsPerElementE));
 
   // Fill input and output matrices on host using CUTLASS helper functions
   cutlass::reference::host::TensorFillRandomUniform(
       tensor_a.host_view(),
       1,
-      ElementInputA(1),
-      ElementInputA(-1),
+      ElementInputA(2),
+      ElementInputA(-2),
       0);  // <- Fill matrix A on host with uniform-distribution random data
   cutlass::reference::host::TensorFillRandomUniform(
       tensor_b.host_view(),
       1,
-      ElementInputB(1),
-      ElementInputB(-1),
+      ElementInputB(2),
+      ElementInputB(-2),
       0);  // <- Fill matrix B on host with uniform-distribution random data
   cutlass::reference::host::TensorFillRandomUniform(
       tensor_c.host_view(),
       1,
-      ElementOutput(1),
-      ElementOutput(-1),
+      ElementOutput(2),
+      ElementOutput(-2),
       0);  // <- Fill matrix C on host with uniform-distribution random data
   cutlass::reference::host::TensorFillRandomSparseMeta(
       tensor_e.host_view(),
@@ -210,7 +211,7 @@ int run() {
     tensor_b.device_ref(),  // <- reference to matrix B on device
     tensor_c.device_ref(),  // <- reference to matrix C on device
     tensor_d.device_ref(),  // <- reference to matrix D on device
-    tensor_e.device_ref(),  // <- reference to matrix E on device
+    tensor_e_reordered.device_ref(),  // <- reference to matrix E on device
    {alpha, beta},          // <- tuple of alpha and beta
    split_k_slices};        // <- k-dimension split factor