/***************************************************************************************************
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief Test environment for batched reduction
*/
#pragma once
#include "cutlass/matrix_traits.h"
#include "cutlass/util/platform.h"
#include "tools/util/host_matrix.h"
#include "tools/util/host_matrix_view.h"
#include "tools/util/host_tensor.h"
#include "tools/util/tensor_view_io.h"
#include "tools/util/type_traits.h"
#include <cublas_v2.h>
#include <cassert>
namespace test {
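/// Maps a CUTLASS matrix layout to the cuBLAS transpose flag with the same memory
/// interpretation: row major maps to CUBLAS_OP_T, column major to CUBLAS_OP_N.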
inline cublasOperation_t convert(cutlass::MatrixLayout::Kind layout) {
switch (layout) {
case cutlass::MatrixLayout::kRowMajor:
return CUBLAS_OP_T;
case cutlass::MatrixLayout::kColumnMajor:
return CUBLAS_OP_N;
default:
break;
}
return CUBLAS_OP_N;
}
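/// Maps a cuBLAS transpose flag back to the corresponding CUTLASS matrix layout.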
inline cutlass::MatrixLayout::Kind convert(cublasOperation_t transform) {
switch (transform) {
case CUBLAS_OP_T:
return cutlass::MatrixLayout::kRowMajor;
case CUBLAS_OP_N:
return cutlass::MatrixLayout::kColumnMajor;
default:
break;
}
return cutlass::MatrixLayout::kColumnMajor;
}
/// Testbed for evaluating batched reduction
template <
typename AType,
typename CType,
typename DType,
typename ScalarAlpha,
typename ScalarBeta,
typename ScalarAccum,
// reduction extent: number of input matrices to be summed along the depth dimension
int ReductionSize
>
struct BatchedReductionTestbed {
//
// Type definitions
//
/// Host tensor for operand A
typedef cutlass::HostTensor<AType, 3> HostTensorA;
/// Host matrix for operand C
typedef cutlass::HostMatrix<CType> HostMatrixC;
/// Host matrix for operand D
typedef cutlass::HostMatrix<DType> HostMatrixD;
/// Generates random elements
template <typename T>
struct RandomGenerator {
RandomGenerator(int seed = -1, bool only_ones_ = false) : only_ones(only_ones_) { srand(seed); }
T operator()() {
if (only_ones) {
return T(1);
}
else {
int val = (rand() % 16) - 8;
return T(val);
}
}
bool only_ones;
};
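/// Generates a random 32-bit pattern one bit at a time; intended for element
/// types whose values are compared bitwise rather than numerically.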
template <typename T>
struct RandomBitGenerator {
RandomBitGenerator(int seed = -1) { srand(seed); }
T operator()() {
uint32_t val = 0;
for (int i = 0; i < 32; i++) {
// Shift first, then OR in the new bit; OR-ing before shifting would discard
// the first bit and leave the least significant bit always zero.
val <<= 1;
val |= rand() % 2;
}
return T(val);
}
};
//
// Data members
//
/// input/output number of rows
int m;
/// input/output number of columns
int n;
/// A matrix operand, always column major, no trans
HostTensorA A;
/// C matrix operand, always column major, no trans
HostMatrixC C;
/// D matrix operand, always column major, no trans
HostMatrixD D;
/// Host-side reference copy of A
cutlass::HostTensor<AType, 3> ref_A;
/// Host-side reference copy of C
cutlass::HostMatrix<CType> ref_C;
/// Reference result computed on the host
cutlass::HostMatrix<DType> ref_D;
/// lda
int lda;
/// ldc
int ldc;
/// ldd
int ldd;
/// Linear scaling factor applied to the reduction result
ScalarAlpha alpha;
/// Linear scaling factor applied to the source operand C
ScalarBeta beta;
/// stride between two elements that will be summed
long long int reduction_stride;
//
// Static helpers
//
/// Helper to resize a matrix with a given size and layout
template <typename T>
static void resize(cutlass::HostMatrix<T>& tensor,
int rows,
int columns,
cublasOperation_t layout,
int ldm = 0,
bool device_backed = true) {
tensor.resize(cutlass::make_Coord(rows, columns), convert(layout), ldm, device_backed);
}
template <typename T>
static void resize(cutlass::HostTensor<T, 3>& tensor,
int rows,
int columns,
int batches,
cublasOperation_t layout,
int ldm,
long long int batch_stride,
bool device_backed = true) {
assert(CUBLAS_OP_N == layout); // only column-major (no-transpose) input is supported
tensor.reset(cutlass::make_Coord(static_cast<int>(batch_stride), ldm, 1), /*stride, slowest moving dim on the left*/
cutlass::make_Coord(batches, columns, rows), /*size, slowest moving dim on the left*/
device_backed);
}
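// With the strides above, element (batch, column, row) lives at offset
// batch * batch_stride + column * ldm + row: each batch is an independent
// column-major rows-by-columns matrix, and batches sit batch_stride apart.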
//
// Methods
//
/// Ctor.
BatchedReductionTestbed(int m_,
int n_,
int lda_,
int ldc_,
int ldd_,
typename cutlass::TypeTraits<ScalarAlpha>::host_type alpha_ =
typename cutlass::TypeTraits<ScalarAlpha>::host_type(2),
typename cutlass::TypeTraits<ScalarBeta>::host_type beta_ =
typename cutlass::TypeTraits<ScalarBeta>::host_type(3))
: m(m_),
n(n_),
lda(lda_),
ldc(ldc_),
ldd(ldd_),
alpha(alpha_),
beta(beta_),
reduction_stride(ldc_ * n_) {
// A is column major; successive batches are stacked reduction_stride elements apart
resize(A, m_, n_, ReductionSize, CUBLAS_OP_N, lda_, reduction_stride, true);
resize(C, m_, n_, CUBLAS_OP_N, ldc_, true);
resize(D, m_, n_, CUBLAS_OP_N, ldd_, true);
resize(ref_A, m_, n_, ReductionSize, CUBLAS_OP_N, lda_, reduction_stride, false);
resize(ref_C, m_, n_, CUBLAS_OP_N, ldc_, false);
resize(ref_D, m_, n_, CUBLAS_OP_N, ldd_, false);
}
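// Illustrative usage (a minimal sketch, not taken from an actual test; the
// device-side batched reduction under test would run between initialize()
// and verify_with_host(), and its launch is omitted here):
//
//   test::BatchedReductionTestbed<float, float, float, float, float, float, 4>
//       testbed(128, 64, /*lda_=*/128, /*ldc_=*/128, /*ldd_=*/128);
//   testbed.initialize();
//   // ... launch the kernel reading testbed.ptr_A() and writing testbed.ptr_D() ...
//   bool passed = testbed.verify_with_host();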
/// Dtor
~BatchedReductionTestbed() { }
/// Getters
/// Returns a device pointer to the A operand
typename HostTensorA::DeviceType* ptr_A() const { return A.device_data(); }
/// Returns a device pointer to the C operand
typename HostMatrixC::DeviceType* ptr_C() const { return C.device_data(); }
/// Returns a device pointer to the D operand
typename HostMatrixD::DeviceType* ptr_D() const { return D.device_data(); }
/// Returns the number of rows in the output
int M() const { return m; }
/// Returns the number of columns in the output
int N() const { return n; }
/// Returns the leading dimension of A
int get_lda() const { return lda; }
/// Returns the leading dimension of C
int get_ldc() const { return ldc; }
/// Returns the leading dimension of D
int get_ldd() const { return ldd; }
/// Returns the scaling factor alpha
ScalarAlpha get_alpha() const { return alpha; }
/// Returns the scaling factor beta
ScalarBeta get_beta() const { return beta; }
/// Returns the stride between consecutive batches of A
long long int get_reduction_stride() const { return reduction_stride; }
/// Initializes data, randomly
void initialize(int seed = -1) {
A.fill_random(RandomGenerator<AType>(seed + 7));
C.fill_random(RandomGenerator<CType>(seed));
D.fill_random(RandomGenerator<DType>(seed + 11));
}
/// Computes the reference result on the host
void compute_host() {
ref_A.fill(A);
ref_C.fill(C);
ref_D.fill(D);
// D = alpha * reduction(A) + beta * C
for (int m_idx = 0; m_idx < m; m_idx++) {
for (int n_idx = 0; n_idx < n; n_idx++) {
ScalarAccum accum = static_cast<ScalarAccum>(0.0);
for (int r_idx = 0; r_idx < static_cast<int>(ReductionSize); r_idx++) {
accum += static_cast<ScalarAccum>(ref_A.at(cutlass::make_Coord(r_idx, n_idx, m_idx)));
}
ref_D.at(cutlass::make_Coord(m_idx, n_idx)) = static_cast<DType>(
alpha * static_cast<ScalarAlpha>(accum) +
beta * static_cast<ScalarBeta>(ref_C.at(cutlass::make_Coord(m_idx, n_idx)))
);
}
}
}
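// Note: the check below is bit-exact. RandomGenerator produces small integer
// values in [-8, 7], so the reduction is exact in floating point and the host
// and device results should match bit-for-bit.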
/// Verifies that the contents of D equal the host-side reference
bool verify_with_host() {
compute_host();
D.sync_host();
bool passed = D.bit_equals(ref_D);
return passed;
}
};
} // namespace test