2018-05-17 02:44:56 +08:00
|
|
|
/***************************************************************************************************
|
|
|
|
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
|
|
|
* provided that the following conditions are met:
|
|
|
|
* * Redistributions of source code must retain the above copyright notice, this list of
|
|
|
|
* conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
|
|
|
* conditions and the following disclaimer in the documentation and/or other materials
|
|
|
|
* provided with the distribution.
|
|
|
|
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
|
|
|
* to endorse or promote products derived from this software without specific prior written
|
|
|
|
* permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
|
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
|
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
|
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
|
|
* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
**************************************************************************************************/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <tools/util/command_line.h>
|
|
|
|
|
|
|
|
namespace perf {
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Range of problem sizes
|
|
|
|
struct Range {
|
|
|
|
int start;
|
|
|
|
int end;
|
|
|
|
int increment;
|
|
|
|
|
|
|
|
Range(int _start = 0) : start(_start), end(_start), increment(1) {}
|
|
|
|
|
|
|
|
Range(int _start, int _end, int _increment = 1)
|
|
|
|
: start(_start), end(_end), increment(_increment) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Defines a space of problem sizes
|
|
|
|
struct GemmProblemRange {
|
|
|
|
public:
|
|
|
|
/// Range of sizes in GEMM M dimension
|
|
|
|
Range M;
|
|
|
|
|
|
|
|
/// Range of sizes in GEMM N dimension
|
|
|
|
Range N;
|
|
|
|
|
|
|
|
/// Range of sizes in GEMM K dimension
|
|
|
|
Range K;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Methods
|
|
|
|
//
|
|
|
|
|
|
|
|
/// Constructor to define a space of probelm sizes
|
|
|
|
GemmProblemRange(Range _M = Range(256), Range _N = Range(256), Range _K = Range(256))
|
|
|
|
: M(_M), N(_N), K(_K) {}
|
|
|
|
|
|
|
|
/// Parses a command line argument as a Range object
|
|
|
|
static void get_range(Range &range,
|
|
|
|
cutlass::CommandLine const &args,
|
|
|
|
std::string const &arg,
|
|
|
|
Range const &_default = Range(256)) {
|
|
|
|
range = Range(0, 0, 1);
|
|
|
|
|
|
|
|
if (args.check_cmd_line_flag(arg.c_str())) {
|
|
|
|
std::vector<std::string> values;
|
|
|
|
args.get_cmd_line_arguments(arg.c_str(), values, ':');
|
|
|
|
|
|
|
|
if (values.size() > 0) {
|
|
|
|
std::stringstream ss;
|
|
|
|
ss << values.at(0);
|
|
|
|
ss >> range.start;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (values.size() > 1) {
|
|
|
|
std::stringstream ss;
|
|
|
|
ss << values.at(1);
|
|
|
|
ss >> range.end;
|
|
|
|
} else {
|
|
|
|
range.end = range.start;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (values.size() > 2) {
|
|
|
|
std::stringstream ss;
|
|
|
|
ss << values.at(2);
|
|
|
|
ss >> range.increment;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
range = _default;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Initializes the GEMM problem size from command line arguments
|
|
|
|
GemmProblemRange(cutlass::CommandLine const &args) {
|
|
|
|
get_range(M, args, "m", Range(10240));
|
|
|
|
get_range(N, args, "n", Range(4096));
|
|
|
|
get_range(K, args, "k", Range(4096));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Distribution type
|
|
|
|
struct Distribution {
|
|
|
|
/// Variant types
|
|
|
|
enum Kind { Invalid, Uniform, Gaussian, Linear, Identity };
|
|
|
|
|
|
|
|
/// Distribution state
|
|
|
|
union {
|
|
|
|
/// Uniform distribution
|
|
|
|
struct {
|
|
|
|
double min;
|
|
|
|
double max;
|
|
|
|
} uniform;
|
|
|
|
|
|
|
|
/// Gaussian distribution
|
|
|
|
struct {
|
|
|
|
double mean;
|
|
|
|
double stddev;
|
|
|
|
} gaussian;
|
|
|
|
|
|
|
|
/// Elements are linear combination of row and column index
|
|
|
|
struct {
|
|
|
|
double offset;
|
|
|
|
double delta_row;
|
|
|
|
double delta_column;
|
|
|
|
} linear;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Active variant kind
|
|
|
|
Kind kind;
|
|
|
|
|
|
|
|
/// Random values are cast to integer after scaling by this power of two
|
|
|
|
int int_scale;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Methods
|
|
|
|
//
|
|
|
|
|
|
|
|
Distribution() : kind(Invalid), int_scale(0) {}
|
|
|
|
|
|
|
|
/// Configures distribution as uniform random
|
|
|
|
Distribution &set_uniform(double _min, double _max, int _int_scale = 0) {
|
|
|
|
kind = Uniform;
|
|
|
|
uniform.min = _min;
|
|
|
|
uniform.max = _max;
|
|
|
|
int_scale = _int_scale;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Configures distribution as Gaussian distribution
|
|
|
|
Distribution &set_gaussian(double _mean, double _stddev, int _int_scale = 0) {
|
|
|
|
kind = Gaussian;
|
|
|
|
gaussian.mean = _mean;
|
|
|
|
gaussian.stddev = _stddev;
|
|
|
|
int_scale = _int_scale;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Sets identity
|
|
|
|
Distribution &set_identity() {
|
|
|
|
kind = Identity;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace perf
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Prints a Distribution to ostream
|
|
|
|
inline std::ostream &operator<<(std::ostream &out, perf::Distribution const &dist) {
|
|
|
|
switch (dist.kind) {
|
|
|
|
case perf::Distribution::Uniform:
|
|
|
|
out << "uniorm, min: " << dist.uniform.min << ", max: " << dist.uniform.max;
|
|
|
|
break;
|
|
|
|
case perf::Distribution::Gaussian:
|
|
|
|
out << "gaussian, mean: " << dist.gaussian.mean << ", stddev: " << dist.gaussian.stddev;
|
|
|
|
break;
|
|
|
|
case perf::Distribution::Linear:
|
|
|
|
out << "linear, mean: " << dist.linear.offset << ", delta_row: " << dist.linear.delta_row
|
|
|
|
<< ", delta_column: " << dist.linear.delta_column;
|
|
|
|
break;
|
|
|
|
case perf::Distribution::Identity:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
out << "unknown";
|
|
|
|
}
|
|
|
|
|
|
|
|
out << ", int_scale: " << dist.int_scale;
|
|
|
|
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
namespace perf {
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Defines a vector of string pairs
|
|
|
|
typedef std::vector<std::pair<std::string, std::string> > KeyValueVector;
|
|
|
|
|
|
|
|
/// Defines a const iterator to a KeyValueVector
|
|
|
|
typedef KeyValueVector::const_iterator KeyValueIterator;
|
|
|
|
|
|
|
|
/// Structure captures the initial configuration of matrices
|
|
|
|
struct InitialDistribution {
|
|
|
|
/// Distribution of A matrix operand
|
|
|
|
Distribution dist_A;
|
|
|
|
|
|
|
|
/// Distribution of B matrix operand
|
|
|
|
Distribution dist_B;
|
|
|
|
|
|
|
|
/// Distribution of C matrix operand
|
|
|
|
Distribution dist_C;
|
|
|
|
|
|
|
|
/// Seed for random number generation
|
|
|
|
int64_t seed;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Static function members
|
|
|
|
//
|
|
|
|
|
|
|
|
/// Gets the initial distribution
|
|
|
|
static void get_distribution(cutlass::CommandLine const &args,
|
|
|
|
std::string const &arg,
|
|
|
|
Distribution &dist) {
|
|
|
|
struct {
|
|
|
|
const char *label;
|
|
|
|
Distribution::Kind kind;
|
|
|
|
} distribution_kinds[] = {{"uniform", Distribution::Uniform},
|
|
|
|
{"gaussian", Distribution::Gaussian},
|
|
|
|
{"linear", Distribution::Linear},
|
|
|
|
{"identity", Distribution::Identity},
|
|
|
|
{0, Distribution::Invalid}};
|
|
|
|
|
|
|
|
struct {
|
|
|
|
char const *label;
|
|
|
|
double *member;
|
|
|
|
} members[] = {{"min", &dist.uniform.min},
|
|
|
|
{"max", &dist.uniform.max},
|
|
|
|
{"mean", &dist.gaussian.mean},
|
|
|
|
{"stddev", &dist.gaussian.stddev},
|
|
|
|
{"offset", &dist.linear.offset},
|
|
|
|
{"delta_row", &dist.linear.delta_row},
|
|
|
|
{"delta_column", &dist.linear.delta_column},
|
|
|
|
{0, 0}};
|
|
|
|
|
|
|
|
KeyValueVector values;
|
|
|
|
args.get_cmd_line_argument_pairs(arg.c_str(), values);
|
|
|
|
|
|
|
|
// The parser expects the first token to be a string identifying the distribution type.
|
|
|
|
KeyValueIterator it = values.begin();
|
|
|
|
if (it != values.end()) {
|
|
|
|
for (int i = 0; distribution_kinds[i].label; ++i) {
|
|
|
|
if (it->first == distribution_kinds[i].label) {
|
|
|
|
dist.kind = distribution_kinds[i].kind;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++it;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Subsequent key-value pairs update the named field of the distribution struct.
|
|
|
|
for (; it != values.end(); ++it) {
|
|
|
|
|
|
|
|
// Integer scaling factor - if < 0, no integer rounding is performed.
|
|
|
|
if (it->first == "scale" && !it->second.empty()) {
|
|
|
|
std::stringstream ss;
|
|
|
|
ss << it->second;
|
|
|
|
ss >> dist.int_scale;
|
|
|
|
|
|
|
|
continue; // next token
|
|
|
|
}
|
|
|
|
|
|
|
|
// Casts as integer without scaling
|
|
|
|
if (it->first == "integer") {
|
|
|
|
dist.int_scale = 0;
|
|
|
|
continue; // next token
|
|
|
|
}
|
|
|
|
|
|
|
|
// initialize other members
|
|
|
|
for (int m = 0; members[m].label; ++m) {
|
|
|
|
if (it->first == members[m].label && !it->second.empty()) {
|
|
|
|
std::stringstream ss;
|
|
|
|
ss << it->second;
|
|
|
|
ss >> *(members[m].member);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Methods
|
|
|
|
//
|
|
|
|
|
|
|
|
/// Basic uniform random distribution
|
|
|
|
InitialDistribution(int64_t _seed = 700) : seed(_seed) {
|
|
|
|
dist_A.set_uniform(-8, 8);
|
|
|
|
dist_B.set_uniform(-8, 8);
|
|
|
|
dist_C.set_uniform(-8, 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Extracts initial distribution from command line arguments
|
|
|
|
InitialDistribution(cutlass::CommandLine const &args) {
|
|
|
|
// Set initial values
|
|
|
|
seed = 700;
|
|
|
|
dist_A.set_uniform(-8, 8);
|
|
|
|
dist_B.set_uniform(-8, 8);
|
|
|
|
dist_C.set_uniform(-8, 8);
|
|
|
|
|
|
|
|
// Update with command line arguments
|
|
|
|
args.get_cmd_line_argument("seed", seed, seed);
|
|
|
|
|
|
|
|
// Update all distributions at once
|
|
|
|
Distribution dist_all;
|
|
|
|
if (args.check_cmd_line_flag("dist")) {
|
|
|
|
get_distribution(args, "dist", dist_all);
|
|
|
|
dist_A = dist_all;
|
|
|
|
dist_B = dist_all;
|
|
|
|
dist_C = dist_all;
|
|
|
|
}
|
|
|
|
|
|
|
|
get_distribution(args, "dist_A", dist_A);
|
|
|
|
get_distribution(args, "dist_B", dist_B);
|
|
|
|
get_distribution(args, "dist_C", dist_C);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Defines how to execute the benchmarks
|
|
|
|
struct ExecutionMode {
|
|
|
|
enum Kind {
|
|
|
|
Profile,
|
|
|
|
Verify,
|
|
|
|
Single,
|
|
|
|
Invalid
|
|
|
|
};
|
|
|
|
|
|
|
|
static std::string to_string(Kind kind) {
|
|
|
|
switch (kind) {
|
|
|
|
case Profile: return "profile";
|
|
|
|
case Verify: return "verify";
|
|
|
|
case Single: return "single";
|
|
|
|
default: return "invalid";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static Kind from_string(std::string const &str) {
|
|
|
|
if (str == "profile") return Profile;
|
|
|
|
if (str == "verify") return Verify;
|
|
|
|
if (str == "single") return Single;
|
|
|
|
return Profile;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Indicates when the workspace is saved
|
|
|
|
struct WorkspaceSaveMode {
|
|
|
|
enum Kind {
|
|
|
|
Never,
|
|
|
|
Incorrect,
|
|
|
|
Always
|
|
|
|
};
|
|
|
|
|
|
|
|
static std::string to_string(Kind kind) {
|
|
|
|
switch (kind) {
|
|
|
|
case Never: return "never";
|
|
|
|
case Incorrect: return "incorrect";
|
|
|
|
case Always: return "always";
|
|
|
|
default: return "incorrect";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static Kind from_string(std::string const &str) {
|
|
|
|
if (str == "never") return Never;
|
|
|
|
if (str == "incorrect") return Incorrect;
|
|
|
|
if (str == "always") return Always;
|
|
|
|
return Incorrect;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/// Class holding testbench command line options
|
|
|
|
struct TestbenchOptions {
|
|
|
|
|
|
|
|
//
|
|
|
|
// Data members
|
|
|
|
//
|
|
|
|
|
|
|
|
/// Describes the random initial state of the input matrices
|
|
|
|
InitialDistribution initial_distribution;
|
|
|
|
|
|
|
|
// Path to output file name
|
|
|
|
std::string output_filename;
|
|
|
|
|
|
|
|
/// If true, output is appended
|
|
|
|
bool append;
|
|
|
|
|
|
|
|
/// Number of iterations
|
|
|
|
int iterations;
|
|
|
|
|
|
|
|
/// Defines how to run the benchmark
|
|
|
|
ExecutionMode::Kind execution_mode;
|
|
|
|
|
|
|
|
/// Indicates when the workspace is saved
|
|
|
|
WorkspaceSaveMode::Kind save_workspace_mode;
|
|
|
|
|
|
|
|
/// Enabled kernel names
|
|
|
|
std::vector<std::string> kernels;
|
|
|
|
|
|
|
|
/// Scalar value for GEMM
|
|
|
|
double alpha;
|
|
|
|
|
|
|
|
/// Scalar value for GEMM
|
|
|
|
double beta;
|
|
|
|
|
|
|
|
/// Range of problem sizes
|
|
|
|
GemmProblemRange problem_range;
|
|
|
|
|
|
|
|
/// Tags to describe the profiler output
|
|
|
|
KeyValueVector pivot_tags;
|
|
|
|
|
|
|
|
/// If enabled, only the peak performance for a given kernel is reported
|
|
|
|
bool peak_performance;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Methods
|
|
|
|
//
|
|
|
|
|
|
|
|
/// Constructs the testbench from tags
|
|
|
|
TestbenchOptions(cutlass::CommandLine const &args)
|
|
|
|
: initial_distribution(args),
|
|
|
|
execution_mode(ExecutionMode::Profile),
|
|
|
|
save_workspace_mode(WorkspaceSaveMode::Never),
|
|
|
|
problem_range(args) {
|
|
|
|
|
|
|
|
// fetch command line arguments
|
|
|
|
args.get_cmd_line_argument("iterations", iterations, 25);
|
|
|
|
args.get_cmd_line_argument("append", append, false);
|
|
|
|
args.get_cmd_line_argument("output", output_filename);
|
|
|
|
args.get_cmd_line_argument("alpha", alpha, 1.0);
|
|
|
|
args.get_cmd_line_argument("beta", beta, 0.0);
|
|
|
|
args.get_cmd_line_argument("peak", peak_performance, false);
|
|
|
|
args.get_cmd_line_argument_pairs("tags", pivot_tags);
|
|
|
|
|
|
|
|
if (args.check_cmd_line_flag("execution_mode")) {
|
|
|
|
std::string str;
|
|
|
|
args.get_cmd_line_argument("execution_mode", str);
|
|
|
|
execution_mode = ExecutionMode::from_string(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (args.check_cmd_line_flag("save_workspace")) {
|
|
|
|
std::string str;
|
|
|
|
args.get_cmd_line_argument("save_workspace", str);
|
|
|
|
save_workspace_mode = WorkspaceSaveMode::from_string(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
// query for enabled kernels or enable all of them
|
|
|
|
if (args.check_cmd_line_flag("kernels")) {
|
|
|
|
args.get_cmd_line_arguments("kernels", kernels, ',');
|
|
|
|
} else {
|
|
|
|
char const *gemms[] = {"sgemm", "dgemm", "hgemm", "igemm", "wmma_gemm", 0};
|
|
|
|
char const *layouts[] = {"nn", "nt", "tn", "tt", 0};
|
|
|
|
for (int i = 0; gemms[i]; ++i) {
|
|
|
|
for (int j = 0; layouts[j]; ++j) {
|
|
|
|
kernels.push_back(std::string(gemms[i]) + "_" + layouts[j]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if the kernel name appears among the enabled kernels
|
|
|
|
bool kernel_enabled(std::string const &kernel) const {
|
|
|
|
typedef std::vector<std::string>::const_iterator kernel_iterator;
|
|
|
|
|
|
|
|
for (kernel_iterator it = kernels.begin(); it != kernels.end(); ++it) {
|
|
|
|
if (kernel.find(*it) != std::string::npos) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Given the disposition of a GEMM problem, returns true if the results should
|
|
|
|
/// be saved to the file system.
|
|
|
|
bool save_workspace(bool correct) const {
|
|
|
|
if (save_workspace_mode == WorkspaceSaveMode::Always ||
|
|
|
|
(save_workspace_mode == WorkspaceSaveMode::Incorrect && !correct)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Prints the usage statement
|
|
|
|
static void usage(std::ostream &out) {
|
|
|
|
|
|
|
|
out << "cutlass_perf_test [options]\n\n"
|
|
|
|
|
|
|
|
<< " --help\n"
|
|
|
|
|
|
|
|
<< " --append=<true|false*> "
|
|
|
|
<< " If true, appends output to existing CSV file. If false, overwrites.\n"
|
|
|
|
|
|
|
|
<< " --alpha=<alpha> "
|
|
|
|
<< " Value for alpha to be used in GEMM experiments\n"
|
|
|
|
|
|
|
|
<< " --beta=<beta> "
|
|
|
|
<< " Value for beta to be used in GEMM experiments\n"
|
|
|
|
|
|
|
|
<< " --dist_{A,B,C}=<distribution> "
|
|
|
|
<< " Describes the random distribution of each of the input matrix operands.\n"
|
|
|
|
|
|
|
|
<< " --execution_mode=<mode> "
|
|
|
|
<< " Specifies execution mode: profile, verify, single\n"
|
|
|
|
|
|
|
|
<< " --output=<filename.csv> "
|
|
|
|
<< " Writes summary of profiling to specified .csv file\n"
|
|
|
|
|
|
|
|
<< " --iterations=<timing iterations> "
|
|
|
|
<< " maximum number of iterations to execute when profiling\n"
|
|
|
|
|
|
|
|
<< " --m=<height>[:max height[:step]] "
|
|
|
|
<< " Height of GEMM problem (number of rows of C). May specify a range with optional "
|
|
|
|
"step size.\n"
|
|
|
|
|
|
|
|
<< " --n=<width>[:max width[:step]] "
|
|
|
|
<< " Width of GEMM problem (number of columns of C). May specify a range with optional "
|
|
|
|
"step size.\n"
|
|
|
|
|
|
|
|
<< " --k=<depth>[:max depth[:step]] "
|
|
|
|
<< " Size of inner dimension of A and B. May specify a range with optional step size.\n"
|
|
|
|
|
2018-05-18 06:10:55 +08:00
|
|
|
<< " --kernels={s|d|h|i|wmma_}gemm_{nn,nt,tn,tt} "
|
2018-05-17 02:44:56 +08:00
|
|
|
<< " Select GEMM datatype and layout to use for tests\n"
|
|
|
|
|
|
|
|
<< " --peak=<bool> "
|
|
|
|
<< " If true, only reports peak performance per kernel after profiling specified "
|
|
|
|
"problem space.\n"
|
|
|
|
|
|
|
|
<< " --save_workspace={*never,incorrect,always} "
|
|
|
|
<< " Specifies when to save the GEMM inputs and results to the filesystem.\n"
|
|
|
|
|
|
|
|
<< " --seed=<seed> "
|
|
|
|
<< " Random seed used by the random number generator in initializing input matrices.\n"
|
|
|
|
|
|
|
|
<< " --tags=<column:tag,...> "
|
|
|
|
<< " Inserts leading columns in output table and uniform values for each column. Useful "
|
|
|
|
"for generating pivot tables.\n"
|
|
|
|
|
|
|
|
<< "\n\n"
|
|
|
|
|
|
|
|
<< "Example usage:\n\n"
|
|
|
|
|
|
|
|
<< "# Runs one problem size for all kernels\n"
|
|
|
|
<< "./tools/test/perf/cutlass_perf_test --m=10240 --n=1024 --k=1024\n\n"
|
|
|
|
|
|
|
|
<< "# Varies GEMM K dimension for SGEMM and IGEMM with column-major multiplicands\n"
|
|
|
|
<< "./tools/test/perf/cutlass_perf_test --m=10240 --n=4096 --k=1024:8192:128 "
|
|
|
|
"--kernels=sgemm_nn,igemm_nn\n\n"
|
|
|
|
|
|
|
|
<< std::flush;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
} // namespace perf
|