cutlass/tools/test/perf/testbench_options.h

/***************************************************************************************************
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice, this list of
 *       conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
 *       to endorse or promote products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 **************************************************************************************************/

#pragma once

#include <stdint.h>
#include <tools/util/command_line.h>

namespace perf {

///////////////////////////////////////////////////////////////////////////////////////////////////

/// Range of problem sizes
struct Range {
  int start;
  int end;
  int increment;

  Range(int _start = 0) : start(_start), end(_start), increment(1) {}

  Range(int _start, int _end, int _increment = 1)
      : start(_start), end(_end), increment(_increment) {}
};

///////////////////////////////////////////////////////////////////////////////////////////////////

/// Defines a space of problem sizes
struct GemmProblemRange {
 public:
  /// Range of sizes in GEMM M dimension
  Range M;

  /// Range of sizes in GEMM N dimension
  Range N;

  /// Range of sizes in GEMM K dimension
  Range K;

  //
  // Methods
  //

  /// Constructor to define a space of probelm sizes
  GemmProblemRange(Range _M = Range(256), Range _N = Range(256), Range _K = Range(256))
      : M(_M), N(_N), K(_K) {}

  /// Parses a command line argument as a Range object
  static void get_range(Range &range,
                        cutlass::CommandLine const &args,
                        std::string const &arg,
                        Range const &_default = Range(256)) {
    range = Range(0, 0, 1);

    if (args.check_cmd_line_flag(arg.c_str())) {
      std::vector<std::string> values;
      args.get_cmd_line_arguments(arg.c_str(), values, ':');

      if (values.size() > 0) {
        std::stringstream ss;
        ss << values.at(0);
        ss >> range.start;
      }

      if (values.size() > 1) {
        std::stringstream ss;
        ss << values.at(1);
        ss >> range.end;
      } else {
        range.end = range.start;
      }

      if (values.size() > 2) {
        std::stringstream ss;
        ss << values.at(2);
        ss >> range.increment;
      }
    } else {
      range = _default;
    }
  }

  /// Initializes the GEMM problem size from command line arguments
  GemmProblemRange(cutlass::CommandLine const &args) {
    get_range(M, args, "m", Range(10240));
    get_range(N, args, "n", Range(4096));
    get_range(K, args, "k", Range(4096));
  }
};

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Distribution type
struct Distribution {
  /// Variant types
  enum Kind { Invalid, Uniform, Gaussian, Linear, Identity };

  /// Distribution state
  union {
    /// Uniform distribution
    struct {
      double min;
      double max;
    } uniform;

    /// Gaussian distribution
    struct {
      double mean;
      double stddev;
    } gaussian;

    /// Elements are linear combination of row and column index
    struct {
      double offset;
      double delta_row;
      double delta_column;
    } linear;
  };

  /// Active variant kind
  Kind kind;

  /// Random values are cast to integer after scaling by this power of two
  int int_scale;

  //
  // Methods
  //

  Distribution() : kind(Invalid), int_scale(0) {}

  /// Configures distribution as uniform random
  Distribution &set_uniform(double _min, double _max, int _int_scale = 0) {
    kind = Uniform;
    uniform.min = _min;
    uniform.max = _max;
    int_scale = _int_scale;
    return *this;
  }

  /// Configures distribution as Gaussian distribution
  Distribution &set_gaussian(double _mean, double _stddev, int _int_scale = 0) {
    kind = Gaussian;
    gaussian.mean = _mean;
    gaussian.stddev = _stddev;
    int_scale = _int_scale;
    return *this;
  }


  /// Sets identity
  Distribution &set_identity() {
    kind = Identity;
    return *this;
  }
};

}  // namespace perf

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Prints a Distribution to ostream
inline std::ostream &operator<<(std::ostream &out, perf::Distribution const &dist) {
  switch (dist.kind) {
    case perf::Distribution::Uniform:
      out << "uniorm, min: " << dist.uniform.min << ", max: " << dist.uniform.max;
      break;
    case perf::Distribution::Gaussian:
      out << "gaussian, mean: " << dist.gaussian.mean << ", stddev: " << dist.gaussian.stddev;
      break;
    case perf::Distribution::Linear:
      out << "linear, mean: " << dist.linear.offset << ", delta_row: " << dist.linear.delta_row
          << ", delta_column: " << dist.linear.delta_column;
      break;
    case perf::Distribution::Identity:
      break;
    default:
      out << "unknown";
  }

  out << ", int_scale: " << dist.int_scale;

  return out;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

namespace perf {

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Defines a vector of string pairs
typedef std::vector<std::pair<std::string, std::string> > KeyValueVector;

/// Defines a const iterator to a KeyValueVector
typedef KeyValueVector::const_iterator KeyValueIterator;

/// Structure captures the initial configuration of matrices
struct InitialDistribution {
  /// Distribution of A matrix operand
  Distribution dist_A;

  /// Distribution of B matrix operand
  Distribution dist_B;

  /// Distribution of C matrix operand
  Distribution dist_C;

  /// Seed for random number generation
  int64_t seed;

  //
  // Static function members
  //

  /// Gets the initial distribution
  static void get_distribution(cutlass::CommandLine const &args,
                               std::string const &arg,
                               Distribution &dist) {
    struct {
      const char *label;
      Distribution::Kind kind;
    } distribution_kinds[] = {{"uniform", Distribution::Uniform},
                              {"gaussian", Distribution::Gaussian},
                              {"linear", Distribution::Linear},
                              {"identity", Distribution::Identity},
                              {0, Distribution::Invalid}};

    struct {
      char const *label;
      double *member;
    } members[] = {{"min", &dist.uniform.min},
                   {"max", &dist.uniform.max},
                   {"mean", &dist.gaussian.mean},
                   {"stddev", &dist.gaussian.stddev},
                   {"offset", &dist.linear.offset},
                   {"delta_row", &dist.linear.delta_row},
                   {"delta_column", &dist.linear.delta_column},
                   {0, 0}};

    KeyValueVector values;
    args.get_cmd_line_argument_pairs(arg.c_str(), values);

    // The parser expects the first token to be a string identifying the distribution type.
    KeyValueIterator it = values.begin();
    if (it != values.end()) {
      for (int i = 0; distribution_kinds[i].label; ++i) {
        if (it->first == distribution_kinds[i].label) {
          dist.kind = distribution_kinds[i].kind;
          break;
        }
      }
      ++it;
    }

    // Subsequent key-value pairs update the named field of the distribution struct.
    for (; it != values.end(); ++it) {

      // Integer scaling factor - if < 0, no integer rounding is performed.
      if (it->first == "scale" && !it->second.empty()) {
        std::stringstream ss;
        ss << it->second;
        ss >> dist.int_scale;

        continue;  // next token
      }

      // Casts as integer without scaling
      if (it->first == "integer") {
        dist.int_scale = 0;
        continue;  // next token
      }

      // initialize other members
      for (int m = 0; members[m].label; ++m) {
        if (it->first == members[m].label && !it->second.empty()) {
          std::stringstream ss;
          ss << it->second;
          ss >> *(members[m].member);
        }
      }
    }
  }

  //
  // Methods
  //

  /// Basic uniform random distribution
  InitialDistribution(int64_t _seed = 700) : seed(_seed) {
    dist_A.set_uniform(-8, 8);
    dist_B.set_uniform(-8, 8);
    dist_C.set_uniform(-8, 8);
  }

  /// Extracts initial distribution from command line arguments
  InitialDistribution(cutlass::CommandLine const &args) {
    // Set initial values
    seed = 700;
    dist_A.set_uniform(-8, 8);
    dist_B.set_uniform(-8, 8);
    dist_C.set_uniform(-8, 8);

    // Update with command line arguments
    args.get_cmd_line_argument("seed", seed, seed);

    // Update all distributions at once
    Distribution dist_all;
    if (args.check_cmd_line_flag("dist")) {
       get_distribution(args, "dist", dist_all);
       dist_A = dist_all;
       dist_B = dist_all;
       dist_C = dist_all;
    }

    get_distribution(args, "dist_A", dist_A);
    get_distribution(args, "dist_B", dist_B);
    get_distribution(args, "dist_C", dist_C);
  }
};

///////////////////////////////////////////////////////////////////////////////////////////////////

/// Defines how to execute the benchmarks
struct ExecutionMode {
  enum Kind {
    Profile,
    Verify,
    Single,
    Invalid
  };

  static std::string to_string(Kind kind) {
    switch (kind) {
      case Profile: return "profile";
      case Verify: return "verify";
      case Single: return "single";
      default: return "invalid";
    }
  }

  static Kind from_string(std::string const &str) {
    if (str == "profile") return Profile;
    if (str == "verify") return Verify;
    if (str == "single") return Single;
    return Profile;
  }
};

/// Indicates when the workspace is saved
struct WorkspaceSaveMode {
  enum Kind {
    Never,
    Incorrect,
    Always
  };

  static std::string to_string(Kind kind) {
    switch (kind) {
      case Never: return "never";
      case Incorrect: return "incorrect";
      case Always: return "always";
      default: return "incorrect";
    }
  }

  static Kind from_string(std::string const &str) {
    if (str == "never") return Never;
    if (str == "incorrect") return Incorrect;
    if (str == "always") return Always;
    return Incorrect;
  }
};

///////////////////////////////////////////////////////////////////////////////////////////////////

/// Class holding testbench command line options
struct TestbenchOptions {

  //
  // Data members
  //

  /// Describes the random initial state of the input matrices
  InitialDistribution initial_distribution;

  // Path to output file name
  std::string output_filename;

  /// If true, output is appended
  bool append;

  /// Number of iterations
  int iterations;

  /// Defines how to run the benchmark
  ExecutionMode::Kind execution_mode;

  /// Indicates when the workspace is saved
  WorkspaceSaveMode::Kind save_workspace_mode;

  /// Enabled kernel names
  std::vector<std::string> kernels;

  /// Scalar value for GEMM
  double alpha;

  /// Scalar value for GEMM
  double beta;

  /// Range of problem sizes
  GemmProblemRange problem_range;

  /// Tags to describe the profiler output
  KeyValueVector pivot_tags;

  /// If enabled, only the peak performance for a given kernel is reported
  bool peak_performance;

  //
  // Methods
  //

  /// Constructs the testbench from tags
  TestbenchOptions(cutlass::CommandLine const &args)
      : initial_distribution(args),
        execution_mode(ExecutionMode::Profile),
        save_workspace_mode(WorkspaceSaveMode::Never),
        problem_range(args) {

    // fetch command line arguments
    args.get_cmd_line_argument("iterations", iterations, 25);
    args.get_cmd_line_argument("append", append, false);
    args.get_cmd_line_argument("output", output_filename);
    args.get_cmd_line_argument("alpha", alpha, 1.0);
    args.get_cmd_line_argument("beta", beta, 0.0);
    args.get_cmd_line_argument("peak", peak_performance, false);
    args.get_cmd_line_argument_pairs("tags", pivot_tags);

    if (args.check_cmd_line_flag("execution_mode")) {
      std::string str;
      args.get_cmd_line_argument("execution_mode", str);
      execution_mode = ExecutionMode::from_string(str);
    }

    if (args.check_cmd_line_flag("save_workspace")) {
      std::string str;
      args.get_cmd_line_argument("save_workspace", str);
      save_workspace_mode = WorkspaceSaveMode::from_string(str);
    }

    // query for enabled kernels or enable all of them
    if (args.check_cmd_line_flag("kernels")) {
      args.get_cmd_line_arguments("kernels", kernels, ',');
    } else {
      char const *gemms[] = {"sgemm", "dgemm", "hgemm", "igemm", "wmma_gemm", 0};
      char const *layouts[] = {"nn", "nt", "tn", "tt", 0};
      for (int i = 0; gemms[i]; ++i) {
        for (int j = 0; layouts[j]; ++j) {
          kernels.push_back(std::string(gemms[i]) + "_" + layouts[j]);
        }
      }
    }
  }

  /// Returns true if the kernel name appears among the enabled kernels
  bool kernel_enabled(std::string const &kernel) const {
    typedef std::vector<std::string>::const_iterator kernel_iterator;

    for (kernel_iterator it = kernels.begin(); it != kernels.end(); ++it) {
      if (kernel.find(*it) != std::string::npos) {
        return true;
      }
    }

    return false;
  }

  /// Given the disposition of a GEMM problem, returns true if the results should
  /// be saved to the file system.
  bool save_workspace(bool correct) const {
    if (save_workspace_mode == WorkspaceSaveMode::Always ||
      (save_workspace_mode == WorkspaceSaveMode::Incorrect && !correct)) {
      return true;
    }
    return false;
  }

  /// Prints the usage statement
  static void usage(std::ostream &out) {

    out << "cutlass_perf_test [options]\n\n"

        << "  --help\n"

        << "  --append=<true|false*>                        "
        << "    If true, appends output to existing CSV file. If false, overwrites.\n"

        << "  --alpha=<alpha>                               "
        << "    Value for alpha to be used in GEMM experiments\n"

        << "  --beta=<beta>                                 "
        << "    Value for beta to be used in GEMM experiments\n"

        << "  --dist_{A,B,C}=<distribution>                 "
        << "    Describes the random distribution of each of the input matrix operands.\n"

        << "  --execution_mode=<mode>                       "
        << "    Specifies execution mode: profile, verify, single\n"

        << "  --output=<filename.csv>                       "
        << "    Writes summary of profiling to specified .csv file\n"

        << "  --iterations=<timing iterations>              "
        << "    maximum number of iterations to execute when profiling\n"

        << "  --m=<height>[:max height[:step]]              "
        << "    Height of GEMM problem (number of rows of C). May specify a range with optional "
           "step size.\n"

        << "  --n=<width>[:max width[:step]]                "
        << "    Width of GEMM problem (number of columns of C). May specify a range with optional "
           "step size.\n"

        << "  --k=<depth>[:max depth[:step]]                "
        << "    Size of inner dimension of A and B. May specify a range with optional step size.\n"

        << "  --kernels={s|d|h|i|wmma_}gemm_{nn,nt,tn,tt}   "
        << "    Select GEMM datatype and layout to use for tests\n"

        << "  --peak=<bool>                                 "
        << "    If true, only reports peak performance per kernel after profiling specified "
           "problem space.\n"

        << "  --save_workspace={*never,incorrect,always}    "
        << "    Specifies when to save the GEMM inputs and results to the filesystem.\n"

        << "  --seed=<seed>                                 "
        << "    Random seed used by the random number generator in initializing input matrices.\n"

        << "  --tags=<column:tag,...>                       "
        << "    Inserts leading columns in output table and uniform values for each column. Useful "
           "for generating pivot tables.\n"

        << "\n\n"

        << "Example usage:\n\n"

        << "# Runs one problem size for all kernels\n"
        << "./tools/test/perf/cutlass_perf_test --m=10240 --n=1024 --k=1024\n\n"

        << "# Varies GEMM K dimension for SGEMM and IGEMM with column-major multiplicands\n"
        << "./tools/test/perf/cutlass_perf_test --m=10240 --n=4096 --k=1024:8192:128 "
           "--kernels=sgemm_nn,igemm_nn\n\n"

        << std::flush;
  }
};

///////////////////////////////////////////////////////////////////////////////////////////////////

}  // namespace perf
CUTLASS v1.0 release 2018-05-17 02:44:56 +08:00			`/***************************************************************************************************`
			`* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.`
			`*`
			`* Redistribution and use in source and binary forms, with or without modification, are permitted`
			`* provided that the following conditions are met:`
			`* * Redistributions of source code must retain the above copyright notice, this list of`
			`* conditions and the following disclaimer.`
			`* * Redistributions in binary form must reproduce the above copyright notice, this list of`
			`* conditions and the following disclaimer in the documentation and/or other materials`
			`* provided with the distribution.`
			`* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used`
			`* to endorse or promote products derived from this software without specific prior written`
			`* permission.`
			`*`
			`* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR`
			`* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND`
			`* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE`
			`* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,`
			`* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;`
			`* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,`
			`* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*`
			`**************************************************************************************************/`

			`#pragma once`

			`#include <stdint.h>`
			`#include <tools/util/command_line.h>`

			`namespace perf {`

			`///////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Range of problem sizes`
			`struct Range {`
			`int start;`
			`int end;`
			`int increment;`

			`Range(int _start = 0) : start(_start), end(_start), increment(1) {}`

			`Range(int _start, int _end, int _increment = 1)`
			`: start(_start), end(_end), increment(_increment) {}`
			`};`

			`///////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Defines a space of problem sizes`
			`struct GemmProblemRange {`
			`public:`
			`/// Range of sizes in GEMM M dimension`
			`Range M;`

			`/// Range of sizes in GEMM N dimension`
			`Range N;`

			`/// Range of sizes in GEMM K dimension`
			`Range K;`

			`//`
			`// Methods`
			`//`

			`/// Constructor to define a space of probelm sizes`
			`GemmProblemRange(Range _M = Range(256), Range _N = Range(256), Range _K = Range(256))`
			`: M(_M), N(_N), K(_K) {}`

			`/// Parses a command line argument as a Range object`
			`static void get_range(Range &range,`
			`cutlass::CommandLine const &args,`
			`std::string const &arg,`
			`Range const &_default = Range(256)) {`
			`range = Range(0, 0, 1);`

			`if (args.check_cmd_line_flag(arg.c_str())) {`
			`std::vector<std::string> values;`
			`args.get_cmd_line_arguments(arg.c_str(), values, ':');`

			`if (values.size() > 0) {`
			`std::stringstream ss;`
			`ss << values.at(0);`
			`ss >> range.start;`
			`}`

			`if (values.size() > 1) {`
			`std::stringstream ss;`
			`ss << values.at(1);`
			`ss >> range.end;`
			`} else {`
			`range.end = range.start;`
			`}`

			`if (values.size() > 2) {`
			`std::stringstream ss;`
			`ss << values.at(2);`
			`ss >> range.increment;`
			`}`
			`} else {`
			`range = _default;`
			`}`
			`}`

			`/// Initializes the GEMM problem size from command line arguments`
			`GemmProblemRange(cutlass::CommandLine const &args) {`
			`get_range(M, args, "m", Range(10240));`
			`get_range(N, args, "n", Range(4096));`
			`get_range(K, args, "k", Range(4096));`
			`}`
			`};`

			`////////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Distribution type`
			`struct Distribution {`
			`/// Variant types`
			`enum Kind { Invalid, Uniform, Gaussian, Linear, Identity };`

			`/// Distribution state`
			`union {`
			`/// Uniform distribution`
			`struct {`
			`double min;`
			`double max;`
			`} uniform;`

			`/// Gaussian distribution`
			`struct {`
			`double mean;`
			`double stddev;`
			`} gaussian;`

			`/// Elements are linear combination of row and column index`
			`struct {`
			`double offset;`
			`double delta_row;`
			`double delta_column;`
			`} linear;`
			`};`

			`/// Active variant kind`
			`Kind kind;`

			`/// Random values are cast to integer after scaling by this power of two`
			`int int_scale;`

			`//`
			`// Methods`
			`//`

			`Distribution() : kind(Invalid), int_scale(0) {}`

			`/// Configures distribution as uniform random`
			`Distribution &set_uniform(double _min, double _max, int _int_scale = 0) {`
			`kind = Uniform;`
			`uniform.min = _min;`
			`uniform.max = _max;`
			`int_scale = _int_scale;`
			`return *this;`
			`}`

			`/// Configures distribution as Gaussian distribution`
			`Distribution &set_gaussian(double _mean, double _stddev, int _int_scale = 0) {`
			`kind = Gaussian;`
			`gaussian.mean = _mean;`
			`gaussian.stddev = _stddev;`
			`int_scale = _int_scale;`
			`return *this;`
			`}`


			`/// Sets identity`
			`Distribution &set_identity() {`
			`kind = Identity;`
			`return *this;`
			`}`
			`};`

			`} // namespace perf`

			`////////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Prints a Distribution to ostream`
			`inline std::ostream &operator<<(std::ostream &out, perf::Distribution const &dist) {`
			`switch (dist.kind) {`
			`case perf::Distribution::Uniform:`
			`out << "uniorm, min: " << dist.uniform.min << ", max: " << dist.uniform.max;`
			`break;`
			`case perf::Distribution::Gaussian:`
			`out << "gaussian, mean: " << dist.gaussian.mean << ", stddev: " << dist.gaussian.stddev;`
			`break;`
			`case perf::Distribution::Linear:`
			`out << "linear, mean: " << dist.linear.offset << ", delta_row: " << dist.linear.delta_row`
			`<< ", delta_column: " << dist.linear.delta_column;`
			`break;`
			`case perf::Distribution::Identity:`
			`break;`
			`default:`
			`out << "unknown";`
			`}`

			`out << ", int_scale: " << dist.int_scale;`

			`return out;`
			`}`

			`////////////////////////////////////////////////////////////////////////////////////////////////////`

			`namespace perf {`

			`////////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Defines a vector of string pairs`
			`typedef std::vector<std::pair<std::string, std::string> > KeyValueVector;`

			`/// Defines a const iterator to a KeyValueVector`
			`typedef KeyValueVector::const_iterator KeyValueIterator;`

			`/// Structure captures the initial configuration of matrices`
			`struct InitialDistribution {`
			`/// Distribution of A matrix operand`
			`Distribution dist_A;`

			`/// Distribution of B matrix operand`
			`Distribution dist_B;`

			`/// Distribution of C matrix operand`
			`Distribution dist_C;`

			`/// Seed for random number generation`
			`int64_t seed;`

			`//`
			`// Static function members`
			`//`

			`/// Gets the initial distribution`
			`static void get_distribution(cutlass::CommandLine const &args,`
			`std::string const &arg,`
			`Distribution &dist) {`
			`struct {`
			`const char *label;`
			`Distribution::Kind kind;`
			`} distribution_kinds[] = {{"uniform", Distribution::Uniform},`
			`{"gaussian", Distribution::Gaussian},`
			`{"linear", Distribution::Linear},`
			`{"identity", Distribution::Identity},`
			`{0, Distribution::Invalid}};`

			`struct {`
			`char const *label;`
			`double *member;`
			`} members[] = {{"min", &dist.uniform.min},`
			`{"max", &dist.uniform.max},`
			`{"mean", &dist.gaussian.mean},`
			`{"stddev", &dist.gaussian.stddev},`
			`{"offset", &dist.linear.offset},`
			`{"delta_row", &dist.linear.delta_row},`
			`{"delta_column", &dist.linear.delta_column},`
			`{0, 0}};`

			`KeyValueVector values;`
			`args.get_cmd_line_argument_pairs(arg.c_str(), values);`

			`// The parser expects the first token to be a string identifying the distribution type.`
			`KeyValueIterator it = values.begin();`
			`if (it != values.end()) {`
			`for (int i = 0; distribution_kinds[i].label; ++i) {`
			`if (it->first == distribution_kinds[i].label) {`
			`dist.kind = distribution_kinds[i].kind;`
			`break;`
			`}`
			`}`
			`++it;`
			`}`

			`// Subsequent key-value pairs update the named field of the distribution struct.`
			`for (; it != values.end(); ++it) {`

			`// Integer scaling factor - if < 0, no integer rounding is performed.`
			`if (it->first == "scale" && !it->second.empty()) {`
			`std::stringstream ss;`
			`ss << it->second;`
			`ss >> dist.int_scale;`

			`continue; // next token`
			`}`

			`// Casts as integer without scaling`
			`if (it->first == "integer") {`
			`dist.int_scale = 0;`
			`continue; // next token`
			`}`

			`// initialize other members`
			`for (int m = 0; members[m].label; ++m) {`
			`if (it->first == members[m].label && !it->second.empty()) {`
			`std::stringstream ss;`
			`ss << it->second;`
			`ss >> *(members[m].member);`
			`}`
			`}`
			`}`
			`}`

			`//`
			`// Methods`
			`//`

			`/// Basic uniform random distribution`
			`InitialDistribution(int64_t _seed = 700) : seed(_seed) {`
			`dist_A.set_uniform(-8, 8);`
			`dist_B.set_uniform(-8, 8);`
			`dist_C.set_uniform(-8, 8);`
			`}`

			`/// Extracts initial distribution from command line arguments`
			`InitialDistribution(cutlass::CommandLine const &args) {`
			`// Set initial values`
			`seed = 700;`
			`dist_A.set_uniform(-8, 8);`
			`dist_B.set_uniform(-8, 8);`
			`dist_C.set_uniform(-8, 8);`

			`// Update with command line arguments`
			`args.get_cmd_line_argument("seed", seed, seed);`

			`// Update all distributions at once`
			`Distribution dist_all;`
			`if (args.check_cmd_line_flag("dist")) {`
			`get_distribution(args, "dist", dist_all);`
			`dist_A = dist_all;`
			`dist_B = dist_all;`
			`dist_C = dist_all;`
			`}`

			`get_distribution(args, "dist_A", dist_A);`
			`get_distribution(args, "dist_B", dist_B);`
			`get_distribution(args, "dist_C", dist_C);`
			`}`
			`};`

			`///////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Defines how to execute the benchmarks`
			`struct ExecutionMode {`
			`enum Kind {`
			`Profile,`
			`Verify,`
			`Single,`
			`Invalid`
			`};`

			`static std::string to_string(Kind kind) {`
			`switch (kind) {`
			`case Profile: return "profile";`
			`case Verify: return "verify";`
			`case Single: return "single";`
			`default: return "invalid";`
			`}`
			`}`

			`static Kind from_string(std::string const &str) {`
			`if (str == "profile") return Profile;`
			`if (str == "verify") return Verify;`
			`if (str == "single") return Single;`
			`return Profile;`
			`}`
			`};`

			`/// Indicates when the workspace is saved`
			`struct WorkspaceSaveMode {`
			`enum Kind {`
			`Never,`
			`Incorrect,`
			`Always`
			`};`

			`static std::string to_string(Kind kind) {`
			`switch (kind) {`
			`case Never: return "never";`
			`case Incorrect: return "incorrect";`
			`case Always: return "always";`
			`default: return "incorrect";`
			`}`
			`}`

			`static Kind from_string(std::string const &str) {`
			`if (str == "never") return Never;`
			`if (str == "incorrect") return Incorrect;`
			`if (str == "always") return Always;`
			`return Incorrect;`
			`}`
			`};`

			`///////////////////////////////////////////////////////////////////////////////////////////////////`

			`/// Class holding testbench command line options`
			`struct TestbenchOptions {`

			`//`
			`// Data members`
			`//`

			`/// Describes the random initial state of the input matrices`
			`InitialDistribution initial_distribution;`

			`// Path to output file name`
			`std::string output_filename;`

			`/// If true, output is appended`
			`bool append;`

			`/// Number of iterations`
			`int iterations;`

			`/// Defines how to run the benchmark`
			`ExecutionMode::Kind execution_mode;`

			`/// Indicates when the workspace is saved`
			`WorkspaceSaveMode::Kind save_workspace_mode;`

			`/// Enabled kernel names`
			`std::vector<std::string> kernels;`

			`/// Scalar value for GEMM`
			`double alpha;`

			`/// Scalar value for GEMM`
			`double beta;`

			`/// Range of problem sizes`
			`GemmProblemRange problem_range;`

			`/// Tags to describe the profiler output`
			`KeyValueVector pivot_tags;`

			`/// If enabled, only the peak performance for a given kernel is reported`
			`bool peak_performance;`

			`//`
			`// Methods`
			`//`

			`/// Constructs the testbench from tags`
			`TestbenchOptions(cutlass::CommandLine const &args)`
			`: initial_distribution(args),`
			`execution_mode(ExecutionMode::Profile),`
			`save_workspace_mode(WorkspaceSaveMode::Never),`
			`problem_range(args) {`

			`// fetch command line arguments`
			`args.get_cmd_line_argument("iterations", iterations, 25);`
			`args.get_cmd_line_argument("append", append, false);`
			`args.get_cmd_line_argument("output", output_filename);`
			`args.get_cmd_line_argument("alpha", alpha, 1.0);`
			`args.get_cmd_line_argument("beta", beta, 0.0);`
			`args.get_cmd_line_argument("peak", peak_performance, false);`
			`args.get_cmd_line_argument_pairs("tags", pivot_tags);`

			`if (args.check_cmd_line_flag("execution_mode")) {`
			`std::string str;`
			`args.get_cmd_line_argument("execution_mode", str);`
			`execution_mode = ExecutionMode::from_string(str);`
			`}`

			`if (args.check_cmd_line_flag("save_workspace")) {`
			`std::string str;`
			`args.get_cmd_line_argument("save_workspace", str);`
			`save_workspace_mode = WorkspaceSaveMode::from_string(str);`
			`}`

			`// query for enabled kernels or enable all of them`
			`if (args.check_cmd_line_flag("kernels")) {`
			`args.get_cmd_line_arguments("kernels", kernels, ',');`
			`} else {`
			`char const *gemms[] = {"sgemm", "dgemm", "hgemm", "igemm", "wmma_gemm", 0};`
			`char const *layouts[] = {"nn", "nt", "tn", "tt", 0};`
			`for (int i = 0; gemms[i]; ++i) {`
			`for (int j = 0; layouts[j]; ++j) {`
			`kernels.push_back(std::string(gemms[i]) + "_" + layouts[j]);`
			`}`
			`}`
			`}`
			`}`

			`/// Returns true if the kernel name appears among the enabled kernels`
			`bool kernel_enabled(std::string const &kernel) const {`
			`typedef std::vector<std::string>::const_iterator kernel_iterator;`

			`for (kernel_iterator it = kernels.begin(); it != kernels.end(); ++it) {`
			`if (kernel.find(*it) != std::string::npos) {`
			`return true;`
			`}`
			`}`

			`return false;`
			`}`

			`/// Given the disposition of a GEMM problem, returns true if the results should`
			`/// be saved to the file system.`
			`bool save_workspace(bool correct) const {`
			`if (save_workspace_mode == WorkspaceSaveMode::Always \|\|`
			`(save_workspace_mode == WorkspaceSaveMode::Incorrect && !correct)) {`
			`return true;`
			`}`
			`return false;`
			`}`

			`/// Prints the usage statement`
			`static void usage(std::ostream &out) {`

			`out << "cutlass_perf_test [options]\n\n"`

			`<< " --help\n"`

			`<< " --append=<true\|false*> "`
			`<< " If true, appends output to existing CSV file. If false, overwrites.\n"`

			`<< " --alpha=<alpha> "`
			`<< " Value for alpha to be used in GEMM experiments\n"`

			`<< " --beta=<beta> "`
			`<< " Value for beta to be used in GEMM experiments\n"`

			`<< " --dist_{A,B,C}=<distribution> "`
			`<< " Describes the random distribution of each of the input matrix operands.\n"`

			`<< " --execution_mode=<mode> "`
			`<< " Specifies execution mode: profile, verify, single\n"`

			`<< " --output=<filename.csv> "`
			`<< " Writes summary of profiling to specified .csv file\n"`

			`<< " --iterations=<timing iterations> "`
			`<< " maximum number of iterations to execute when profiling\n"`

			`<< " --m=<height>[:max height[:step]] "`
			`<< " Height of GEMM problem (number of rows of C). May specify a range with optional "`
			`"step size.\n"`

			`<< " --n=<width>[:max width[:step]] "`
			`<< " Width of GEMM problem (number of columns of C). May specify a range with optional "`
			`"step size.\n"`

			`<< " --k=<depth>[:max depth[:step]] "`
			`<< " Size of inner dimension of A and B. May specify a range with optional step size.\n"`

Minor updates to usage and readme. 2018-05-18 06:10:55 +08:00			`<< " --kernels={s\|d\|h\|i\|wmma_}gemm_{nn,nt,tn,tt} "`
CUTLASS v1.0 release 2018-05-17 02:44:56 +08:00			`<< " Select GEMM datatype and layout to use for tests\n"`

			`<< " --peak=<bool> "`
			`<< " If true, only reports peak performance per kernel after profiling specified "`
			`"problem space.\n"`

			`<< " --save_workspace={*never,incorrect,always} "`
			`<< " Specifies when to save the GEMM inputs and results to the filesystem.\n"`

			`<< " --seed=<seed> "`
			`<< " Random seed used by the random number generator in initializing input matrices.\n"`

			`<< " --tags=<column:tag,...> "`
			`<< " Inserts leading columns in output table and uniform values for each column. Useful "`
			`"for generating pivot tables.\n"`

			`<< "\n\n"`

			`<< "Example usage:\n\n"`

			`<< "# Runs one problem size for all kernels\n"`
			`<< "./tools/test/perf/cutlass_perf_test --m=10240 --n=1024 --k=1024\n\n"`

			`<< "# Varies GEMM K dimension for SGEMM and IGEMM with column-major multiplicands\n"`
			`<< "./tools/test/perf/cutlass_perf_test --m=10240 --n=4096 --k=1024:8192:128 "`
			`"--kernels=sgemm_nn,igemm_nn\n\n"`

			`<< std::flush;`
			`}`
			`};`

			`///////////////////////////////////////////////////////////////////////////////////////////////////`

			`} // namespace perf`