From d74674bbd978fad7f27a252650249bc2550f3e92 Mon Sep 17 00:00:00 2001
From: "Allen.Dou" <allen.dou@hotmail.com>
Date: Sat, 15 Jun 2024 00:47:44 +0800
Subject: [PATCH] [Misc] Fix arg names: use hyphens instead of underscores in CLI flags (#5524)

---
 benchmarks/kernels/benchmark_paged_attention.py | 2 +-
 examples/aqlm_example.py                        | 2 +-
 examples/fp8/extract_scales.py                  | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/benchmarks/kernels/benchmark_paged_attention.py b/benchmarks/kernels/benchmark_paged_attention.py
index e6f4e9e6..a5355f4c 100644
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@@ -165,7 +165,7 @@ if __name__ == '__main__':
                         choices=["v1", "v2"],
                         default="v2")
     parser.add_argument("--batch-size", type=int, default=8)
-    parser.add_argument("--seq_len", type=int, default=4096)
+    parser.add_argument("--seq-len", type=int, default=4096)
     parser.add_argument("--num-query-heads", type=int, default=64)
     parser.add_argument("--num-kv-heads", type=int, default=8)
     parser.add_argument("--head-size",
diff --git a/examples/aqlm_example.py b/examples/aqlm_example.py
index e7c17fa0..3a63003a 100644
--- a/examples/aqlm_example.py
+++ b/examples/aqlm_example.py
@@ -17,7 +17,7 @@ def main():
                         type=int,
                         default=0,
                         help='known good models by index, [0-4]')
-    parser.add_argument('--tensor_parallel_size',
+    parser.add_argument('--tensor-parallel-size',
                         '-t',
                         type=int,
                         default=1,
diff --git a/examples/fp8/extract_scales.py b/examples/fp8/extract_scales.py
index 1eb961a5..e007a3bc 100644
--- a/examples/fp8/extract_scales.py
+++ b/examples/fp8/extract_scales.py
@@ -327,7 +327,7 @@ if __name__ == "__main__":
         "--quantization-param-path <filename>). This is only used "
         "if the KV cache dtype is FP8 and on ROCm (AMD GPU).")
     parser.add_argument(
-        "--quantized_model",
+        "--quantized-model",
         help="Specify the directory containing a single quantized HF model. "
         "It is expected that the quantization format is FP8_E4M3, for use "
         "on ROCm (AMD GPU).",
@@ -339,18 +339,18 @@ if __name__ == "__main__":
         choices=["auto", "safetensors", "npz", "pt"],
         default="auto")
     parser.add_argument(
-        "--output_dir",
+        "--output-dir",
         help="Optionally specify the output directory. By default the "
         "KV cache scaling factors will be saved in the model directory, "
         "however you can override this behavior here.",
         default=None)
     parser.add_argument(
-        "--output_name",
+        "--output-name",
         help="Optionally specify the output filename.",
         # TODO: Change this once additional scaling factors are enabled
         default="kv_cache_scales.json")
     parser.add_argument(
-        "--tp_size",
+        "--tp-size",
         help="Optionally specify the tensor-parallel (TP) size that the "
         "quantized model should correspond to. If specified, during KV "
         "cache scaling factor extraction the observed TP size will be "