From d74674bbd978fad7f27a252650249bc2550f3e92 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Sat, 15 Jun 2024 00:47:44 +0800 Subject: [PATCH] [Misc] Fix arg names (#5524) --- benchmarks/kernels/benchmark_paged_attention.py | 2 +- examples/aqlm_example.py | 2 +- examples/fp8/extract_scales.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/kernels/benchmark_paged_attention.py b/benchmarks/kernels/benchmark_paged_attention.py index e6f4e9e6..a5355f4c 100644 --- a/benchmarks/kernels/benchmark_paged_attention.py +++ b/benchmarks/kernels/benchmark_paged_attention.py @@ -165,7 +165,7 @@ if __name__ == '__main__': choices=["v1", "v2"], default="v2") parser.add_argument("--batch-size", type=int, default=8) - parser.add_argument("--seq_len", type=int, default=4096) + parser.add_argument("--seq-len", type=int, default=4096) parser.add_argument("--num-query-heads", type=int, default=64) parser.add_argument("--num-kv-heads", type=int, default=8) parser.add_argument("--head-size", diff --git a/examples/aqlm_example.py b/examples/aqlm_example.py index e7c17fa0..3a63003a 100644 --- a/examples/aqlm_example.py +++ b/examples/aqlm_example.py @@ -17,7 +17,7 @@ def main(): type=int, default=0, help='known good models by index, [0-4]') - parser.add_argument('--tensor_parallel_size', + parser.add_argument('--tensor-parallel-size', '-t', type=int, default=1, diff --git a/examples/fp8/extract_scales.py b/examples/fp8/extract_scales.py index 1eb961a5..e007a3bc 100644 --- a/examples/fp8/extract_scales.py +++ b/examples/fp8/extract_scales.py @@ -327,7 +327,7 @@ if __name__ == "__main__": "--quantization-param-path ). This is only used " "if the KV cache dtype is FP8 and on ROCm (AMD GPU).") parser.add_argument( - "--quantized_model", + "--quantized-model", help="Specify the directory containing a single quantized HF model. " "It is expected that the quantization format is FP8_E4M3, for use " "on ROCm (AMD GPU).", @@ -339,18 +339,18 @@ if __name__ == "__main__": choices=["auto", "safetensors", "npz", "pt"], default="auto") parser.add_argument( - "--output_dir", + "--output-dir", help="Optionally specify the output directory. By default the " "KV cache scaling factors will be saved in the model directory, " "however you can override this behavior here.", default=None) parser.add_argument( - "--output_name", + "--output-name", help="Optionally specify the output filename.", # TODO: Change this once additional scaling factors are enabled default="kv_cache_scales.json") parser.add_argument( - "--tp_size", + "--tp-size", help="Optionally specify the tensor-parallel (TP) size that the " "quantized model should correspond to. If specified, during KV " "cache scaling factor extraction the observed TP size will be "