diff --git a/bench/check_status.sh b/bench/check_status.sh deleted file mode 100755 index bfaa826..0000000 --- a/bench/check_status.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Initialize counters -declare -A counts -statuses=("init" "pending" "running" "fail" "oom" "timeout" "completed") -for status in "${statuses[@]}"; do - counts[$status]=0 -done - -# Find and process all status.txt files -while IFS= read -r -d '' file; do - status=$(cat "$file" | tr -d '[:space:]') - - if [[ " ${statuses[@]} " =~ " ${status} " ]]; then - ((counts[$status]++)) - fi -done < <(find "$1" -name "status.txt" -print0) - -# Calculate total -total=0 -for count in "${counts[@]}"; do - ((total += count)) -done - -# Print the results -echo "Status | Count" -echo "-----------|---------" -for status in "${statuses[@]}"; do - printf "%-10s | %d\n" "$status" "${counts[$status]}" -done -echo "-----------|---------" -echo "Total | $total" \ No newline at end of file diff --git a/bench/create_config.py b/create_config.py similarity index 100% rename from bench/create_config.py rename to create_config.py diff --git a/bench/submit_jobs.py b/submit_slurm_jobs.py similarity index 95% rename from bench/submit_jobs.py rename to submit_slurm_jobs.py index 1642a7f..a9e7ba5 100644 --- a/bench/submit_jobs.py +++ b/submit_slurm_jobs.py @@ -91,21 +91,21 @@ class Scheduler: "qos": job.qos, } - #TODO: don't hardcode the base_bench.slurm path. Should be #HOME/bench_cluster/template/base_bench.slurm + #TODO: don't hardcode the base_job.slurm path. Should be #HOME/bench_cluster/template/base_job.slurm if cluster == "hf": - base_path = "/fsx/ferdinandmom/ferdinand-hf/picotron/bench/template/base_bench.slurm" + base_path = "/fsx/ferdinandmom/ferdinand-hf/picotron/bench/template/base_job.slurm" else: raise ValueError("Invalid cluster") with open(base_path, 'r') as file: - base_bench_file = file.read() + base_job_file = file.read() - base_bench_template = Template(base_bench_file) + base_job_template = Template(base_job_file) # Write the rendered script to a new file located at the job root_path - output_file_path = os.path.join(job.root_path, "bench.slurm") + output_file_path = os.path.join(job.root_path, "job.slurm") with open(output_file_path, 'w') as file: - file.write(base_bench_template.render(context_bench)) + file.write(base_job_template.render(context_bench)) print(f"Slurm script created at {output_file_path}") diff --git a/bench/template/base_config.json b/template/base_config.json similarity index 100% rename from bench/template/base_config.json rename to template/base_config.json diff --git a/bench/template/base_bench.slurm b/template/base_job.slurm similarity index 98% rename from bench/template/base_bench.slurm rename to template/base_job.slurm index b4f05d7..77445e4 100644 --- a/bench/template/base_bench.slurm +++ b/template/base_job.slurm @@ -1,6 +1,6 @@ #!/bin/bash -#SBATCH --job-name=bench-picotron +#SBATCH --job-name=job-picotron #SBATCH --time=00:30:00 #SBATCH --partition=hopper-prod #SBATCH --nodes={{ nodes }}