cleaning
This commit is contained in:
parent
2d198659e2
commit
f74bff79e0
@ -1,32 +0,0 @@
|
||||
#!/bin/bash
#
# Summarise job states found under a directory tree.
#
# Every file named status.txt below ROOT_DIR is read, all whitespace is
# stripped from its content, and the result is tallied when it is one of the
# recognised states. A table with one row per state plus a total is printed
# to stdout.
#
# Usage: <script> [ROOT_DIR]    (ROOT_DIR defaults to the current directory)
# Exit status: 0 on success, 1 when ROOT_DIR does not exist.

set -u

# Recognised states, in the order they are reported.
readonly -a statuses=("init" "pending" "running" "fail" "oom" "timeout" "completed")

# Per-state tallies (keyed by state name) and their sum; filled by
# tally_statuses and read by print_report.
declare -A counts=()
total=0

#######################################
# Tell whether a string is one of the recognised states.
# Globals:   statuses (read)
# Arguments: $1 - candidate state
# Returns:   0 if recognised, 1 otherwise (including the empty string)
#######################################
is_known_status() {
  local candidate=$1 known
  for known in "${statuses[@]}"; do
    if [[ "$candidate" == "$known" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Count the states stored in every status.txt below a root path.
# Globals:   statuses (read), counts (reset and written), total (written)
# Arguments: $1 - path to search
#######################################
tally_statuses() {
  local root=$1 file status

  # Start from zero so that every state gets a row and repeated calls
  # do not accumulate.
  total=0
  for status in "${statuses[@]}"; do
    counts[$status]=0
  done

  # NUL-delimited so that paths containing spaces or newlines survive.
  while IFS= read -r -d '' file; do
    # A directory (or unreadable entry) may also be called status.txt.
    [[ -f "$file" && -r "$file" ]] || continue

    status=$(tr -d '[:space:]' < "$file")

    # Only increment after validation: the key comes from file content.
    if is_known_status "$status"; then
      counts[$status]=$(( ${counts[$status]} + 1 ))
      total=$(( total + 1 ))
    fi
  done < <(find "$root" -name "status.txt" -print0)
}

#######################################
# Print the tally table.
# Globals:   statuses, counts, total (read)
# Outputs:   the table on stdout
#######################################
print_report() {
  local status
  echo "Status | Count"
  echo "-----------|---------"
  for status in "${statuses[@]}"; do
    printf "%-10s | %d\n" "$status" "${counts[$status]}"
  done
  echo "-----------|---------"
  echo "Total | $total"
}

#######################################
# Entry point.
# Arguments: $1 - path to search (optional, defaults to ".")
# Returns:   0 on success, 1 if the path does not exist
#######################################
main() {
  local root=${1:-.}

  # find's own failure would be hidden inside the process substitution,
  # so reject a bad path up front instead of printing an all-zero table.
  if [[ ! -e "$root" ]]; then
    printf 'error: %s does not exist\n' "$root" >&2
    return 1
  fi

  # Keep find from parsing a path that begins with '-' as an option.
  if [[ "$root" == -* ]]; then
    root="./$root"
  fi

  tally_statuses "$root"
  print_report
}

main "$@"
|
||||
@ -91,21 +91,21 @@ class Scheduler:
|
||||
"qos": job.qos,
|
||||
}
|
||||
|
||||
#TODO: don't hardcode the base_bench.slurm path. Should be #HOME/bench_cluster/template/base_bench.slurm
|
||||
#TODO: don't hardcode the base_job.slurm path. Should be #HOME/bench_cluster/template/base_job.slurm
|
||||
if cluster == "hf":
|
||||
base_path = "/fsx/ferdinandmom/ferdinand-hf/picotron/bench/template/base_bench.slurm"
|
||||
base_path = "/fsx/ferdinandmom/ferdinand-hf/picotron/bench/template/base_job.slurm"
|
||||
else:
|
||||
raise ValueError("Invalid cluster")
|
||||
|
||||
with open(base_path, 'r') as file:
|
||||
base_bench_file = file.read()
|
||||
base_job_file = file.read()
|
||||
|
||||
base_bench_template = Template(base_bench_file)
|
||||
base_job_template = Template(base_job_file)
|
||||
|
||||
# Write the rendered script to a new file located at the job root_path
|
||||
output_file_path = os.path.join(job.root_path, "bench.slurm")
|
||||
output_file_path = os.path.join(job.root_path, "job.slurm")
|
||||
with open(output_file_path, 'w') as file:
|
||||
file.write(base_bench_template.render(context_bench))
|
||||
file.write(base_job_template.render(context_bench))
|
||||
|
||||
print(f"Slurm script created at {output_file_path}")
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
#SBATCH --job-name=bench-picotron
|
||||
#SBATCH --job-name=job-picotron
|
||||
#SBATCH --time=00:30:00
|
||||
#SBATCH --partition=hopper-prod
|
||||
#SBATCH --nodes={{ nodes }}
|
||||
Loading…
Reference in New Issue
Block a user