cleaning
This commit is contained in:
parent
2d198659e2
commit
f74bff79e0
@ -1,32 +0,0 @@
|
|||||||
#!/bin/bash
#
# Summarize job states found under a directory tree.
#
# Usage: <script> ROOT_DIR
#
# Recursively finds every file named status.txt under ROOT_DIR, strips all
# whitespace from its contents, and tallies how many files hold each known
# status. Files whose contents are not one of the known statuses are ignored.
# Prints a table with one row per status followed by a total row.

# Known statuses, in the order they are printed.
statuses=("init" "pending" "running" "fail" "oom" "timeout" "completed")

# Per-status tallies, keyed by status name.
declare -A counts

#######################################
# Tell whether a string is one of the known statuses.
# Globals:   statuses (read)
# Arguments: $1 - candidate status string
# Returns:   0 if known, 1 otherwise
#######################################
is_known_status() {
  local candidate=$1 known
  for known in "${statuses[@]}"; do
    [[ "$candidate" == "$known" ]] && return 0
  done
  return 1
}

#######################################
# Reset the tallies and count the status.txt files below a root path.
# Globals:   statuses (read), counts (written)
# Arguments: $1 - root path handed to find
#######################################
count_statuses() {
  local root=$1 file status

  for status in "${statuses[@]}"; do
    counts[$status]=0
  done

  # NUL-delimited so paths with spaces or newlines survive intact.
  while IFS= read -r -d '' file; do
    # Unreadable files yield an empty status and are skipped below.
    status=$(tr -d '[:space:]' < "$file")
    if is_known_status "$status"; then
      # Plain assignment: unlike ((x++)), it never returns a non-zero status
      # when the previous value was 0.
      counts[$status]=$(( ${counts[$status]} + 1 ))
    fi
  done < <(find "$root" -name "status.txt" -print0)
}

#######################################
# Print the tallies as a table, with every row using the same column layout.
# Globals:   statuses (read), counts (read)
# Outputs:   table on stdout
#######################################
print_summary() {
  local status total=0

  printf '%-10s | %s\n' "Status" "Count"
  printf '%s\n' "-----------|---------"
  for status in "${statuses[@]}"; do
    printf '%-10s | %d\n' "$status" "${counts[$status]}"
    total=$(( total + ${counts[$status]} ))
  done
  printf '%s\n' "-----------|---------"
  printf '%-10s | %d\n' "Total" "$total"
}

#######################################
# Entry point.
# Arguments: $1 - root directory to scan (required)
# Returns:   0 on success, 2 on usage error, 1 if the path does not exist
#######################################
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s ROOT_DIR\n' "${0##*/}" >&2
    return 2
  fi

  local root=$1
  if [[ ! -e "$root" ]]; then
    printf 'error: no such path: %s\n' "$root" >&2
    return 1
  fi
  # Keep find from parsing a path such as "-runs" as an expression.
  if [[ "$root" == -* ]]; then
    root="./$root"
  fi

  count_statuses "$root"
  print_summary
}

main "$@"
|
|
||||||
@ -91,21 +91,21 @@ class Scheduler:
|
|||||||
"qos": job.qos,
|
"qos": job.qos,
|
||||||
}
|
}
|
||||||
|
|
||||||
#TODO: don't hardcode the base_bench.slurm path. Should be #HOME/bench_cluster/template/base_bench.slurm
|
#TODO: don't hardcode the base_job.slurm path. Should be #HOME/bench_cluster/template/base_job.slurm
|
||||||
if cluster == "hf":
|
if cluster == "hf":
|
||||||
base_path = "/fsx/ferdinandmom/ferdinand-hf/picotron/bench/template/base_bench.slurm"
|
base_path = "/fsx/ferdinandmom/ferdinand-hf/picotron/bench/template/base_job.slurm"
|
||||||
else:
|
else:
|
||||||
raise ValueError("Invalid cluster")
|
raise ValueError("Invalid cluster")
|
||||||
|
|
||||||
with open(base_path, 'r') as file:
|
with open(base_path, 'r') as file:
|
||||||
base_bench_file = file.read()
|
base_job_file = file.read()
|
||||||
|
|
||||||
base_bench_template = Template(base_bench_file)
|
base_job_template = Template(base_job_file)
|
||||||
|
|
||||||
# Write the rendered script to a new file located at the job root_path
|
# Write the rendered script to a new file located at the job root_path
|
||||||
output_file_path = os.path.join(job.root_path, "bench.slurm")
|
output_file_path = os.path.join(job.root_path, "job.slurm")
|
||||||
with open(output_file_path, 'w') as file:
|
with open(output_file_path, 'w') as file:
|
||||||
file.write(base_bench_template.render(context_bench))
|
file.write(base_job_template.render(context_bench))
|
||||||
|
|
||||||
print(f"Slurm script created at {output_file_path}")
|
print(f"Slurm script created at {output_file_path}")
|
||||||
|
|
||||||
@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
#SBATCH --job-name=bench-picotron
|
#SBATCH --job-name=job-picotron
|
||||||
#SBATCH --time=00:30:00
|
#SBATCH --time=00:30:00
|
||||||
#SBATCH --partition=hopper-prod
|
#SBATCH --partition=hopper-prod
|
||||||
#SBATCH --nodes={{ nodes }}
|
#SBATCH --nodes={{ nodes }}
|
||||||
Loading…
Reference in New Issue
Block a user