small changes

This commit is contained in:
ferdinand.mom 2024-12-17 05:01:35 +00:00
parent b390a0101e
commit b0ea5066ad
3 changed files with 4 additions and 2 deletions

3
.gitignore vendored
View File

@@ -6,4 +6,5 @@ picotron.egg-info
wandb
tmp
debug
bench
bench
sandbox

View File

View File

@@ -58,7 +58,8 @@ module load cuda/12.1
GIT_REPO="/fsx/ferdinandmom/ferdinand-hf/picotron/"
CMD="$GIT_REPO/train.py --config {{ config }}"
huggingface-cli login --token $HUGGINGFACE_TOKEN
git checkout loading_big_model
# huggingface-cli login --token $HUGGINGFACE_TOKEN
LAUNCHER="torchrun --nproc_per_node={{ n_proc_per_node }} --nnode={{ nodes }} --node_rank=$SLURM_NODEID --rdzv_endpoint ${MASTER_ADDR}:${MASTER_PORT} --rdzv_backend c10d --max_restarts 0 --tee 3"