small changes
This commit is contained in:
parent
b390a0101e
commit
b0ea5066ad
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,4 +6,5 @@ picotron.egg-info
|
||||
wandb
|
||||
tmp
|
||||
debug
|
||||
bench
|
||||
bench
|
||||
sandbox
|
||||
@ -58,7 +58,8 @@ module load cuda/12.1
|
||||
GIT_REPO="/fsx/ferdinandmom/ferdinand-hf/picotron/"
|
||||
CMD="$GIT_REPO/train.py --config {{ config }}"
|
||||
|
||||
huggingface-cli login --token $HUGGINGFACE_TOKEN
|
||||
git checkout loading_big_model
|
||||
# huggingface-cli login --token $HUGGINGFACE_TOKEN
|
||||
|
||||
LAUNCHER="torchrun --nproc_per_node={{ n_proc_per_node }} --nnode={{ nodes }} --node_rank=$SLURM_NODEID --rdzv_endpoint ${MASTER_ADDR}:${MASTER_PORT} --rdzv_backend c10d --max_restarts 0 --tee 3"
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user