torch==2.1.0
triton==2.1.0
numpy==1.26.4
datasets==2.19.1
transformers==4.47.0
flash-attn==2.5.0
wandb
huggingface_hub[hf_transfer]