flash-attention/training/configs/optimizer/adamw-apex-distributed.yaml
2022-11-28 17:34:40 -08:00

4 lines
123 B
YAML

# @package train.optimizer
# NOTE(review): keep the directive above as the first line of this config. It
# looks like a Hydra package header that places the keys below under
# `train.optimizer` -- confirm against the Hydra version this project uses.
#
# Optimizer config: selects the DistributedFusedAdam class from Apex's contrib
# optimizers and enables its `adam_w_mode` flag.

# Dotted import path of the optimizer class to build (presumably resolved by
# Hydra's instantiate mechanism via the `_target_` key -- verify in the caller).
_target_: apex.contrib.optimizers.distributed_fused_adam.DistributedFusedAdam
# Forwarded to the optimizer alongside `_target_`. Presumably switches the
# update rule to AdamW-style decoupled weight decay rather than classic Adam
# L2 regularization -- confirm against the Apex documentation.
adam_w_mode: True