flash-attention/training/configs/optimizer/adamw-apex-zero.yaml

8 lines
196 B
YAML
Raw Normal View History

2022-11-29 09:31:19 +08:00
# @package train.optimizer
# Optimizer config: an Apex FusedAdam (AdamW mode) wrapped in PyTorch's
# ZeroRedundancyOptimizer.
_target_: torch.distributed.optim.ZeroRedundancyOptimizer
# Let Hydra instantiate nested `_target_` nodes (needed for `optimizer_class`).
_recursive_: True
optimizer_class:
  # These two keys must be nested under `optimizer_class`; at top level they
  # would duplicate/override the `_target_` above.
  _target_: apex.optimizers.FusedAdam
  # `_partial_` makes Hydra pass a callable (functools.partial) rather than an
  # already-constructed optimizer, since the wrapper builds the optimizer itself.
  _partial_: True
# Extra keyword argument on the wrapper; ZeroRedundancyOptimizer forwards
# trailing kwargs to the local optimizer (here: FusedAdam).
# NOTE(review): kept at top level as in the source — confirm it was not meant
# to sit under `optimizer_class`.
adam_w_mode: True