flash-attention/training/configs/optimizer/adamw-zero.yaml

# @package train.optimizer
_target_: torch.distributed.optim.ZeroRedundancyOptimizer
_recursive_: True
optimizer_class:
  _target_: torch.optim.__getattribute__
  _args_:
    - "AdamW"