# @package _global_
defaults:
  - /experiment/owt/base.yaml
  - override /model: gpt2-hf
  - override /model/gpt2model: gpt2-small
  - override /callbacks: [default, norm-monitor, flop-count]

datamodule:
  batch_size: 8

train:
  # Use the standard torch.nn.CrossEntropyLoss
  loss_fn: null

callbacks:
  flop_count:
    input_size:
      - ${datamodule.max_length}
    input_dtype:
      # It's surprisingly hard to get Hydra to return torch.long since it's not a callable
      _target_: torch.__getattribute__
      _args_:
        - long