defaults:
  - _self_
  - gpt2model: gpt2-small

_target_: transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel
_recursive_: True
config:
  _target_: transformers.GPT2Config
  # Mistral's config: https://github.com/stanford-crfm/mistral/blob/main/conf/models/gpt2-small.yaml
  # However, reorder_and_upcast_attn slows things down
  reorder_and_upcast_attn: false
  scale_attn_by_inverse_layer_idx: true
  n_positions: ${datamodule.max_length}