# Patch summary: corrects one interpolation key in `datamodule.batch_size`
# for four Pile GPT3-2.7B experiment configs:
#   gpt3-2.7B-flash-hdim128-rotary-8k.yaml, gpt3-2.7B-flash-hdim128-rotary.yaml,
#   gpt3-2.7B-flash-rotary-8k.yaml, gpt3-2.7B-flash-rotary.yaml
# In each file the innermost comparison (`< 80`) of the nested conditional
# read `${train.gpu}`, while the other two comparisons (`< 24`, `< 40`) read
# `${train.gpu_mem}`; the `+` lines make all three use `${train.gpu_mem}`.
# The batch-size tiers themselves are unchanged: 1/2/4/8 in the two `-8k`
# configs and 4/8/16/32 in the other two.
# NOTE(review): `train.gpu_mem` is presumably GPU memory in GB, and
# `train.gpu` is presumably not a defined key - confirm against the base
# config, which is outside this patch.
# NOTE(review): the hunks below appear with their line breaks collapsed;
# regenerate the patch from the repository before trying to apply it.
diff --git a/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml b/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml index 0a5a588..09fdee9 100644 --- a/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml +++ b/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml @@ -11,7 +11,7 @@ model: mlp_checkpoint_lvl: 0 datamodule: - batch_size: ${eval:"1 if ${train.gpu_mem} < 24 else (2 if ${train.gpu_mem} < 40 else (4 if ${train.gpu} < 80 else 8))"} + batch_size: ${eval:"1 if ${train.gpu_mem} < 24 else (2 if ${train.gpu_mem} < 40 else (4 if ${train.gpu_mem} < 80 else 8))"} train: optimizer: diff --git a/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml b/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml index 79c3730..d5caafd 100644 --- a/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml +++ b/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml @@ -11,7 +11,7 @@ model: mlp_checkpoint_lvl: 0 datamodule: - batch_size: ${eval:"4 if ${train.gpu_mem} < 24 else (8 if ${train.gpu_mem} < 40 else (16 if ${train.gpu} < 80 else 32))"} + batch_size: ${eval:"4 if ${train.gpu_mem} < 24 else (8 if ${train.gpu_mem} < 40 else (16 if ${train.gpu_mem} < 80 else 32))"} train: optimizer: diff --git a/training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml b/training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml index 4584e6b..b259a29 100644 --- a/training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml +++ b/training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml @@ -11,7 +11,7 @@ model: mlp_checkpoint_lvl: 0 datamodule: - batch_size: ${eval:"1 if ${train.gpu_mem} < 24 else (2 if ${train.gpu_mem} < 40 else (4 if ${train.gpu} < 80 else 8))"} + batch_size: ${eval:"1 if ${train.gpu_mem} < 24 else (2 if ${train.gpu_mem} < 40 else (4 if ${train.gpu_mem} < 80 else 8))"} train: optimizer: diff --git 
a/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml b/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml index c16ab33..1e1684c 100644 --- a/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml +++ b/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml @@ -11,7 +11,7 @@ model: mlp_checkpoint_lvl: 0 datamodule: - batch_size: ${eval:"4 if ${train.gpu_mem} < 24 else (8 if ${train.gpu_mem} < 40 else (16 if ${train.gpu} < 80 else 32))"} + batch_size: ${eval:"4 if ${train.gpu_mem} < 24 else (8 if ${train.gpu_mem} < 40 else (16 if ${train.gpu_mem} < 80 else 32))"} train: optimizer: