From 75939867d9f73ddc8028c0bee9330bd1344efcee Mon Sep 17 00:00:00 2001 From: "ferdinand.mom" Date: Mon, 2 Dec 2024 18:12:02 +0000 Subject: [PATCH] small fix on world_size with pgm --- train.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/train.py b/train.py index 0e73242..3c473e2 100644 --- a/train.py +++ b/train.py @@ -126,10 +126,10 @@ if __name__ == "__main__": project="picotron", name=f"{config['logging']['run_name']}_{tokens_per_step}_{pgm.process_group_manager}", config={ - "tensor_parallel_size": pgm.process_group_manager.tp_size, - "context_parallel_size": pgm.process_group_manager.cp_size, - "pipeline_parallel_size": pgm.process_group_manager.pp_size, - "data_parallel_size": pgm.process_group_manager.dp_size, + "tensor_parallel_size": pgm.process_group_manager.tp_world_size, + "context_parallel_size": pgm.process_group_manager.cp_world_size, + "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, + "data_parallel_size": pgm.process_group_manager.dp_world_size, "model": config["model"]["name"], "dataset": config["dataset"]["name"], "max_tokens": config["training"]["max_tokens"],