diff --git a/train.py b/train.py index 0e73242..3c473e2 100644 --- a/train.py +++ b/train.py @@ -126,10 +126,10 @@ if __name__ == "__main__": project="picotron", name=f"{config['logging']['run_name']}_{tokens_per_step}_{pgm.process_group_manager}", config={ - "tensor_parallel_size": pgm.process_group_manager.tp_size, - "context_parallel_size": pgm.process_group_manager.cp_size, - "pipeline_parallel_size": pgm.process_group_manager.pp_size, - "data_parallel_size": pgm.process_group_manager.dp_size, + "tensor_parallel_size": pgm.process_group_manager.tp_world_size, + "context_parallel_size": pgm.process_group_manager.cp_world_size, + "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, + "data_parallel_size": pgm.process_group_manager.dp_world_size, "model": config["model"]["name"], "dataset": config["dataset"]["name"], "max_tokens": config["training"]["max_tokens"],