torch_ext/tests/test_profille.py
2025-03-27 03:44:28 +08:00

28 lines
831 B
Python

import torch
import torch.nn as nn
import torch.optim as optim
from torch.profiler import profile, record_function, ProfilerActivity
# Profile a few SGD training steps of a small linear model with
# torch.profiler, print the most expensive operators and export a Chrome
# trace to ./trace.json.

# Use the GPU when one is present; otherwise fall back to the CPU so the
# script still runs on machines without CUDA instead of raising in .cuda().
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Define the model and the optimizer.
model = nn.Linear(100, 10).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Only request CUDA activity when a CUDA device is actually in use.
activities = [ProfilerActivity.CPU]
if use_cuda:
    activities.append(ProfilerActivity.CUDA)

# Start the profiler.
with profile(
    activities=activities,  # monitor the CPU, plus the GPU when available
    record_shapes=True,  # record tensor shapes
    profile_memory=True,  # analyse memory usage
    with_stack=True,  # record call stacks
) as prof:
    for _ in range(10):
        # Allocate the batch directly on the target device rather than
        # creating it on the host and copying it over every iteration.
        x = torch.randn(64, 100, device=device)
        y = model(x)
        loss = y.sum()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

# Print the profiling results. Sort by GPU time when CUDA was profiled,
# otherwise by CPU time so the ordering is still meaningful.
sort_key = "cuda_time_total" if use_cuda else "cpu_time_total"
print(prof.key_averages().table(sort_by=sort_key, row_limit=10))
prof.export_chrome_trace("./trace.json")