# Source listing metadata: Python, 28 lines, 831 B
"""Profile a few SGD training steps of a small linear model with torch.profiler.

The script runs on the GPU when CUDA is available and falls back to the CPU
otherwise. It prints a table of the most expensive operators and writes a
Chrome trace to ``./trace.json`` (viewable in chrome://tracing or Perfetto).
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.profiler import profile, record_function, ProfilerActivity

# Choose the device once so the model, the inputs and the profiler settings
# all agree; calling .cuda() unconditionally would crash on CPU-only hosts.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Define the model and the optimizer.
model = nn.Linear(100, 10).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# CPU activity is always recorded; CUDA activity is only requested when a
# GPU is actually present.
activities = [ProfilerActivity.CPU]
if use_cuda:
    activities.append(ProfilerActivity.CUDA)

# Start the profiler.
with profile(
    activities=activities,  # monitor the CPU, plus the GPU when available
    record_shapes=True,  # record tensor shapes
    profile_memory=True,  # track memory usage
    with_stack=True,  # record call stacks
) as prof:
    for _ in range(10):
        # The input is created on the host and then moved to the target
        # device (a no-op when the target device is the CPU).
        x = torch.randn(64, 100).to(device)
        y = model(x)
        loss = y.sum()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

# Output the profiling results, sorted by the time column that is meaningful
# for the device that was actually profiled.
sort_key = "cuda_time_total" if use_cuda else "cpu_time_total"
print(prof.key_averages().table(sort_by=sort_key, row_limit=10))
prof.export_chrome_trace("./trace.json")