# torch_ext/test_add_tensor.py
# 2024-11-16 19:26:54 +08:00
# 84 lines, 2.2 KiB, Python
# coding=utf-8
import torch
from torch_cuda_ext import core
# Smoke test for the elementwise-add kernel: run the extension's
# add_two_tensors on two random CUDA tensors and verify the result
# matches PyTorch's native `+`.
lhs = torch.randn(size=[100, 50], dtype=torch.float32, device="cuda")
rhs = torch.randn(size=[100, 50], dtype=torch.float32, device="cuda")
out = torch.empty_like(lhs)
print(out.size())
core.add_two_tensors(lhs, rhs, out)
expected = lhs + rhs
assert torch.allclose(out, expected), "result is not equal"
# print(expected, out)
# Benchmark the custom matmul kernel against torch.matmul on
# 1024x1024 float32 CUDA tensors (1000 iterations each), then report
# the maximum absolute elementwise difference between the two results.
x = torch.randn(size=(1024, 1024)).float().cuda()
y = torch.randn(size=(1024, 1024)).float().cuda()
z = torch.zeros(size=(1024, 1024)).float().cuda()
import time
# CUDA kernel launches are asynchronous: without an explicit
# torch.cuda.synchronize() the host timer only measures launch
# overhead, not actual kernel execution time, so both timings (and
# the derived speedup ratio) would be meaningless.
torch.cuda.synchronize()
start_time = time.time()
for _ in range(1000):
    core.matmul(x, y, z)
torch.cuda.synchronize()
end_time = time.time()
gpu_cost_time = end_time - start_time
print("gpu cost time is :", gpu_cost_time / 1000)
torch.cuda.synchronize()
start_time = time.time()
for _ in range(1000):
    gpu_out = torch.matmul(x, y)
torch.cuda.synchronize()
torch_cost_time = time.time() - start_time
print("torch cost time is: ", torch_cost_time / 1000)
print("accelerate rate:", torch_cost_time / gpu_cost_time)
# z holds the extension's result (overwritten every iteration) and
# gpu_out the last torch.matmul result, so comparing them is valid.
# Use the absolute difference: a signed max would hide cases where
# the kernel undershoots the reference.
diff = torch.abs(z - gpu_out)
print("max diff is :", torch.max(diff))
# start_time = time.time()
# for _ in range(1000):
# core.matmul_sigmoid(x, y, z)
# own_cost_time = time.time() - start_time
# print("matmul_sigmoid cost time is :", own_cost_time)
# start_time = time.time()
# for _ in range(1000):
# torch_sigmoid_output = torch.sigmoid(torch.matmul(x, y))
# torch_cost_time = time.time() - start_time
# print("matmul sigmoid torch cost time is:", torch_cost_time)
# print(torch.max(z - torch_sigmoid_output))
# assert torch.allclose(z, torch_sigmoid_output), "not equal"
# x = x.cpu()
# y = y.cpu()
# start_time = time.time()
# for _ in range(1000):
# torch_out = torch.matmul(x, y)
# end_time = time.time()
# cpu_cost_time = end_time - start_time
# print("cpu cost time is :", end_time - start_time)
# print("accelerate rate:", cpu_cost_time / gpu_cost_time)
# print(z)
# print(torch_out)
# diff = torch_out - z
# print(torch.max(diff))
# assert torch.allclose(torch_out, z), "result is not equal"
# Still wrong — strange: this looks no different from the company's code, so why is the result incorrect?
# Check the shared-memory matmul kernel: square a random 512x512 CUDA
# matrix with the extension and compare against torch.matmul. On a
# mismatch, print the elementwise difference for inspection.
mat = torch.randn(size=(512, 512)).float().cuda()
kernel_out = torch.empty_like(mat)
core.matmul_shared(mat, mat, kernel_out)
reference = torch.matmul(mat, mat)
if not torch.allclose(kernel_out, reference):
    print("not equal")
    print(reference - kernel_out)