# coding=utf-8
"""Smoke tests and micro-benchmarks for the torch_cuda_ext CUDA kernels.

Checks core.add_two_tensors against torch's elementwise add, then times
core.matmul against torch.matmul on 1024x1024 float32 inputs and reports
the speed ratio and the largest absolute deviation.

Requires a CUDA device and the torch_cuda_ext extension.
"""
import time

import torch

from torch_cuda_ext import core

# --- correctness check: elementwise add kernel ------------------------------
x = torch.randn(size=[100, 50], dtype=torch.float32, device="cuda")
y = torch.randn(size=[100, 50], dtype=torch.float32, device="cuda")
z = torch.empty_like(x)
print(z.size())
core.add_two_tensors(x, y, z)
compare_result = x + y
assert torch.allclose(z, compare_result), "result is not equal"
# print(compare_result, z)

# --- benchmark: custom matmul kernel vs torch.matmul ------------------------
x = torch.randn(size=(1024, 1024)).float().cuda()
y = torch.randn(size=(1024, 1024)).float().cuda()
z = torch.zeros(size=(1024, 1024)).float().cuda()

# CUDA kernel launches are asynchronous: without synchronizing before
# reading the clock and after the loop, we would only measure launch
# overhead, not kernel execution time.
torch.cuda.synchronize()
start_time = time.time()
for _ in range(1000):
    core.matmul(x, y, z)
torch.cuda.synchronize()
gpu_cost_time = time.time() - start_time
print("gpu cost time is :", gpu_cost_time / 1000)

torch.cuda.synchronize()
start_time = time.time()
for _ in range(1000):
    gpu_out = torch.matmul(x, y)
torch.cuda.synchronize()
torch_cost_time = time.time() - start_time
print("torch cost time is: ", torch_cost_time / 1000)
print("accelerate rate:", torch_cost_time / gpu_cost_time)

# Report the largest *absolute* deviation; the signed max would hide
# large negative errors.
diff = z - gpu_out
print("max diff is :", torch.max(torch.abs(diff)))

# NOTE(review): removed dead commented-out experiments that were here
# (matmul_sigmoid check and a CPU-side matmul benchmark). The original
# author noted (translated from Chinese) that matmul_sigmoid "still gives
# wrong results, which is strange — it looks no different from the
# reference code". TODO: re-verify the matmul_sigmoid kernel before
# re-enabling that check.
# Verify the shared-memory matmul kernel against torch.matmul by
# computing X @ X for a random 512x512 float32 CUDA matrix.
mat = torch.randn(size=(512, 512)).float().cuda()
kernel_out = torch.empty_like(mat)
core.matmul_shared(mat, mat, kernel_out)
reference = torch.matmul(mat, mat)
# On mismatch, dump the elementwise difference for debugging.
if not torch.allclose(kernel_out, reference):
    print("not equal")
    print(reference - kernel_out)