|
本帖最后由 Aline744 于 2024-10-10 11:11 编辑
CPU = E52630V4 GPU = K40C
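可以看出,在 Python 里直接用循环计算矩阵乘法效率极其低下;改用多线程后速度不升反降,原因是 CPython 的全局解释器锁(GIL)使同一时刻只有一个线程在执行 Python 字节码,多线程并不是真正的并行,不能充分利用多核 CPU 的性能,反而增加了线程创建和切换的开销。使用 numpy 和 cupy 显然能大幅度提升速度。由于 cupy 是运行在 GPU 上的库,因此在小规模运算时比 numpy 慢一些。另外 cupy 似乎还有一个启动时间,第一次运行明显很慢(一般是 CUDA 上下文初始化和内核首次编译/加载造成的)。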
- import time
- import numpy as np
- import cupy as cp
- import matplotlib.pyplot as plt
- from threading import Thread as TH
# Current matrix side length; the benchmark loop overwrites this on every step.
arr_size = 12

# Elapsed seconds per tested size, one independent list per implementation:
# pure-Python loops, Python threads, NumPy and CuPy.
time_libFree, time_multiTh, time_np, time_cp = [], [], [], []

# Enable flags for the two slow pure-Python variants (loops / threads).
# The benchmark clears a flag once that variant has become too slow.
if_cal_1 = if_cal_2 = True
def task(a, b, c, x, y):
    """Accumulate one element of the matrix product ``a @ b`` into ``c``.

    Adds the dot product of row ``x`` of ``a`` and column ``y`` of ``b`` to
    ``c[x][y]`` in place. Nothing is returned.

    The inner dimension is taken from the operands themselves (row ``x`` of
    ``a`` paired with the rows of ``b``) instead of a module-level size
    variable, so the function works for any compatible pair of matrices.

    Args:
        a: Left operand, indexable as ``a[row][col]``.
        b: Right operand, indexable as ``b[row][col]``.
        c: Output matrix, indexable as ``c[row][col]``; updated in place.
        x: Row index of the element to compute.
        y: Column index of the element to compute.
    """
    # Pair a[x][k] with row k of b; b_row[y] is then b[k][y].
    for a_xk, b_row in zip(a[x], b):
        c[x][y] += a_xk * b_row[y]
# Benchmark square matrix multiplication for side lengths 0, 50, ..., 5950
# with four implementations (pure-Python loops, Python threads, NumPy, CuPy),
# record the elapsed seconds of each, then plot the results.

# Once a pure-Python variant needs longer than this many seconds for one size,
# it is no longer executed and its last measurement is repeated instead.
TIME_LIMIT_S = 10
# Number of worker threads started per batch in the threaded variant.
THREADS_PER_BATCH = 5

for step in range(120):
    arr_size = 50 * step
    a = np.random.rand(arr_size, arr_size)
    b = np.random.rand(arr_size, arr_size)

    # --- Matrix multiplication timing: plain loops only ---
    if if_cal_1:
        t1 = time.perf_counter()
        c = np.zeros((arr_size, arr_size))
        for x in range(arr_size):
            for y in range(arr_size):
                for index in range(arr_size):
                    c[x][y] += a[x][index] * b[index][y]
        time_libFree.append(time.perf_counter() - t1)
    else:
        # Too slow to keep measuring: carry the last value forward so the
        # curve keeps the same length as the others.
        time_libFree.append(time_libFree[-1])
    if time_libFree[-1] > TIME_LIMIT_S:
        if_cal_1 = False

    # --- Multithreading: one thread per output element, in small batches ---
    if if_cal_2:
        t1 = time.perf_counter()
        c = np.zeros((arr_size, arr_size))
        for x in range(arr_size):
            for y0 in range(0, arr_size, THREADS_PER_BATCH):
                # min() keeps the last batch inside the matrix when the size
                # is not a multiple of THREADS_PER_BATCH.
                batch_end = min(y0 + THREADS_PER_BATCH, arr_size)
                workers = [
                    TH(target=task, args=(a, b, c, x, y))
                    for y in range(y0, batch_end)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()
        time_multiTh.append(time.perf_counter() - t1)
    else:
        time_multiTh.append(time_multiTh[-1])
    if time_multiTh[-1] > TIME_LIMIT_S:
        if_cal_2 = False

    # --- NumPy ---
    t1 = time.perf_counter()
    c = np.matmul(a, b)
    time_np.append(time.perf_counter() - t1)

    # --- CuPy ---
    a = cp.random.rand(arr_size, arr_size)
    b = cp.random.rand(arr_size, arr_size)
    # GPU work is queued asynchronously. Wait for the random-number kernels
    # to finish so that they are not counted in the matmul measurement.
    cp.cuda.Stream.null.synchronize()
    t1 = time.perf_counter()
    c = cp.matmul(a, b)
    # Without this synchronization only the kernel launch would be timed,
    # not the multiplication itself.
    cp.cuda.Stream.null.synchronize()
    time_cp.append(time.perf_counter() - t1)

    print(step)

# Top panel: the two pure-Python variants against NumPy.
plt.subplot(2, 1, 1)
plt.plot(time_libFree, label="libFree")
plt.plot(time_multiTh, 'g', label="multiTh")
plt.plot(time_np, 'r', label="np")
plt.legend()
plt.xlabel('array_size(x50)')

# Bottom panel: NumPy against CuPy.
plt.subplot(2, 1, 2)
plt.plot(time_np, label="np")
plt.plot(time_cp, 'r', label="cp")
plt.legend()
plt.xlabel('array_size(x50)')
plt.show()
复制代码 |
-
|