"""Benchmark a 2-D FFT of an integer matrix on the CPU (SciPy) vs. the GPU (CuPy).

For a range of matrix sizes N, an N x N random integer matrix is transformed
with ``scipy.fft.fftn`` and with ``cupyx.scipy.fft.fftn``.  The elapsed time of
each call is recorded, printed together with the CPU/GPU ratio, and finally
plotted against N.
"""
import numpy as np
import cupy as cp
from numba import cuda
from time import perf_counter, time
from scipy import fft
import cupyx.scipy.fft as cufft
import matplotlib.pyplot as plt

# warm up cupy compilation: try without these to see the difference
gpu_arr = cp.random.randint(0, 255, size=(10, 10))
cufft.fftn(gpu_arr)
# GPU work is queued asynchronously; wait for the warm-up to really finish
# so none of it leaks into the first timed iteration.
cp.cuda.Stream.null.synchronize()

res = []
# max N in cupy is limited by GPU memory,
# too large N gives a cupy.cuda.memory.OutOfMemoryError
for N in range(1000, 9000, 1000):
    cpu_arr = np.random.randint(0, 255, size=(N, N))
    gpu_arr = cp.asarray(cpu_arr)
    # Make sure the host-to-device copy is complete before any timing starts.
    cp.cuda.Stream.null.synchronize()

    # perf_counter() is a monotonic, high-resolution clock meant for
    # measuring short intervals; time() can be too coarse for that.
    t_cpu_start = perf_counter()
    cpu_fft = fft.fftn(cpu_arr)
    t_cpu_end = perf_counter()

    t_gpu_start = perf_counter()
    gpu_fft = cufft.fftn(gpu_arr)
    # Without this the clock would stop as soon as the work is *queued*,
    # not when the FFT has actually been computed on the device.
    cp.cuda.Stream.null.synchronize()
    t_gpu_end = perf_counter()

    res.append([N, t_cpu_end - t_cpu_start, t_gpu_end - t_gpu_start])
    print(f'{res[-1]}, timing ratio = {res[-1][1]/res[-1][2]:8.4f}')

    # Drop the references to the large buffers so their memory can be
    # reused before the next (larger) iteration allocates its own.
    del cpu_fft, gpu_fft, gpu_arr, cpu_arr

# Transpose the [N, cpu_time, gpu_time] rows into three columns for plotting.
x, y1, y2 = zip(*res)
plt.plot(x, y1, label="CPU using scipy FFT")
plt.plot(x, y2, label="GPU using cupyx.scipy FFT")
plt.title('FFT of integer matrix in CPU vs. GPU')
plt.xlabel('matrix size N')
plt.ylabel('time (s)')
plt.legend()
plt.show()