Optimizing matrix operations in Python / NumPy
I have this optimization problem. Given the matrices e, h, q and f, and the logic in the method my_func_basic (see the code block), I populate the matrix v. Are there any potential ways, such as through vectorization, to speed up the computation? Thanks.
import timeit

import numpy as np

n = 20
m = 90

# e: m x n
e = np.random.randn(m, n)
# h, q: m x m
h = np.random.randn(m, m)
q = np.random.randn(m, m)
# f: n x n
f = np.random.randn(n, n)
# v: m x m
v = np.zeros(shape=(m, m))


def my_func_basic():
    """Fill the top-left n x n corner of the module-level matrix ``v`` in place.

    For every pair ``x != y`` with ``0 <= x, y < n``::

        row     = e[x, :] + h[x, y] * e[y, :]
        v[x, y] = (row . f . row^T) / sqrt(q[x, x] + h[x, y]**2 * q[y, y])

    Diagonal entries ``v[x, x]`` are set to NaN.  Entries outside the
    leading n x n corner are not touched.  Nothing is returned; the result
    is written into the global ``v``.

    Because ``q`` is drawn from a normal distribution, the value under the
    square root can be negative, in which case NumPy yields NaN (with a
    RuntimeWarning) for that entry.
    """
    for x in range(n):
        for y in range(n):
            if x == y:
                v[x][y] = np.nan
                continue
            # The locals must not be named ``h`` / ``e``: assigning to those
            # names here would make them function-local and reading the
            # module-level arrays would raise UnboundLocalError.
            h_xy = h[x][y]
            # Wrap in a list so ``row`` is a 1 x n matrix and the double dot
            # product below yields a 1 x 1 matrix.
            row = np.array([e[x, :] + h_xy * e[y, :]])
            v1 = np.dot(np.dot(row, f), np.transpose(row))[0][0]
            v2 = q[x][x] + h_xy**2 * q[y][y]
            v[x][y] = v1 / np.sqrt(v2)


if __name__ == "__main__":
    # Benchmark only when run as a script, so importing the module is cheap.
    print(timeit.timeit(my_func_basic, number=1000), '(sec), slow...')
This is one way to solve it using vectorized methods -
import numpy as np


def vectorized_approach(v, h, e, f, q, n):
    """Vectorized computation of the leading n x n corner of ``v``.

    For every pair ``x != y`` with ``0 <= x, y < n`` the result holds::

        row       = e[x, :] + h[x, y] * e[y, :]
        out[x, y] = (row . f . row^T) / sqrt(q[x, x] + h[x, y]**2 * q[y, y])

    and ``out[x, x]`` is NaN.

    Parameters
    ----------
    v : 2-D array whose copy receives the output; only ``[:n, :n]`` of the
        copy is overwritten.  ``v`` itself is not modified.
    h : 2-D array, read at ``[:n, :n]``.
    e : 2-D array, rows ``[:n]`` are used; its column count must match ``f``.
    f : square 2-D array multiplying the combined rows of ``e``.
    q : 2-D array; only the first ``n`` diagonal entries are used.
    n : number of leading rows/columns to process.

    Returns
    -------
    A new array shaped like ``v`` with the ``[:n, :n]`` corner filled in.
    Entries where the value under the square root is negative come out as
    NaN, as NumPy's ``sqrt`` does for negative reals.
    """
    # Create a copy of v to store the output values
    v_vectorized = v.copy()

    # Calculate v1 in a vectorized fashion.
    # e1[x * n + y, :] == e[x, :] + h[x, y] * e[y, :]
    # Reshape on the column count of e rather than on n, so the function
    # also works when n is smaller than the number of columns.
    e1 = (e[None, :n, :] * h[:n, :n, None] + e[:n, None, :]).reshape(-1, e.shape[1])
    e2 = np.dot(e1, f)
    # Row-wise dot product of e2 with e1, i.e. the diagonal of e2 @ e1.T,
    # without materialising the full (n*n) x (n*n) product.
    v1_vectorized = np.einsum('ij,ji->i', e2, e1.T).reshape(n, n)
    np.fill_diagonal(v1_vectorized, np.nan)

    # Calculate v2 in a vectorized fashion
    q_diag = np.diag(q[:n, :n])
    v2_vectorized = q_diag[:, None] + h[:n, :n]**2 * q_diag[None, :]

    # Finally, get the vectorized version of the output v
    v_vectorized[:n, :n] = v1_vectorized / np.sqrt(v2_vectorized)
    return v_vectorized
tests:
1) setup inputs -
In [314]: n = 20
     ...: m = 90
     ...: # e: m x n
     ...: e = np.random.randn(m,n)
     ...: # h, q: m x m
     ...: h = np.random.randn(m,m)
     ...: q = np.random.randn(m,m)
     ...: # f: n x n
     ...: f = np.random.randn(n,n)
     ...: # v: m x m
     ...: v = np.zeros(shape=(m,m))
     ...:
2) verify results -
In [327]: out_basic_approach = my_func_basic(v,h,e,f,q,n)
     ...: out_vectorized_approach = vectorized_approach(v,h,e,f,q,n)
     ...:
     ...: mask1 = ~np.isnan(out_basic_approach)
     ...: mask2 = ~np.isnan(out_vectorized_approach)
     ...:

In [328]: np.allclose(mask1,mask2)
Out[328]: True

In [329]: np.allclose(out_basic_approach[mask1],out_vectorized_approach[mask1])
Out[329]: True
3) runtime tests -
In [330]: %timeit my_func_basic(v,h,e,f,q,n)
100 loops, best of 3: 12.2 ms per loop

In [331]: %timeit vectorized_approach(v,h,e,f,q,n)
1000 loops, best of 3: 222 µs per loop
Comments
Post a Comment