python - how to add two matrices with production rules -
I have two matrices: one with 2 x 3 dimensions, the other one 3 x 2.
a = [[1, 0, 1], [1, 0, 1]]
b = [[1, 0], [1, 0], [1, 0]]
I want to return a 2x2 matrix c that is the sum of the element-wise logical OR operation between a and b.
So the result would be
c = [[3, 2], [3, 2]]
Are there any packages out there that do these operations efficiently? With very large matrices with hundreds of thousands of dimensions, looping through the elements/vectors is really slow.
It is relatively easy to return a 2x2 matrix d that is the result of the addition of the element-wise logical AND
operation between a and b.
d = np.dot(a,b)
would accomplish this. I'm wondering if there are any packages that are counterparts of np.dot with logical OR
instead.
Is an object-oriented approach acceptable?
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function

import numpy


class pseudobinary(object):
    """Scalar wrapper whose ``*`` is logical OR and whose ``+`` is addition.

    numpy.dot on object arrays combines elements with ``*`` and accumulates
    with ``+``, so a dot product of matrices of these objects yields the sum
    of element-wise ORs instead of the sum of element-wise products.
    """

    def __init__(self, i):
        # i: the wrapped value (0/1 for inputs, an integer count for sums).
        self.i = i

    def __mul__(self, rhs):
        # "Multiplication" is redefined as logical OR of the wrapped values.
        return pseudobinary(self.i or rhs.i)

    __rmul__ = __mul__
    __imul__ = __mul__

    def __add__(self, rhs):
        # Addition stays ordinary addition so the ORed terms are counted.
        return pseudobinary(self.i + rhs.i)

    __radd__ = __add__
    __iadd__ = __add__

    def __str__(self):
        return str(self.i)

    __repr__ = __str__


# 2 x 3 input matrix.
a = [[pseudobinary(1), pseudobinary(0), pseudobinary(1)],
     [pseudobinary(1), pseudobinary(0), pseudobinary(1)]]

# 3 x 2 input matrix.
b = [[pseudobinary(1), pseudobinary(0)],
     [pseudobinary(1), pseudobinary(0)],
     [pseudobinary(1), pseudobinary(0)]]

# 2 x 2 result: each cell is the sum over k of (a[i][k] OR b[k][j]).
c = numpy.dot(a, b)
print(c)
This prints:
[[3 2] [3 2]]
I have spent some time measuring and understanding the performance of this approach. Long story short: numpy.dot
with custom objects is several orders of magnitude slower than regular matrix multiplication of integers.
I am not 100% sure of the root cause of this difference. I have asked a specific question about the reasons for the slowness.
The performance graph is the following:
In this plot, the red curve (base) is the base measurement of a numpy.dot(..,..) call with integer matrices. The blue curve (setor) is the approach suggested in @vortex's answer. The green curve is the numpy.dot() performance using matrices of custom objects. As you can see, numpy.dot with custom objects is very, very slow. I got these numbers on a MacBook Air (13-inch, 2014), 1.7 GHz Intel Core i7, 8 GB 1600 MHz DDR3.
The code that executes the performance measurement and prints the plot is this: (tested in Python 2.7.10)
#!/usr/bin/env python

# A possible answer and performance analysis for the stackoverflow
# question. https://stackoverflow.com/q/45682641/5771861

from __future__ import absolute_import
from __future__ import print_function

import numpy
import time
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as tick
import random
import datetime
import timeit


class pseudobinary(object):
    """Scalar wrapper whose ``*`` is logical OR and whose ``+`` is addition.

    Used so that numpy.dot on object arrays computes a sum of element-wise
    ORs rather than a sum of products.
    """

    def __init__(self, i):
        self.i = i

    def __mul__(self, rhs):
        # "Multiplication" is redefined as logical OR of the wrapped values.
        return pseudobinary(self.i or rhs.i)

    __rmul__ = __mul__
    __imul__ = __mul__

    def __add__(self, rhs):
        return pseudobinary(self.i + rhs.i)

    __radd__ = __add__
    __iadd__ = __add__

    def __str__(self):
        # The "p" prefix distinguishes wrapped values from plain integers
        # when matrices are printed.
        return "p" + str(self.i)

    __repr__ = __str__


class testcase(object):
    """Random n x n bit matrix in two forms: plain integers and pseudobinary."""

    def __init__(self, n):
        self.n = n
        # Use square matrices.
        rows = self.n
        cols = self.n
        self.base = numpy.array([[random.getrandbits(1) for x in range(cols)]
                                 for y in range(rows)])
        # Same values as self.base, each wrapped in a pseudobinary object.
        self.pseudo = numpy.array(
            [[pseudobinary(v) for v in row] for row in self.base])

    @staticmethod
    def printmatrix(m):
        """Print matrix m row by row, values separated by spaces."""
        for row in m:
            for v in row:
                print(v, end=" ")
            print("")

    def print(self):
        """Print both representations of this test case."""
        print("base")
        testcase.printmatrix(self.base)
        print("pseudo")
        testcase.printmatrix(self.pseudo)


class testres(object):
    """Collects elapsed-time samples and reports their mean."""

    def __init__(self):
        self.res = []

    def append(self, v):
        self.res.append(v)

    def mean(self):
        return sum(self.res) / float(len(self.res))


def runwithtime(f, count, msg):
    """Call f() count times, print the elapsed wall time and return it.

    The returned value is the elapsed time in seconds for all count calls.
    """
    start = time.time()
    # range (not xrange) keeps the script runnable on Python 2.7 and 3.
    for _ in range(count):
        f()
    end = time.time()
    elapsed = end - start
    print(msg, "took", str(datetime.timedelta(seconds=end - start)), "seconds")
    return elapsed


def measureandprint(execcount):
    """Time three ways of multiplying square bit matrices.

    For each matrix size, every approach is measured meascount times, each
    measurement running execcount multiplications.

    Returns a tuple (baseresults, pseudoresults, setorresults); each is a
    dict mapping matrix dimension to a testres holding the measurements.
    """
    # Fixed seed so every run uses the same test matrices.
    random.seed(1)

    print("start initialize test data")
    start = time.time()
    sizes = [1, 4, 8, 16, 32]
    testcases = [testcase(n) for n in sizes]
    end = time.time()
    print("test data initialization complete in ",
          str(datetime.timedelta(seconds=end - start)))

    meascount = 4

    baseresults = {}
    pseudoresults = {}
    setorresults = {}

    for tc in testcases:
        print("test case for", tc.n)

        # Plain integer matrix multiplication.
        def base():
            rv = numpy.dot(tc.base, tc.base)
            return rv

        res = testres()
        for _ in range(meascount):
            t = runwithtime(base, execcount, "base")
            res.append(t)
        baseresults[tc.n] = res

        # numpy.dot on matrices of pseudobinary objects.
        def pseudo():
            rv = numpy.dot(tc.pseudo, tc.pseudo)
            return rv

        res = testres()
        for _ in range(meascount):
            t = runwithtime(pseudo, execcount, "pseudo")
            res.append(t)
        pseudoresults[tc.n] = res

        # Sum-of-OR via complements: a OR b == 1 - (1 - a) * (1 - b), so the
        # sum over n terms is n - dot(1 - A, 1 - B). The complement matrix
        # is computed once, outside the timed function.
        ones = numpy.ones(tc.n)
        dotinput = ones - tc.base

        def setor():
            rv = ones * tc.n - numpy.dot(dotinput, dotinput)
            return rv

        res = testres()
        for _ in range(meascount):
            t = runwithtime(setor, execcount, "setor")
            res.append(t)
        setorresults[tc.n] = res

    return baseresults, pseudoresults, setorresults


def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when a and b are equal within the given tolerances."""
    # https://stackoverflow.com/a/33024979/5771861
    return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)


def formatseconds(s):
    """Format a duration of s seconds concisely, to millisecond accuracy.

    Components that are zero are omitted; "0" is returned when all are zero.
    Example shape: 3 d 12 h 4 m 5 s 234 millis
    """
    def maybestr(fmt, x):
        # If x is non-zero return the formatted string of x, else "".
        if isclose(x, 0):
            return ""
        else:
            return fmt.format(x)

    seconds, fraction = divmod(s, 1)
    days, seconds = divmod(seconds, 86400)
    hours, seconds = divmod(seconds, 3600)
    minutes, seconds = divmod(seconds, 60)
    milli = int(fraction * 1000)

    rv = (maybestr("{} d ", days)
          + maybestr("{} h ", hours)
          + maybestr("{} m ", minutes)
          + maybestr("{} s ", seconds)
          + maybestr("{} millis ", milli))

    if rv == "":
        return "0"
    else:
        return rv


def plotresults(results, color, label):
    """Plot mean time per matrix dimension on a log-scaled y axis.

    results maps matrix dimension to a testres; each data point is also
    annotated with its formatted mean duration.
    """
    # Get the keys and values in the same order.
    res = sorted(results.items())
    xx = [x for (x, y) in res]
    yy = [y.mean() for (x, y) in res]
    plt.semilogy(xx, yy, color, label=label)
    plt.scatter(xx, yy, c=color)

    # Add an annotation to each measurement data point.
    for x, y in res:
        yvalue = y.mean()
        plt.annotate(str(formatseconds(yvalue)), (x, yvalue))


multiplicationcount = 1000
baseresults, pseudoresults, setorresults = measureandprint(multiplicationcount)
plotresults(baseresults, "r", "base")
plotresults(pseudoresults, "g", "pseudo")
plotresults(setorresults, "b", "setor")
plt.legend(loc="upper left")
plt.title("numpy.dot() performance measurements")
plt.ylabel("mean seconds taken {} multiplications".format(multiplicationcount))
plt.xlabel("dimension of square matrix")


def yfmt(val, pos):
    """Tick formatter callback: render a y value (seconds) as a duration."""
    return formatseconds(val)


axes = plt.gca()
yaxis = axes.get_yaxis()
yaxis.set_major_formatter(tick.FuncFormatter(yfmt))
plt.show()
Comments
Post a Comment