user3671704 user3671704 - 6 months ago 9
Python Question

Random walker code python

How cold you speed up the following code to be able to do more than 100 individuals?

""" Random Walker """
import numpy as np
import scipy as sp
import random as rd
import time

def procedure():
time.sleep(2.5)
t0C = time.clock()
t0 = time.time()

"""Definitions"""

def ifilterfalse(predicate, iterable):
# ifilterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
if predicate is None:
predicate = bool
for x in iterable:
if not predicate(x):
yield x

def unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in ifilterfalse(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element

"""Creating the Random Walk"""

n=int(input('Number of Individuals at Table: '))
iters=10000
final=np.zeros(n)
total=0
for j in xrange(iters):
d=np.array([0])
i=0
while i<1:
new=d[len(d)-1]+rd.choice([-1,1])
if new<0:
new+=n
elif new>=n:
new-=n
d=np.append(d,new)
dshort=list(unique_everseen(d))
if len(dshort)>=n:
i=1
last=dshort[len(dshort)-1]
length=len(d)
final[last]+=1
total+=length

final=np.round(final/iters,4)
total=round(total/iters,3)

"""Writing To A File"""

print (40 * '-')
print (" ")
print (" Percentages: ")
print (" ")
print (" S#:"+" S#:".join(map(str,range(n))))
print (" "+"% ".join(map(str,final))+"%")
print (" ")
print (" Average Number of Passes of Plate: {}".format(total))
print (" ")
print (40 * '-')

# measure process time
print time.clock() - t0C, "seconds process time"

# measure wall time
print time.time() - t0, "seconds wall time"


if __name__ == "__main__":
procedure()


right now for the case of 10 individuals the time is:

5.877529 seconds process time

12.9134569168 seconds wall time

The problem is that when the number of individuals(100, 1000) increases the code is too slow, any suggestions?

Answer

The problem is that unique_everseen is doing practically the same work in successive executions, consuming too much time. Here is a simplified version that removes unique_everseen function and d list and use a seen set directly in the main loop and last var to keep the last item:

""" Random Walker """
import random as rd
import time

def procedure():
    n = int(input('Number of Individuals at Table: '))

    t0C = time.clock()
    t0 = time.time()

    iters = 10000
    final = [0] * n
    total = 0
    for j in xrange(iters):
        last = 0
        count = 1
        seen = set([0])
        while len(seen) < n:
            count += 1;
            new = last + rd.choice([-1, 1])
            if new < 0:
                new += n
            elif new >= n:
                new -= n
            seen.add(new)
            last = new
        final[last] += 1
        total += count

    final = [round(float(f) / iters, 4) for f in final]
    total = round(float(total) / iters, 3)

    """Writing To A File"""

    print(40 * '-')
    print(" ")
    print("   Percentages: ")
    print(" ")
    print("   S#:" + "      S#:".join(map(str, range(n))))
    print("   " + "%   ".join(map(str, final)) + "%")
    print(" ")
    print("   Average Number of Passes of Plate: {}".format(total))
    print(" ")
    print(40 * '-')

    # measure process time
    print time.clock() - t0C, "seconds process time"

    # measure wall time
    print time.time() - t0, "seconds wall time"


if __name__ == "__main__":
    procedure()

Note that removing the numpy dependency allows the script be run with pypy

Some results (in secods)

  • for 10 individuals
    • python: 0.472
    • pypy: 0.084
  • for 100 individuals
    • python: 49.352
    • pypy: 3.256
  • for 500 individuals
    • pypy: 80.460
  • for 1000 individuals
    • pypy: 318.392