Search code examples
performancepython-2.7parallel-processingrandomembarrassingly-parallel

Python - why does this code take so long?


My concern is the part between the two hash-lines. The following code runs too long for me to wait for its output. When I replace the problematic part by another chunk of code, the programme runs in a few seconds (see the end of this post). my aim is to generate 90 data points that are uniformly distributed in a unit square (var1, var2), and then generate 12 points that are randomly placed in a circle of radius 1/8 that lies entirely within the unit square (cir1, cir2), and finally join these two sets (sph1, sph2).

I've been digging in this code since yesterday ad I'm really sure it is correct (apparently it is not). I'm probably missing something really obvious...

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import random
import math
import numpy as np
import sys
from heapq import merge
import multiprocessing as mp
from multiprocessing import Pool

boot = 2
RRpoints = 90
rrpoints = 12
step = np.arange(-8.0 , 0.5 , 0.5)

def distance_between_points(ite, jte):
    xi, yi = ite
    xj, yj = jte
    x2 = (xi - xj) ** 2
    y2 = (yi - yj) ** 2
    d = math.sqrt(x2 + y2)
    return d

def heaviside_step(n):
    return int(n >= 0)

def myscript(iteration_number):
    RRfile_name = "MC_out_cluster_1/output%d.txt" % iteration_number
    with open(RRfile_name, "w") as RRf:
#############################################################################
        np.random.seed()
        var1 = np.random.uniform(0, 1 , RRpoints)
        var2 = np.random.uniform(0, 1 , RRpoints)
        cir1 = []
        cir2 = []
        x0 = np.random.uniform(0.125 , 0.875)
        y0 = np.random.uniform(0.125 , 0.875)
        while ( len(cir1) < rrpoints and len(cir2) < rrpoints ):
            np.random.seed()
            col1 = np.random.uniform(x0 - 0.125 , x0 + 0.125)
            col2 = np.random.uniform(y0 - 0.125 , y0 + 0.125)
            if (x0 - col1) ** 2 + (y0 - col2) ** 2 <= 1/64:
                cir1.append(col1)
                cir2.append(col2)
        sph1 = list(merge(var1 , cir1))
        sph2 = list(merge(var2 , cir2))
############################################################################
        corr = []
        for k in xrange(0, len(step)):
            h = 0
            for i in xrange(0, RRpoints):
                for j in xrange(1 + i, RRpoints):
                    ite = sph1[i] , sph2[i]
                    jte = sph1[j] , sph2[j]
                    dbp = distance_between_points(ite, jte)
                    h += heaviside_step(math.exp( step[k] ) - dbp)
            corr.append([math.exp(step[k]) , h])

        for item in corr:
            RRf.write("{0}\t{1}\n".format(item[0], item[1]))

x = xrange(boot)
p = mp.Pool()

y = p.imap(myscript, x)
list(y)

This is another (working) chunk that I modified to create the above code:

 #############################################################################
        var1 = []
        var2 = []
        while (len(var1) < RRpoints):
            np.random.seed()
            col1 = np.random.uniform(0 , 1)
            col2 = np.random.uniform(0 , 1)
            if ( col1 ** 2 + (col2 - 0.5) ** 2 > 1/16 and (col1 - 1) ** 2 + (col2 - 0.5) ** 2 > 1/16 ):
                var1.append(col1)
                var2.append(col2)
        cir1 = []
        cir2 = []
        while (len(cir1) < rrpoints and len(cir2) < rrpoints):
            np.random.seed()
            new1 = np.random.uniform(0.125 , 0.875)
            new2 = np.random.uniform(0.125 , 0.875)
            if ( new1 ** 2 + (new2 - 0.5) ** 2 >= 0.140625 and (new1 - 1) ** 2 + (new2 - 0.5) ** 2 >= 0.140625 ):
                cir1.append(new1)
                cir2.append(new2)
        sph1 = list(merge(var1 , cir1))
        sph2 = list(merge(var2 , cir2))
 #############################################################################

Solution

  • You are using Python 2.x so 1/64==0 (in Python 2, division of two integers gives an integer result). If you can, upgrade to Python 3.x, or failing that change the test to use 1./64