faster way to make a 3D blob with python?

Is there a better way to make a 3D density function?

def make_spot_3d(bright, spread, x0,y0,z0):
    # Create x and y indices
    x = np.linspace(-50, 50, 200)
    y = np.linspace(-50, 50, 200)
    z = np.linspace(-50, 50, 200)

    X, Y, Z = np.meshgrid(x, y, z)
    Intensity = np.uint16(bright*np.exp(-((X-x0)/spread)**2
                                        -((Y-y0)/spread)**2
                                        -((Z-z0)/spread)**2))

    return Intensity

The function can generate a 3D numpy array which can be plotted with mayavi enter image description here

However when the function is used to generate a cluster of spots (~100) as follow:

Spots = np.asarray([make_spot_3d(100,2, *loc) for loc in locations])
cluster = np.sum(Spots, axis=0)

yielding for example:

enter image description here The execution time is around 1 minute (cpu i5);I bet this could be faster.

Solution

An obvious improvement would be to use broadcasting to evaluate your intensity function over a 'sparse' mesh rather than a full meshgrid, e.g.:

X, Y, Z = np.meshgrid(x, y, z, sparse=True)

This reduces the runtime by a factor of about 4x on my machine:

%timeit make_spot_3d(1., 1., 0, 0, 0)
1 loops, best of 3: 1.56 s per loop 

%timeit make_spot_3d_ogrid(1., 1., 0, 0, 0)
1 loops, best of 3: 359 ms per loop

You can get rid of the overhead involved in the list comprehension by vectorizing the calculation over locations, spreads and brightnesses, e.g.:

def make_spots(bright, spread, x0, y0, z0):

    # Create x and y indices
    x = np.linspace(-50, 50, 200)
    y = np.linspace(-50, 50, 200)
    z = np.linspace(-50, 50, 200)

    # this will broadcast out to an (nblobs, ny, nx, nz) array
    dx = x[None, None, :, None] - x0[:, None, None, None]
    dy = y[None, :, None, None] - y0[:, None, None, None]
    dz = z[None, None, None, :] - z0[:, None, None, None]
    spread = spread[:, None, None, None]
    bright = bright[:, None, None, None]

    # we can save time by performing the exponentiation over 2D arrays
    # before broadcasting out to 4D, since exp(a + b) == exp(a) * exp(b)
    s2 = spread * spread
    a = np.exp(-(dx * dx) / s2)
    b = np.exp(-(dy * dy) / s2)
    c = np.exp(-(dz * dz) / s2)

    intensity = bright * a * b * c

    return intensity.astype(np.uint16)

where bright, spread, x0, y0 and z0 are 1D vectors. This will generate an (nblobs, ny, nx, nz) array, which you could then sum over the first axis. Depending on how many blobs you are generating, and how large the grid is that you are evaluating them over, creating this intermediate array might become quite expensive in terms of memory.

Another option would be to initialize a single (ny, nx, nz) output array and compute the sum in-place:

def sum_spots_inplace(bright, spread, x0, y0, z0):

    # Create x and y indices
    x = np.linspace(-50, 50, 200)
    y = np.linspace(-50, 50, 200)
    z = np.linspace(-50, 50, 200)

    dx = x[None, None, :, None] - x0[:, None, None, None]
    dy = y[None, :, None, None] - y0[:, None, None, None]
    dz = z[None, None, None, :] - z0[:, None, None, None]
    spread = spread[:, None, None, None]
    bright = bright[:, None, None, None]

    s2 = spread * spread
    a = np.exp(-(dx * dx) / s2)
    b = np.exp(-(dy * dy) / s2)
    c = np.exp(-(dz * dz) / s2)

    out = np.zeros((200, 200, 200), dtype=np.uint16)

    for ii in xrange(bright.shape[0]):
        out += bright[ii] * a[ii] * b[ii] * c[ii]

    return out

This will require less memory, but the potential downside is that it necessitates looping in Python.

To give you some idea of the relative performance:

def sum_spots_listcomp(bright, spread, x0, y0, z0):
    return np.sum([make_spot_3d(bright[ii], spread[ii], x0[ii], y0[ii], z0[ii])
                   for ii in xrange(len(bright))], axis=0)

def sum_spots_vec(bright, spread, x0, y0, z0):
    return make_spots(bright, spread, x0, y0, z0).sum(0)

# some fake data
bright = np.random.rand(10) * 100
spread = np.random.rand(10) * 100
x0 = (np.random.rand(10) - 0.5) * 50
y0 = (np.random.rand(10) - 0.5) * 50
z0 = (np.random.rand(10) - 0.5) * 50

%timeit sum_spots_listcomp(bright, spread, x0, y0, z0)
# 1 loops, best of 3: 16.6 s per loop

%timeit sum_spots_vec(bright, spread, x0, y0, z0)
# 1 loops, best of 3: 1.03 s per loop

%timeit sum_spots_inplace(bright, spread, x0, y0, z0)
# 1 loops, best of 3: 330 ms per loop