Tags: python, arrays, numpy, point-clouds, downsampling

How to efficiently convert large numpy array of point cloud data to downsampled 2d array?


I have a large numpy array of unordered lidar point cloud data, of shape [num_points, 3], where each row holds the XYZ coordinates of one point. I want to downsample this into a 2D grid of mean height values: split the data into 5x5 X-Y bins and calculate the mean height (Z coordinate) in each bin.

Does anyone know any quick/efficient way to do this?
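
Conceptually, each point's X and Y are snapped to the nearest multiple of 5 and the output grid holds the mean Z of the points sharing a cell; a tiny made-up illustration of the mapping I'm after:

import numpy as np

# Three points, two of which fall into the same 5x5 cell
points = np.array([[1.0, 2.0, 10.0],
                   [2.0, 1.0, 20.0],
                   [7.0, 8.0, 30.0]])

# Cell index of each point: round(XY / 5)
cells = np.round(points[:, :2] / 5).astype(int)   # [[0 0], [0 0], [1 2]]

# Desired output: mean Z per cell, e.g. cell (0, 0) -> 15.0, cell (1, 2) -> 30.0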

Current code:

import numpy as np
from open3d import read_point_cloud

resolution = 5

# Code to load point cloud and get points as numpy array
pcloud = read_point_cloud(params.POINT_CLOUD_DIR + "Part001.pcd")
pcloud_np = np.asarray(pcloud.points)

# Code to generate example dataset
pcloud_np = np.random.uniform(0.0, 1000.0, size=(1000,3))

# Current (inefficient) code to quantize into 5x5 XY 'bins' and take mean Z values in each bin
pcloud_np[:, 0:2] = np.round(pcloud_np[:, 0:2]/float(resolution))*float(resolution) # Round XY values to nearest 5

num_x = int(np.max(pcloud_np[:, 0])/resolution)
num_y = int(np.max(pcloud_np[:, 1])/resolution)

mean_height = np.zeros((num_x, num_y))

# Loop over each x-y bin and calculate mean z value 
x_val = 0
for x in range(num_x):
    y_val = 0
    for y in range(num_y):
        height_vals = pcloud_np[(pcloud_np[:,0] == float(x_val)) & (pcloud_np[:,1] == float(y_val))]
        if height_vals.size != 0:
            mean_height[x, y] = np.mean(height_vals)
        y_val += resolution
    x_val += resolution

Solution

  • Here is a suggestion using an np.bincount idiom on the flattened 2D grid; a small worked example of the idiom follows the benchmark below. I also took the liberty of adding some small fixes to the original code:

    import numpy as np
    #from open3d import read_point_cloud
    
    resolution = 5
    
    # Code to load point cloud and get points as numpy array
    #pcloud = read_point_cloud(params.POINT_CLOUD_DIR + "Part001.pcd")
    #pcloud_np = np.asarray(pcloud.points)
    
    # Code to generate example dataset
    pcloud_np = np.random.uniform(0.0, 1000.0, size=(1000,3))
    
    def f_op(pcloud_np, resolution):
        # Current (inefficient) code to quantize into 5x5 XY 'bins' and take mean Z values in each bin
        pcloud_np[:, 0:2] = np.round(pcloud_np[:, 0:2]/float(resolution))*float(resolution) # Round XY values to nearest 5
    
        num_x = int(np.max(pcloud_np[:, 0])/resolution) + 1
        num_y = int(np.max(pcloud_np[:, 1])/resolution) + 1
    
        mean_height = np.zeros((num_x, num_y))
    
        # Loop over each x-y bin and calculate mean z value 
        x_val = 0
        for x in range(num_x):
            y_val = 0
            for y in range(num_y):
                height_vals = pcloud_np[(pcloud_np[:,0] == float(x_val)) & (pcloud_np[:,1] == float(y_val)), 2]
                if height_vals.size != 0:
                    mean_height[x, y] = np.mean(height_vals)
                y_val += resolution
            x_val += resolution
    
        return mean_height
    
    def f_pp(pcloud_np, resolution):
        # Shift by half a bin before flooring so the binning matches rounding
        # X and Y to the nearest multiple of `resolution`
        xy = pcloud_np.T[:2]
        xy = ((xy + resolution / 2) // resolution).astype(int)
        # Extent of the occupied grid: min/max bin index along each axis
        mn, mx = xy.min(axis=1), xy.max(axis=1)
        sz = mx + 1 - mn
        # Flatten the 2D bin indices so each bin maps to one slot of a 1D histogram
        flatidx = np.ravel_multi_index(xy - mn[:, None], sz)
        # Per-bin sum of Z divided by per-bin point count; empty bins stay 0
        histo = (np.bincount(flatidx, pcloud_np[:, 2], sz.prod())
                 / np.maximum(1, np.bincount(flatidx, None, sz.prod())))
        return (histo.reshape(sz), *(xy * resolution))
    
    res_op = f_op(pcloud_np, resolution)
    res_pp, x, y = f_pp(pcloud_np, resolution)
    
    from timeit import timeit
    
    # 10 runs each; timeit returns total seconds, so *100 gives milliseconds per run
    t_op = timeit(lambda: f_op(pcloud_np, resolution), number=10)*100
    t_pp = timeit(lambda: f_pp(pcloud_np, resolution), number=10)*100
    
    print("results equal:", np.allclose(res_op, res_pp))
    print(f"timings (ms) op: {t_op:.3f} pp: {t_pp:.3f}")
    

    Sample output:

    results equal: True
    timings (ms) op: 359.162 pp: 0.427
    

    Speedup almost 1000x.
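
    To see why the bincount idiom works, here is a tiny worked example with made-up bin indices: the first bincount call sums the Z values sharing a flattened bin index, the second counts the points per bin, and the element-wise division gives the per-bin means, with np.maximum(1, ...) preventing division by zero for empty bins (which therefore stay 0).

    import numpy as np

    flatidx = np.array([0, 0, 1, 3])        # flattened bin index of each point
    z = np.array([2.0, 4.0, 10.0, 7.0])     # Z value of each point

    sums = np.bincount(flatidx, z, 4)       # per-bin sums:   [ 6. 10.  0.  7.]
    counts = np.bincount(flatidx, None, 4)  # per-bin counts: [2 1 0 1]
    means = sums / np.maximum(1, counts)    # per-bin means:  [ 3. 10.  0.  7.]

    For reference, scipy.stats.binned_statistic_2d can compute the same per-bin means directly (returning NaN rather than 0 for empty bins), but the bincount approach above keeps the dependency on NumPy alone.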