With the function get_height I calculate the height difference between two point clouds for every scan (y,z-coordinates in my example).
My algorithm works but takes 1.93 s on average. How can I improve the performance?
EDIT: I attached a fully working example
import numpy as np
import matplotlib.pyplot as plt
def generate_random_dataset(N,x_max):
# Create the 'x' column
unique_x = np.linspace(0, x_max, x_max*10+1)
x = np.random.choice(unique_x, N) # Generate the array with repeated values
# Create the 'y' column
y = np.random.uniform(-5, 5, N)
# Create the 'z' column
z = - y**2 + 5 + np.random.normal(0, 1, N)
# Create the 'A' array
A = np.column_stack((x, y, z))
return A
def get_height(A0,A1):
# get unique x values that are in both scans
ux0 = np.unique(A0[:,0])
ux1 = np.unique(A1[:,0])
ux = np.intersect1d(ux0,ux1)
# get height at each unique x value
h = []
for x in ux:
# get slice of lower scan
mask0 = (A0[:,0] == x)
z0 = A0[mask0,2]
# get slice of upper scan
mask1 = (A1[:,0] == x)
z1 = A1[mask1,2]
# get height difference
height = np.max(z1) - np.max(z0)
# append results to list
h.append(height)
# convert list to array
h = np.array(h)
return ux, h
# run script
A0 = generate_random_dataset(N=300000,x_max=100)
A1 = generate_random_dataset(N=310000,x_max=120)
A1[:,2] = A1[:,2] - 0.001*(A1[:,0]-50)**2 + 5 # make A1 higher and different than A0
# apply function
%timeit ux,h = get_height(A0,A1)
ux0 = np.unique(A0[:,0])
ux1 = np.unique(A1[:,0])
ux = np.intersect1d(ux0,ux1)
# plot
fig = plt.figure(figsize=(4.24*1.5,3*1.5))
ax = plt.subplot(111)
ax.scatter(ux,h)
ax.set_xlabel('x [mm]')
ax.set_ylabel('h [mm]')
plt.show()
I've tried using np.lexsort approach from a previous question of mine but that approach doesn't work for two arrays.
I want to approach this problem differently (without looping over unique x values) but I can't figure out a solution.
There is probably a numpy
solution, but in the meantime using pandas
is much faster than a python
loop with lookup in each iteration, even including the overhead of converting the arrays into dataframes.
import pandas as pd
def get_height_pd(A0, A1):
df0 = pd.DataFrame(A0)
df1 = pd.DataFrame(A1)
m0 = df0.groupby(0)[2].max()
m1 = df1.groupby(0)[2].max()
return (m1 - m0).dropna() # dropna gets rid of the non-intersecting ones
Alternatively, possibly a little faster, use series.
def get_height_s(A0, A1):
s0 = pd.Series(A0[:, 2])
s1 = pd.Series(A1[:, 2])
m0 = s0.groupby(A0[:, 0]).max()
m1 = s1.groupby(A1[:, 0]).max()
return (m1 - m0).dropna()