I'm using Python's multiprocessing module to speed up the computation of a feature from 3D LIDAR data. Each process computes the feature for a subset of the points in the data. I'm using tqdm to provide a progress bar for the computation, but the bar isn't updating as expected.
Here's the code I'm using:
import numpy as np
from multiprocessing import Pool
from tqdm import tqdm
from functools import partial
# np.random.seed(0)  # uncomment to make the synthetic cloud reproducible
# Synthetic point cloud: 1000 rows of (x, y, z), every coordinate drawn
# uniformly between 0 and 100.
lidar_data = np.random.uniform(0.0, 100.0, (1000, 3))
def compute_feature(i, lidar_data, radius):
    """Return the height feature of point ``i`` inside a vertical cylinder.

    The cylinder is centred on the (x, y) position of point ``i``, has the
    given ``radius`` and spans from z = 0 up to the point's own z value.
    The feature is ``z_center - 2 * z_min``, where ``z_min`` is the lowest z
    value (rounded to 3 decimals) among the points inside that cylinder.

    Args:
        i: Row index of the centre point in ``lidar_data``.
        lidar_data: Array-like of shape (n_points, 3) holding x, y, z.
        radius: Horizontal radius of the cylinder, in the units of x and y.

    Returns:
        The feature value for point ``i``. If no point lies inside the
        cylinder, ``z_center`` is used as ``z_min``, giving ``-z_center``.
    """
    points = np.asarray(lidar_data)
    x_center, y_center, z_center = points[i]
    xs, ys, zs = points[:, 0], points[:, 1], points[:, 2]

    # One vectorised pass over all points instead of a Python-level loop.
    # No bounding-box pre-filter is needed: the distance test implies it.
    planar_dist = np.sqrt((xs - x_center) ** 2 + (ys - y_center) ** 2)
    in_cylinder = (planar_dist <= radius) & (zs >= 0) & (zs <= z_center)

    if in_cylinder.any():
        # Rounding is monotonic, so rounding the minimum gives the same
        # value as taking the minimum of the individually rounded z values.
        minimum_z = round(float(zs[in_cylinder].min()), 3)
    else:
        minimum_z = z_center
    return z_center - 2 * minimum_z
radius = 3.0

# Bind the shared arguments once; each task then only needs a point index.
func = partial(compute_feature, lidar_data=lidar_data, radius=radius)
n_points = lidar_data.shape[0]

with Pool() as pool:
    # imap yields results lazily and in order, so wrapping it in tqdm lets
    # the bar advance as each result comes back from a worker.
    features = list(
        tqdm(pool.imap(func, range(n_points)), total=n_points, desc="Computing feature")
    )
features = np.array(features)
When I run this code, the tqdm progress bar appears, but it doesn't update. It's as if the computation isn't starting at all. What could be the cause of this issue? How can I get the progress bar to update correctly?
I have just tried to reproduce your code, and the problem is not with tqdm; it's the way you're using `multiprocessing.Pool()`.
When I ran your code verbatim, I got an endless loop of errors telling me that a new process was being started before the current one had finished initializing. That happens because you launch the pool at module level, as the file is loaded, instead of doing so inside a `main()` function.
This code works:
import numpy as np
from multiprocessing import Pool
from tqdm import tqdm
from functools import partial
def compute_feature(i, lidar_data, radius):
    """Return the height feature of point ``i`` inside a vertical cylinder.

    The cylinder is centred on the (x, y) position of point ``i``, has the
    given ``radius`` and spans from z = 0 up to the point's own z value.
    The feature is ``z_center - 2 * z_min``, where ``z_min`` is the lowest z
    value (rounded to 3 decimals) among the points inside that cylinder.

    Args:
        i: Row index of the centre point in ``lidar_data``.
        lidar_data: Array-like of shape (n_points, 3) holding x, y, z.
        radius: Horizontal radius of the cylinder, in the units of x and y.

    Returns:
        The feature value for point ``i``. If no point lies inside the
        cylinder, ``z_center`` is used as ``z_min``, giving ``-z_center``.
    """
    points = np.asarray(lidar_data)
    x_center, y_center, z_center = points[i]
    xs, ys, zs = points[:, 0], points[:, 1], points[:, 2]

    # One vectorised pass over all points instead of a Python-level loop.
    # No bounding-box pre-filter is needed: the distance test implies it.
    planar_dist = np.sqrt((xs - x_center) ** 2 + (ys - y_center) ** 2)
    in_cylinder = (planar_dist <= radius) & (zs >= 0) & (zs <= z_center)

    if in_cylinder.any():
        # Rounding is monotonic, so rounding the minimum gives the same
        # value as taking the minimum of the individually rounded z values.
        minimum_z = round(float(zs[in_cylinder].min()), 3)
    else:
        minimum_z = z_center
    return z_center - 2 * minimum_z
def main():
    """Compute the feature for every point of a random cloud and print it.

    The per-point work is distributed over a process pool, and tqdm shows
    progress as results come back.
    """
    # np.random.seed(0)  # uncomment for reproducible data
    lidar_data = np.random.uniform(low=0.0, high=100.0, size=(1000, 3))
    radius = 3.0

    # Bind the shared arguments once; each task then only needs a point index.
    func = partial(compute_feature, lidar_data=lidar_data, radius=radius)
    n_points = lidar_data.shape[0]

    with Pool() as pool:
        # imap yields results lazily and in order, so wrapping it in tqdm
        # lets the bar advance as each result comes back from a worker.
        features = list(
            tqdm(pool.imap(func, range(n_points)), total=n_points, desc="Computing feature")
        )
    features = np.array(features)
    print(features)


# Worker processes started with the "spawn" method re-import this module.
# The guard keeps them from calling main() and creating pools of their own.
if __name__ == "__main__":
    main()
The only change I had to make was to move all your "main" code into a function and add a main guard, so that it is called only once, from the primary process, and not from the worker processes that the pool creates (on platforms that spawn workers, each worker re-imports the module).