Search code examples
pythonimageimage-processingvips

Making a huge image mosaic with pyvips


I am trying to make an image mosaic generator using pyvips. Basically, given an image (called the original in the following), I want to create a new, bigger image that resembles the original, except that each pixel (or, more realistically, each group of pixels) is replaced by a smaller, distinct image tile.
I was drawn to pyvips because it is said it can handle huge images and that it can process images without having to load them completely into memory. However, I am having an issue creating a blank mosaic to then populate with tile images.
In the code below I try joining tiles together row by row to create a mosaic but this code unfortunately eats through my RAM and always segfaults.

import os
import pyvips
from os.path import join
from scipy.spatial import cKDTree

class Mosaic(object):
    """Build a photo mosaic of an original image out of a directory of tiles.

    Every tile image found under ``dir_path`` is indexed by a 3-value colour
    tuple (see ``avg_rgb``) in a k-d tree.  The original is then cut into
    square patches and each patch is replaced by the tile whose colour tuple
    is the nearest neighbour of the patch's own tuple.
    """

    def __init__(self, dir_path, original_path, tree=None, averages=None):
        """Store the configuration; no image is opened here.

        dir_path      -- directory walked by ``build_tree`` to find tiles.
        original_path -- path of the image to turn into a mosaic.
        tree          -- optional prebuilt nearest-neighbour index.
        averages      -- optional dict mapping colour tuple -> tile path;
                         a falsy value (None or empty) gets a fresh dict.
        """
        self.dir_path = dir_path
        self.original = original_path
        self.tree = tree
        if averages:
            self.averages = averages
        else:
            self.averages = {}

    def get_image(self, path):
        """Open ``path`` with pyvips using sequential access."""
        return pyvips.Image.new_from_file(path, access="sequential")

    def build_tree(self):
        """Index every loadable file under ``dir_path`` by its colour tuple.

        Fills ``self.averages`` (colour tuple -> file path) and builds
        ``self.tree`` over the tuples.  Files pyvips rejects are reported and
        skipped.  Two tiles with the same colour tuple share a dict key, so
        the later one replaces the earlier one.
        """
        for root, dirs, files in os.walk(self.dir_path):
            print('Loading images from', root, '...')
            for file_name in files:
                path = join(root, file_name)
                try:
                    image = pyvips.Image.new_from_file(path)
                    self.averages[self.avg_rgb(image)] = path
                except pyvips.error.Error:
                    print('File', path, 'not recognized as an image.')
        # NOTE(review): on Python 3 ``keys()`` is a view rather than a list;
        # cKDTree presumably needs ``list(...)`` there -- confirm.
        self.tree = cKDTree(self.averages.keys())
        print('Loaded', len(self.averages), 'images.')

    def avg_rgb(self, image):
        """Return a 3-tuple read from column 4, rows 1-3 of ``image.stats()``.

        In the libvips stats layout column 4 holds the mean and rows 1..n are
        the individual bands, so this is the mean of the first three bands
        (presumably R, G, B -- the image's interpretation is not checked).
        """
        m = image.stats()
        return tuple(m(4,i)[0] for i in range(1,4))

    def get_tile_name(self, patch):
        """Return the path of the tile whose colour tuple is nearest to ``patch``."""
        avg = self.avg_rgb(patch)
        # query() returns (distance, index); only the index is needed.
        index = self.tree.query(avg)[1]
        # Turn the tree's stored point back into the tuple used as dict key.
        return self.averages[tuple(self.tree.data[index])]

    def get_tile(self, x, y, step):
        """Open the tile image matching the ``step`` x ``step`` patch at (x, y).

        Note that the original file is reopened on every call, i.e. once per
        output tile.
        """
        patch = self.get_image(self.original).crop(x, y, step, step)
        patch_name = self.get_tile_name(patch)
        return pyvips.Image.new_from_file(patch_name, access="sequential")

    def make_mosaic(self, tile_num, tile_size, mosaic_path):
        """Assemble the mosaic tile by tile and write it to ``mosaic_path``.

        tile_num    -- number of patches along the shorter side of the original.
        tile_size   -- edge length, in pixels, of each square output tile.
        mosaic_path -- output file name.

        Tiles are chained with one ``join`` per tile (horizontally into a row,
        then each row vertically onto the mosaic), so the final image is a
        pipeline with one join operation per tile.
        """
        original = self.get_image(self.original)
        mosaic = None
        # NOTE(review): ``/`` is integer division only on Python 2; on
        # Python 3 ``step`` becomes a float and ``range`` below rejects it.
        step = min(original.height, original.width) / tile_num
        # NOTE(review): when a dimension is not a multiple of ``step`` the
        # last patch reaches past the image edge -- confirm crop accepts that.
        for y in range(0, original.height, step):
            mosaic_row = None
            print('Building row', y/step, '/', original.height/step)
            for x in range(0, original.width, step):
                tile = self.get_tile(x, y, step)
                # Scale so the shorter side equals tile_size, then cut a
                # tile_size square from the top-left corner.
                tile = tile.resize(float(tile_size) / float(min(tile.width, tile.height)))
                tile = tile.crop(0, 0, tile_size, tile_size)
                #mosaic.draw_image(tile, x, y)
                mosaic_row = tile if not mosaic_row else mosaic_row.join(tile, "horizontal")
            mosaic = mosaic_row if not mosaic else mosaic.join(mosaic_row, "vertical")
        mosaic.write_to_file(mosaic_path)

I have also tried creating a mosaic by resizing the original image and then using draw_image like the following but this also crashes.

mosaic = self.get_image(self.original).resize(tile_size)

mosaic.draw_image(tile, x, y)

Finally, I have tried creating the mosaic from new_temp_file and I am having trouble writing to the temp image.

How can I make this mosaic program work?


Solution

  • libvips uses a recursive algorithm to work out which pixels to compute next, so for very long pipelines you can overflow the C stack and get a crash.

    The simplest solution would be to use arrayjoin. This is a libvips operator which can join many images in a single call:

    http://jcupitt.github.io/libvips/API/current/libvips-conversion.html#vips-arrayjoin

    There's an example on the libvips github of using it to join 30,000 images at once:

    https://github.com/jcupitt/libvips/issues/471

    (though that's using the previous version of the libvips Python binding)

    I adapted your program to use arrayjoin, and changed the way it loaded images. I noticed you were also reloading the original image for each output tile, so removing that gave a nice speedup.

    #!/usr/bin/python2
    
    from __future__ import print_function
    import os
    import sys
    import pyvips
    from os.path import join
    from scipy.spatial import cKDTree
    
    class Mosaic(object):
        """Build a photo mosaic of an original image out of a directory of tiles.

        Tiles are loaded once, shrunk to ``tile_size`` squares and kept in
        memory, indexed by colour tuple in a k-d tree.  The output is produced
        with a single ``arrayjoin`` call instead of one ``join`` per tile.
        Runs unchanged on Python 2 and Python 3.
        """

        def __init__(self, dir_path, original_path, tile_size=128, tree=None, averages=None):
            """Store the configuration; no image is opened here.

            dir_path      -- directory walked by ``build_tree`` to find tiles.
            original_path -- path of the image to turn into a mosaic.
            tile_size     -- edge length in pixels of each square output tile.
            tree          -- optional prebuilt nearest-neighbour index.
            averages      -- optional dict mapping colour tuple -> tile image;
                             a falsy value (None or empty) gets a fresh dict.
            """
            self.dir_path = dir_path
            self.original_path = original_path
            self.tile_size = tile_size
            self.tree = tree
            self.averages = averages if averages else {}

        def avg_rgb(self, image):
            """Return the per-band mean of the first three bands as a tuple.

            ``stats()`` yields a small table image: column 4 is the mean and
            rows 1..n are the individual bands (row 0 covers all bands).
            """
            stats = image.stats()
            return tuple(stats(4, band)[0] for band in range(1, 4))

        def build_tree(self):
            """Load every usable tile under ``dir_path`` and index it by colour.

            Raises ValueError when no tile could be loaded, since a mosaic
            cannot be built from an empty tile set.
            """
            for root, _dirs, files in os.walk(self.dir_path):
                print('Loading images from', root, '...')
                for file_name in files:
                    path = join(root, file_name)
                    try:
                        # load image as a square image of size tile_size X tile_size
                        tile = pyvips.Image.thumbnail(path, self.tile_size,
                                                      height=self.tile_size,
                                                      crop='centre')
                        # render into memory
                        tile = tile.copy_memory()
                        self.averages[self.avg_rgb(tile)] = tile
                    except pyvips.error.Error:
                        print('File', path, 'not recognized as an image.')
            if not self.averages:
                raise ValueError('No usable tile images found in %r' % (self.dir_path,))
            # Materialise the keys: on Python 3 a dict view is not array-like.
            self.tree = cKDTree(list(self.averages))
            print('Loaded', len(self.averages), 'images.')

        def fetch_tree(self, patch):
            """Return the in-memory tile whose colour tuple is nearest to ``patch``."""
            avg = self.avg_rgb(patch)
            # query() returns (distance, index); only the index is needed.
            index = self.tree.query(avg)[1]
            # Turn the tree's stored point back into the tuple used as dict key.
            return self.averages[tuple(self.tree.data[index])]

        def make_mosaic(self, tile_num, mosaic_path):
            """Build the mosaic and write it to ``mosaic_path``.

            tile_num    -- number of patches along the shorter side of the original.
            mosaic_path -- output file name.

            Raises ValueError when ``tile_num`` is below 1 or exceeds the
            shorter side of the original (patches would be under one pixel).
            """
            if tile_num < 1:
                raise ValueError('tile_num must be at least 1')
            original = pyvips.Image.new_from_file(self.original_path)
            # Floor division keeps these integers on Python 3 as well.
            step = min(original.height, original.width) // tile_num
            if step < 1:
                raise ValueError('tile_num exceeds the shorter side of the original image')
            tiles_across = original.width // step
            tiles_down = original.height // step
            tiles = []
            for y in range(tiles_down):
                print('Building row', y, '/', tiles_down)
                for x in range(tiles_across):
                    # The grid only covers whole steps, so every patch is a
                    # full step x step square inside the image.
                    patch = original.crop(x * step, y * step, step, step)
                    tiles.append(self.fetch_tree(patch))

            # One operation joins the whole grid, avoiding a deep join chain.
            mosaic = pyvips.Image.arrayjoin(tiles, across=tiles_across)

            print('writing ', mosaic_path)
            mosaic.write_to_file(mosaic_path)
    
    # Script entry point: mosaic2.py TILE_DIR ORIGINAL_IMAGE OUTPUT_IMAGE
    # Guarded so importing this module does not start a mosaic build.
    if __name__ == '__main__':
        if len(sys.argv) < 4:
            sys.exit('usage: %s TILE_DIR ORIGINAL_IMAGE OUTPUT_IMAGE' % sys.argv[0])
        mosaic = Mosaic(sys.argv[1], sys.argv[2])
        mosaic.build_tree()
        # 200 patches along the shorter side of the original.
        mosaic.make_mosaic(200, sys.argv[3])
    

    I can run it like this:

    $ time ./mosaic2.py samples/ k2.jpg x.png
    Loading images from samples/ ...
    Loaded 228 images.
    Building row 0 / 292
    ...
    Building row 291 / 292
    writing  x.png
    real    7m19.333s
    user    7m27.322s
    sys     0m30.578s
    

    This makes a 26496 x 37376 pixel image in this case, and it runs in about 150 MB of memory.