Search code examples
c++image-processingtifftesseractleptonica

How to convert a bitmap to PIX in memory?


Tesseract doesn't appear to work well with a bitmap: it works on some inputs but gets others wrong. At the same time, it performs fine on inputs that are identical to the previous ones but are supplied in leptonica's PIX format.

How can I convert a bitmap in memory to PIX?

One idea that came to mind is to use leptonica's pixReadMem() :

00724 /*---------------------------------------------------------------------*
00725  *                            Read from memory                         *
00726  *---------------------------------------------------------------------*/
00727 /*!
00728  *  pixReadMem()
00729  *
00730  *      Input:  data (const; encoded)
00731  *              datasize (size of data)
00732  *      Return: pix, or null on error
00733  *
00734  *  Notes:
00735  *      (1) This is a variation of pixReadStream(), where the data is read
00736  *          from a memory buffer rather than a file.
00737  *      (2) On windows, this will only read tiff formatted files from
00738  *          memory.  For other formats, it requires fmemopen(3).
00739  *          Attempts to read those formats will fail at runtime.
00740  *      (3) findFileFormatBuffer() requires up to 8 bytes to decide on
00741  *          the format.  That determines the constraint here.
00742  */

So now I need to find a way to construct a TIFF in memory from a bitmap in memory. But I don't know how.


Solution

  • Well, I decided to just do this myself with a quick and dirty port. I took leptonica's BMP file-loading function and replaced its fread/fseek calls with fake versions that read from a memory buffer, since I am really lazy. It works.

    In case anyone needs it I will post it below. It's baffling why leptonica developers didn't do it for windows properly.

    #include <cstring>
    
    #include <leptonica/allheaders.h>
    #include "leptonica_hack.h"
    #include "bmp.h"
    
    /* Read cursor shared by fake_fread() and fake_fseek(): byte offset of the
     * next unread byte in the in-memory "file".  Being file-scope state, it
     * makes the fake I/O helpers non-reentrant. */
    int fake_file_tracker = 0;

    /*
     * fake_fread()
     *
     *      Input:  ptr (destination; must hold at least size * count bytes)
     *              size (size in bytes of one item)
     *              count (number of items requested)
     *              buffer (start of the in-memory file)
     *              buflen (total number of bytes in buffer)
     *      Return: number of complete items copied, as fread() would report
     *
     *  Notes:
     *      (1) Copies from buffer at the position held in fake_file_tracker
     *          and advances that position by the number of bytes consumed.
     *      (2) If fewer than size * count bytes remain, every remaining byte
     *          is still copied (so a trailing partial item may land in ptr),
     *          the cursor is parked at buflen, and only the number of whole
     *          items is returned.
     *      (3) Returns 0 without touching ptr when size or count is 0, or
     *          when the cursor is negative or at/after the end of buffer.
     *      (4) The cursor is an int, so buffers larger than INT_MAX bytes
     *          cannot be tracked correctly.
     */
    size_t fake_fread ( void * ptr, size_t size, size_t count, char * buffer, size_t buflen)
    {
        size_t pos;
        size_t remaining;

        if (size == 0 || count == 0)
            return 0;
        if (fake_file_tracker < 0 || (size_t)fake_file_tracker >= buflen)
            return 0;

        pos = (size_t)fake_file_tracker;
        remaining = buflen - pos;

            /* Short read.  Comparing count against remaining / size instead
             * of size * count against remaining cannot overflow. */
        if (count > remaining / size)
        {
            (void)memcpy(ptr, (void *)(buffer + pos), remaining);
            fake_file_tracker = (int)buflen;
            return remaining / size;
        }

        (void)memcpy(ptr, (void *)(buffer + pos), size * count);
        fake_file_tracker = (int)(pos + size * count);
        return count;
    }
    
    /*
     * fake_fseek()
     *
     *      Input:  buffer (start of the in-memory file; not inspected)
     *              offset (byte offset to move to)
     *              origin (base position that offset is added to; 0 means
     *                      the start of the buffer)
     *              buflen (total number of bytes in buffer)
     *      Return: 0 if the cursor was moved, -1 if the target was rejected
     *
     *  Notes:
     *      (1) Moves the shared read cursor fake_file_tracker to
     *          origin + offset.
     *      (2) Unlike fseek(), origin is used as a plain numeric base and
     *          not as a SEEK_SET/SEEK_CUR/SEEK_END selector.
     *      (3) Targets outside [0, buflen) are refused and leave the cursor
     *          where it was.  A refused seek is reported with a nonzero
     *          return, as fseek() does, rather than being indistinguishable
     *          from success.
     */
    int fake_fseek ( char * buffer, long int offset, int origin, size_t buflen)
    {
        long target = (long)origin + offset;

        (void)buffer;

        if (target < 0 || (size_t)target >= buflen)
            return -1;

            /* The cursor is an int: refuse positions it cannot represent */
        if ((long)(int)target != target)
            return -1;

        fake_file_tracker = (int)target;
        return 0;
    }
    
    
    /*!
     *  pixReadBmpFromBuffer()
     *
     *      Input:  fp (start of an uncompressed BMP file held in memory)
     *              buflen (number of bytes available at fp)
     *      Return: pix, or null on error
     *
     *  Notes:
     *      (1) This is a port of the stream reader pixReadStreamBmp() in
     *          which the stdio calls are replaced by fake_fread() and
     *          fake_fseek(), which walk the memory buffer instead.
     *      (2) The read position lives in the file-scope cursor
     *          fake_file_tracker, which is reset here, so concurrent calls
     *          would interfere with each other.
     *      (3) Compressed BMP data is rejected.  A 24 bpp file produces a
     *          32 bpp pix; a 1 bpp file with a colormap is returned with
     *          the colormap removed.
     *      (4) Here are references on the bmp file format:
     *          http://en.wikipedia.org/wiki/BMP_file_format
     *          http://www.fortunecity.com/skyscraper/windows/364/bmpffrmt.html
     */
    PIX *
    pixReadBmpFromBuffer(char  *fp, size_t buflen)
    {
    l_uint16   sval;
    l_uint32   ival;
    l_int16    bfType, bfSize, bfFill1, bfReserved1, bfReserved2;
    l_int16    offset, bfFill2, biPlanes, depth, d;
    l_int32    biSize, width, height, xres, yres, compression;
    l_int32    imagebytes, biClrUsed, biClrImportant;
    l_uint8   *colormapBuf = NULL;
    l_int32    colormapEntries;
    l_int32    fileBpl, extrabytes, readerror;
    l_int32    pixWpl, pixBpl;
    l_int32    i, j, k;
    l_uint8    pel[4];
    l_uint8   *data;
    l_uint32  *line, *pword;
    PIX        *pix, *pixt;
    PIXCMAP   *cmap;

        PROCNAME("pixReadBmpFromBuffer");

        if (!fp)
            return (PIX *)ERROR_PTR("fp not defined", procName, NULL);

            /* The header reads below consume 14 + 40 bytes and their
             * results are not checked individually, so make sure both
             * headers are present before parsing anything.  (Assumes
             * BMP_FHBYTES and BMP_IHBYTES are those two header sizes,
             * as the offset check further down also does.) */
        if (buflen < (size_t)(BMP_FHBYTES + BMP_IHBYTES))
            return (PIX *)ERROR_PTR("buffer too small for bmp headers",
                                    procName, NULL);

        fake_file_tracker = 0;

            /* Read bitmap file header.  It is consumed as seven 16-bit
             * pieces; only the type and the first 16 bits of the
             * data offset are used afterwards. */
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        bfType = convertOnBigEnd16(sval);
        if (bfType != BMP_ID)
            return (PIX *)ERROR_PTR("not bmf format", procName, NULL);

        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        bfSize = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        bfFill1 = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        bfReserved1 = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        bfReserved2 = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        offset = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        bfFill2 = convertOnBigEnd16(sval);

            /* Read bitmap info header */
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        biSize = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        width = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        height = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        biPlanes = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&sval, 1, 2, fp, buflen);
        depth = convertOnBigEnd16(sval);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        compression = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        imagebytes = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        xres = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        yres = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        biClrUsed = convertOnBigEnd32(ival);
        (void)fake_fread((char *)&ival, 1, 4, fp, buflen);
        biClrImportant = convertOnBigEnd32(ival);

            /* These fields are parsed only to step over them */
        (void)bfSize;
        (void)bfFill1;
        (void)bfReserved1;
        (void)bfReserved2;
        (void)bfFill2;
        (void)biSize;
        (void)biPlanes;
        (void)biClrUsed;
        (void)biClrImportant;

        if (compression != 0)
            return (PIX *)ERROR_PTR("cannot read compressed BMP files",
                                    procName,NULL);

            /* A little sanity checking.  The imagebytes for
             * uncompressed images is either 0 or the size of the file data.
             * (The fact that it can be 0 is perhaps some legacy glitch).
             * Truncated pixel data is detected by the row reads below. */
        if (width < 1)
            return (PIX *)ERROR_PTR("width < 1", procName,NULL);
        if (height < 1)
            return (PIX *)ERROR_PTR("height < 1", procName,NULL);
        if (depth < 1 || depth > 32)
            return (PIX *)ERROR_PTR("depth not in [1 ... 32]", procName,NULL);

            /* The header comes from the caller's data: keep the 32-bit
             * row-size and image-size arithmetic from overflowing. */
        if (width > (0x7fffffff - 31) / depth)
            return (PIX *)ERROR_PTR("width too large", procName,NULL);
        fileBpl = 4 * ((width * depth + 31)/32);
        if (height > 0x7fffffff / fileBpl)
            return (PIX *)ERROR_PTR("height too large", procName,NULL);

        if (imagebytes != 0 && imagebytes != fileBpl * height)
            return (PIX *)ERROR_PTR("invalid imagebytes", procName,NULL);
        if (offset < BMP_FHBYTES + BMP_IHBYTES)
            return (PIX *)ERROR_PTR("invalid offset: too small", procName,NULL);
        if (offset > BMP_FHBYTES + BMP_IHBYTES + 4 * 256)
            return (PIX *)ERROR_PTR("invalid offset: too large", procName,NULL);

            /* Handle the colormap: everything between the headers and
             * the pixel data is taken to be RGBA_QUAD entries */
        colormapEntries = (offset - BMP_FHBYTES - BMP_IHBYTES) / sizeof(RGBA_QUAD);
        if (colormapEntries > 0) {
            if ((colormapBuf = (l_uint8 *)CALLOC(colormapEntries,
                                                 sizeof(RGBA_QUAD))) == NULL)
                return (PIX *)ERROR_PTR("colormapBuf alloc fail", procName, NULL );

                /* Read colormap */
            if (fake_fread(colormapBuf, sizeof(RGBA_QUAD), colormapEntries, fp, buflen)
                     != (size_t)colormapEntries) {
                FREE(colormapBuf);
                return (PIX *)ERROR_PTR( "colormap read fail", procName, NULL);
            }
        }

            /* Make a 32 bpp pix if depth is 24 bpp */
        d = depth;
        if (depth == 24)
            d = 32;
        if ((pix = pixCreate(width, height, d)) == NULL) {
            if (colormapBuf)
                FREE(colormapBuf);  /* not yet owned by any colormap */
            return (PIX *)ERROR_PTR( "pix not made", procName, NULL);
        }
        pixSetXRes(pix, (l_int32)((l_float32)xres / 39.37 + 0.5));  /* to ppi */
        pixSetYRes(pix, (l_int32)((l_float32)yres / 39.37 + 0.5));  /* to ppi */
        pixWpl = pixGetWpl(pix);
        pixBpl = 4 * pixWpl;

        cmap = NULL;
        if (colormapEntries > 256)
            L_WARNING("more than 256 colormap entries!", procName);
        if (colormapEntries > 0) {  /* import the colormap to the pix cmap */
            if ((cmap = pixcmapCreate(L_MIN(d, 8))) == NULL) {
                FREE(colormapBuf);
                pixDestroy(&pix);
                return (PIX *)ERROR_PTR("cmap not made", procName, NULL);
            }
            FREE(cmap->array);  /* remove generated cmap array */
            cmap->array  = (void *)colormapBuf;  /* and replace */
            cmap->n = L_MIN(colormapEntries, 256);
        }
        pixSetColormap(pix, cmap);

            /* Seek to the start of the bitmap in the buffer.  A refused
             * seek must not be ignored: the rows would otherwise be read
             * from wherever the cursor happened to be. */
        if ((size_t)offset >= buflen ||
            fake_fseek(fp, offset, 0, buflen) != 0) {
            pixDestroy(&pix);
            return (PIX *)ERROR_PTR("BMP read fail", procName, NULL);
        }

            /* Rows are taken from the buffer in order and written into
             * the pix starting at its last row and moving upward. */
        if (depth != 24) {  /* typ. 1 or 8 bpp */
            data = (l_uint8 *)pixGetData(pix) + pixBpl * (height - 1);
            for (i = 0; i < height; i++) {
                if (fake_fread(data, 1, fileBpl, fp, buflen) != (size_t)fileBpl) {
                    pixDestroy(&pix);
                    return (PIX *)ERROR_PTR("BMP read fail", procName, NULL);
                }
                data -= pixBpl;
            }
        }
        else {  /*  24 bpp file; 32 bpp pix
                 *  Note: for bmp files, pel[0] is blue, pel[1] is green,
                 *  and pel[2] is red.  This is opposite to the storage
                 *  in the pix, which puts the red pixel in the 0 byte,
                 *  the green in the 1 byte and the blue in the 2 byte.
                 *  Note also that all words are endian flipped after
                 *  assignment on L_LITTLE_ENDIAN platforms.
                 *
                 *  We can then make these assignments for little endians:
                 *      SET_DATA_BYTE(pword, 1, pel[0]);      blue
                 *      SET_DATA_BYTE(pword, 2, pel[1]);      green
                 *      SET_DATA_BYTE(pword, 3, pel[2]);      red
                 *  This looks like:
                 *          3  (R)     2  (G)        1  (B)        0
                 *      |-----------|------------|-----------|-----------|
                 *  and after byte flipping:
                 *           3          2  (B)     1  (G)        0  (R)
                 *      |-----------|------------|-----------|-----------|
                 *
                 *  For big endians we set:
                 *      SET_DATA_BYTE(pword, 2, pel[0]);      blue
                 *      SET_DATA_BYTE(pword, 1, pel[1]);      green
                 *      SET_DATA_BYTE(pword, 0, pel[2]);      red
                 *  This looks like:
                 *          0  (R)     1  (G)        2  (B)        3
                 *      |-----------|------------|-----------|-----------|
                 *  so in both cases we get the correct assignment in the PIX.
                 *
                 *  Can we do a platform-independent assignment?
                 *  Yes, set the bytes without using macros:
                 *      *((l_uint8 *)pword) = pel[2];           red
                 *      *((l_uint8 *)pword + 1) = pel[1];       green
                 *      *((l_uint8 *)pword + 2) = pel[0];       blue
                 *  For little endians, before flipping, this looks again like:
                 *          3  (R)     2  (G)        1  (B)        0
                 *      |-----------|------------|-----------|-----------|
                 */
            readerror = 0;
            extrabytes = fileBpl - 3 * width;  /* row padding in the file */
            line = pixGetData(pix) + pixWpl * (height - 1);
            for (i = 0; i < height; i++) {
                for (j = 0; j < width; j++) {
                    pword = line + j;
                    if (fake_fread(&pel, 1, 3, fp, buflen) != 3)
                        readerror = 1;
                    *((l_uint8 *)pword + COLOR_RED) = pel[2];
                    *((l_uint8 *)pword + COLOR_GREEN) = pel[1];
                    *((l_uint8 *)pword + COLOR_BLUE) = pel[0];
                }
                    /* Step over the padding; a short read here is
                     * deliberately not treated as an error. */
                for (k = 0; k < extrabytes; k++)
                    (void)fake_fread(&pel, 1, 1, fp, buflen);
                line -= pixWpl;
            }
            if (readerror) {
                pixDestroy(&pix);
                return (PIX *)ERROR_PTR("BMP read fail", procName, NULL);
            }
        }

        pixEndianByteSwap(pix);

            /* ----------------------------------------------
             * The bmp colormap determines the values of black
             * and white pixels for binary in the following way:
             * if black = 1 (255), white = 0
             *      255, 255, 255, 0, 0, 0, 0, 0
             * if black = 0, white = 1 (255)
             *      0, 0, 0, 0, 255, 255, 255, 0
             * We have no need for a 1 bpp pix with a colormap!
             * ---------------------------------------------- */
        if (depth == 1 && cmap) {
            pixt = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC);
            pixDestroy(&pix);
            pix = pixt;  /* rename */
        }

        return pix;
    }