Search code examples
cimage-processingcudalibpng

Image processing with CUDA and libpng


I've been trying to proccess an image (i.e applying a black and white filter) using CUDA and the libpng library. However, I'm not sure how to pass the image data to the device.

As far as I understand, the libpng librarie reads the png and stores the information in a png_bytep structue called row_pointers row by row using this function.

    void read_png_file(char* file_name){
        char header[8];    // 8 is the maximum size that can be checked

        FILE *fp = fopen(file_name, "rb");
        if (!fp)
                abort_("[read_png_file] File %s could not be opened for reading" file_name);
        fread(header, 1, 8, fp);
        // if (png_sig_cmp(header, 0, 8))
        //         abort_("[read_png_file] File %s is not recognized as a PNG file", file_name);


        //Inicializa variables necesarias para libpng
        png_ptr =   png_create_read_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);

        if (!png_ptr)
                abort_("[read_png_file] png_create_read_struct failed");

        info_ptr = png_create_info_struct(png_ptr);
        if (!info_ptr)
                abort_("[read_png_file] png_create_info_struct failed");

        if (setjmp(png_jmpbuf(png_ptr)))
                abort_("[read_png_file] Error during init_io");

        //Inicializa el input/output para el archivo PNG
        png_init_io(png_ptr, fp);
        png_set_sig_bytes(png_ptr, 8);

        //Lee la información anterior a los datos de los píxeles como tal
        png_read_info(png_ptr, info_ptr);

        //Almacena información del archivo PNG 
        width = png_get_image_width(png_ptr, info_ptr);
        height = png_get_image_height(png_ptr, info_ptr);
        color_type = png_get_color_type(png_ptr, info_ptr);
        bit_depth = png_get_bit_depth(png_ptr, info_ptr);

        number_of_passes = png_set_interlace_handling(png_ptr);
        png_read_update_info(png_ptr, info_ptr);


        // Lectura del archivo PNG
        if (setjmp(png_jmpbuf(png_ptr)))
                abort_("[read_png_file] Error during read_image");

        // Reserva el espacio necesario para almacenar los datos del archivo PNG por filas
        row_pointers = (png_bytep*) malloc(sizeof(png_bytep) * height);
        for (y=0; y<height; y++)
                row_pointers[y] = (png_byte*) malloc(png_get_rowbytes(png_ptr,info_ptr));

        // Y para la copia para el device
        d_row_pointers = (png_bytep*) malloc(sizeof(png_bytep) * height);
        for (y=0; y<height; y++)
                d_row_pointers[y] = (png_byte*) malloc(png_get_rowbytes(png_ptr,info_ptr));


        png_read_image(png_ptr, row_pointers);
        
        fclose(fp);
}

I've been trying to pass the information stored in row_pointers creating a copy of it named d_row_pointers and using the cudaMalloc and cudaMemcpy functions like this:

png_bytep * row_pointers;
png_bytep * d_row_pointers;

int main(int argc, char **argv)
{       
        
        // Verifica los parámetros para ejecutar el programa
        if (argc != 3)
                abort_("Uso: ./Nombre_del_Programa <file_in> <file_out>");

        read_png_file(argv[1]);

        // CUDA
        int size = sizeof(png_bytep);
        int int_size = sizeof(int);
        
        cudaMalloc((void **)&d_row_pointers, size);
        for (y=0; y<height; y++)
                 cudaMalloc((void **)&d_row_pointers[y],png_get_rowbytes(png_ptr,info_ptr));

        cudaMalloc((void **)&d_width, int_size);
        cudaMalloc((void **)&d_height, int_size);


        cudaMemcpy(d_row_pointers, &row_pointers, size, cudaMemcpyHostToDevice);
        for (y=0; y<height; y++)
                 cudaMemcpy(d_row_pointers[y], &row_pointers[y], png_get_rowbytes(png_ptr,info_ptr), cudaMemcpyHostToDevice);
        cudaMemcpy(d_width, &width, int_size, cudaMemcpyHostToDevice);
        cudaMemcpy(d_height, &height, int_size, cudaMemcpyHostToDevice);

        // Lanzar el kernel
        process_file<<<1,1>>>(d_row_pointers, d_width, d_height);

        // Copiar los resultados de vuelta al host
        cudaMemcpy(&row_pointers, d_row_pointers, size, cudaMemcpyDeviceToHost);

        // Limpieza
        for (y=0; y<height; y++)
                cudaFree(d_row_pointers[y]);
        cudaFree(d_row_pointers);
        cudaFree(d_width);
        cudaFree(d_height);

        // Escritura de la imagen con los resultados
        write_png_file(argv[2]);

        return 0;
}

And this is the process_file function that is supossed to apply the filter

__global__ void process_file(png_bytep * d_row_pointers, int * d_width, int * d_height)
{
        // Se realizan los cambios deseados en la imagen
        
        //Verificar los datos recibidos
        printf("Width = %d , Height = %d ", *d_width, *d_height);

        int rgb_total = 0;  
        float rgb_average = 0.0;
        int x = 0;
        int y = 0;
        png_byte *row;
        png_byte *ptr;

        for (y=0; y<*d_height-1; y++) {

                for (x=0; x<*d_width; x++) {
                
                        rgb_total = 0;
                        rgb_average = 0;

                        row             = d_row_pointers[y];
                        ptr             = &(row[x*3]);
                        
                        printf("Pixel  %d - %d, Rgb values: %d - %d - %d \n", x, y, ptr[0], ptr[1], ptr[2]); 
                        rgb_total      += ptr[0] + ptr[1] + ptr[2];
                        
                        // Calculando el promedios RGB
                        rgb_average = rgb_total / 3;
                        // printf("Average: %d \n", (int)rgb_average);
                        
                        ptr[0]  = (int)rgb_average;
                        ptr[1]  = (int)rgb_average;
                        ptr[2]  = (int)rgb_average;
                        
                        // printf("Changed to  %d - %d - %d \n",ptr[0], ptr[1], ptr[2]); 
                        // printf("Pixel  %d - %d done\n",x,y);  
                }

        }
        // printf("Para la imagen de resolución: %d x %d - ", *width, *height);
        
}

But I can't get it to work, even tho the kernel seems to be receiving the structure, when I access the data it's all zeros, when it is supossed to be the RGB values. I really appreciate any help regarding the rigth way to pass this data to the kernel. Thank you!

PD: The full code can be found here


Solution

  • I'm quite sure that this line causes the problems:

    cudaMemcpy(d_row_pointers[y], &row_pointers[y], 
        png_get_rowbytes(png_ptr,info_ptr), cudaMemcpyHostToDevice);
    

    You can't access the data at d_row_pointers[y] from cpu code. cudaMalloc expects a pointer to a pointer on the cpu and will treat the argument as such.

    You need to be careful with multidimensional arrays in CUDA. Most people opt to use flat arrays instead, even for images.

    And that's what I recommend: Copy the data to a flat (1-dimensional) buffer and then copy that to the device. It will be much harder to make mistakes this way.

    Ah, and one more thing: You don't need to explicitly cudaMalloc and cudaMemcpy the width and height parameters for the kernel. You can pass them by value, as if it was a normal function. You only need those functions for arrays.