Transposing of Large and Narrow Images in C

I am trying to process large .pgm images in C. The images are first read in the format Image, as matrices of unsigned char elements:

struct Matrix{
  int rows;
  int cols;
  unsigned char * data;
  int widthStep;
};
typedef struct Matrix Image;

I am using the following function to read the images, with netpbm (netpbm/pam.h):

Image * loadPBM(char * fname){
  FILE * file;
  struct pam inpam;
  tuple * tuplerow;
  unsigned int row;
  Image * image;
  int aux;

  file=fopen(fname,"r");
  pnm_readpaminit(file, &inpam, /*PAM_STRUCT_SIZE(tuple_type)*/ 
  sizeof(struct pam));

  printf("Reading image\n"); 

  /* allocating image*/
  image=(Image*)malloc(sizeof(Image));
  image->cols=inpam.width;
  image->rows=inpam.height;
  image->widthStep=image->cols;
  aux=image->cols & 0x3;
  if (aux!=0){
    image->widthStep+=4-aux;
  }
  image->data=(unsigned char *)malloc(image->widthStep*image->rows);

  tuplerow = pnm_allocpamrow(&inpam);

  for (row = 0; row < inpam.height; row++) {
    unsigned int column;
    pnm_readpamrow(&inpam, tuplerow);
    for (column = 0; column < inpam.width; ++column) {
      unsigned int plane;
      for (plane = 0; plane < inpam.depth; ++plane) {
        image->data[image->widthStep*row+column]= tuplerow[column][plane];
          }
        }
      } 

  pnm_freepamrow(tuplerow);
  fclose(file);
  return image;

}

After being read, the images are translated into the format ImageF, so that I can process the elements as double, as such:

struct MatrixF{
  int rows;
  int cols;
  double * data;
  int widthStep;
};
typedef struct MatrixF ImageF;

Translation Image to ImageF:

for (int i = 0; i < in_img->rows; ++i){

    for (int j = 0; j < in_img->cols; ++j){

        in_aux->data[i*(in_img->cols)+j] = (double)in_img->data[i*(in_img->cols)+j];
    }
}

For the actual image processing I need to transpose the images, so I have written the following function:

void transpose(ImageF *in_re, ImageF *out_re){

    int rows = in_re->rows;
    int cols = in_re->cols;

    for(int i = 0 ; i < rows ; ++i){

        for(int j = 0 ; j < cols ; ++j){

            out_re->data[j*rows+i] = in_re->data[i*cols+j];
        }
    }

    out_re->rows = in_re->cols;
    out_re->cols = in_re->rows;

    out_re->widthStep = out_re->cols * sizeof(double);
}

After being transposed, the images are translated back from ImageF to Image, in order to store the results (convert double to unsigned char):

    double val;
    for (int i = 0; i < out_aux->rows; i++){

        for (int j = 0; j < out_aux->cols; j++){

            val = out_aux->data[i*out_aux->cols + j];///((in_img->rows)*(in_img->cols ));

            if (val < 0)
                val = 0.0;
            else if (val > 255)
                val = 255.0;
            out_img->data[i * out_aux->cols + j] = (unsigned char)val;
        }
    }

And finally they are stored using the following function:

void savePBM(char * fname, Image * image){
  FILE * file;
  struct pam outpam;
  tuple * tuplerow;
  unsigned int row;

  int aux;

  file=fopen(fname,"w");
  outpam.file=file;
  outpam.size=sizeof(struct pam);
  outpam.len=sizeof(struct pam);
  outpam.format=RPGM_FORMAT;
  outpam.plainformat=0;
  outpam.height=image->rows;
  outpam.width=image->cols;
  outpam.depth=1;
  outpam.maxval=255;
  strcpy(outpam.tuple_type,PAM_PGM_TUPLETYPE);

  pnm_writepaminit( &outpam );

  printf("Writing image\n");

  tuplerow = pnm_allocpamrow(&outpam);

  for (row = 0; row < outpam.height; row++) {
    unsigned int column;
    for (column = 0; column < outpam.width; ++column) {
      unsigned int plane;
      for (plane = 0; plane < outpam.depth; ++plane) {
    tuplerow[column][plane]=image->data[image->widthStep*row+column];
      }
    }
    pnm_writepamrow(&outpam, tuplerow); 
  } 

  pnm_freepamrow(tuplerow);
  fclose(file);
}

Image memory space allocation I believe was done correctly, after loading the input image with loadPBM, as such:

    out_img = (Image *)malloc( sizeof(Image) );
    out_img->rows = in_img->cols;
    out_img->cols = in_img->rows;
    out_img->widthStep = out_img->cols * sizeof(unsigned char);
    out_img->data = (unsigned char *)malloc( (out_img->rows)*(out_img->cols)*sizeof(unsigned char) );

/*Auxiliary variables*/
in_aux = (ImageF *)malloc(sizeof(ImageF));
    in_aux->rows = in_img->rows;
    in_aux->cols = in_img->cols;
    in_aux->widthStep = in_aux->cols * sizeof(double);
    in_aux->data = (double *)malloc( (in_aux->rows)*(in_aux->cols)*sizeof(double) );

out_aux = (ImageF *)malloc(sizeof(ImageF));
    out_aux->rows = in_img->rows;
    out_aux->cols = in_img->cols;
    out_aux->widthStep = out_aux->cols * sizeof(double);
    out_aux->data = (double *)malloc( (out_aux->rows)*(out_aux->cols)*sizeof(double) );

For some reason, this works fine for square images or even images that are about 450x700 in resolution. But as images become narrower (for example 170x500), this algorithm stops functioning correctly. Images become distorted, and I have no idea why, since it works for other non-square matrices that are just not as narrow. If anyone can see where I went wrong or has some advice or anything, it would be extremely appreciated!

Thanks in advance!

Solution

After some grinding and discussion with a good friend, we figured out that the padding inserted by the function loadPBM, for memory alignment, this section:

 aux=image->cols & 0x3;
  if (aux!=0){
    image->widthStep+=4-aux;
  }

Was causing the writing of bytes at each matrix element to go overboard to the next element, for images whose width isn't a mulitple of 4, distorting them, due to altering their widthStep. Which explains why the algorithm worked for some non-square matrices, but not all. For those who saw the post and were also wondering why this was happening, hope this helps! You can just remove this added padding, and it'll work like a charm