Search code examples
pythonimage-segmentationsemantic-segmentationrun-length-encoding

Run-length encoded mask is not the same as the decoded mask


Masks before and after encoding

I'm working on a semantic segmentation task for Severstal steel anomaly detection.

These are the encoding and decoding functions.

def mask_to_rle(mask):
    """
    Encode a binary mask as a run-length string.

    The mask is flattened row by row and every run of foreground pixels is
    written as a "start length" pair, where start is the 1-based position of
    the run in the flattened array.

    params:  mask - numpy array (or array-like); any non-zero value counts
                    as foreground
    returns: run-length encoding string (space-separated pairs of start &
             length of each run); an empty string when nothing is set

    note:    rle_to_mask finishes with reshape(shape).T, so pass mask.T here
             to get a string that rle_to_mask decodes back to the same mask
    """
    # turn a n-dimensional array into a 1-dimensional series of pixels
    # for example:
    #     [[1. 1. 0.]
    #      [0. 0. 0.]   --> [1. 1. 0. 0. 0. 0. 1. 0. 0.]
    #      [1. 0. 0.]]
    # binarise first: a change between two different non-zero values
    # (e.g. 1 -> 2) is not a run boundary and must not be counted as one,
    # otherwise the start/stop entries below no longer come in pairs
    flat = (np.asarray(mask) != 0).ravel().astype(np.uint8)

    # we find consecutive sequences by overlaying the mask
    # on a version of itself that is displaced by 1 pixel
    # for that, we add some padding before slicing
    padded = np.concatenate([[0], flat, [0]])

    # this returns the indices where the sliced arrays differ
    runs = np.where(padded[1:] != padded[:-1])[0]
    # indexes start at 0, pixel numbers start at 1
    runs += 1

    # every uneven element represents the start of a new sequence
    # every even element is where the run comes to a stop
    # subtract the former from the latter to get the length of the run
    runs[1::2] -= runs[0::2]

    # convert the array to a string
    return ' '.join(str(x) for x in runs)
def rle_to_mask(lre, shape=(1600, 256)):
    '''
    Decode a run-length string into a binary mask.

    params:  lre   - run-length encoding string (whitespace-separated pairs
                     of 1-based start & length); an empty string decodes to
                     an all-zero mask
             shape - shape the flat pixel buffer is reshaped to before it is
                     transposed

    returns: uint8 numpy array of 0s and 1s with dimensions
             (shape[1], shape[0]), i.e. the TRANSPOSE of the shape parameter
             ((256, 1600) for the default)

    raises:  ValueError if the string holds an odd number of values or a
             value that is not an integer
    '''
    # split() without arguments tolerates repeated/leading/trailing
    # whitespace and yields no tokens at all for an empty string
    tokens = lre.split()
    if len(tokens) % 2:
        raise ValueError(
            'run-length encoding needs start/length pairs, got %d values'
            % len(tokens)
        )
    runs = np.asarray([int(token) for token in tokens], dtype=np.int64)

    # pixel numbers start at 1, indexes start at 0
    run_starts = runs[0::2] - 1
    # a run covers `length` pixels, so its exclusive end is start + length
    run_ends = run_starts + runs[1::2]

    # build the mask
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for start, end in zip(run_starts, run_ends):
        mask[start:end] = 1

    # transform the numpy array from flat to 2-D; note the transpose, which
    # swaps the two dimensions given in `shape`
    return mask.reshape(shape).T
def augmentation_test(dataframe: pd.DataFrame):
    """
    Load one randomly chosen sample and run it through `transform`.

    A row is looked up with dataframe.loc using a random integer label in
    0..9, so the frame must contain those index labels. The row's 'ImageId'
    is read from ./train_images as a single-channel image and resized with
    dsize (1600, 256); its 'EncodedPixels' string is decoded with
    rle_to_mask. Both are then passed to the module-level `transform`
    callable (presumably an augmentation pipeline -- confirm where it is
    defined).

    returns: (transformed image, transformed mask) taken from the 'image'
             and 'mask' entries of the transform result

    raises:  FileNotFoundError if the image file cannot be read
    """
    record = dataframe.loc[np.random.randint(10)]

    image_path = os.path.join("./train_images", record['ImageId'])
    image = cv2.imread(image_path, 0)
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with the path rather than later inside resize
    if image is None:
        raise FileNotFoundError(f"could not read image: {image_path}")
    image = cv2.resize(image, (1600, 256))

    mask = rle_to_mask(record['EncodedPixels'])

    transformed = transform(image=image, mask=mask)
    return transformed['image'], transformed['mask']
# Draw one augmented (image, mask) pair from the training frame.
img,mask=augmentation_test(train_df)
# NOTE(review): mask_to_rle flattens its input row by row, while rle_to_mask
# ends with reshape(shape).T, so encoding `mask` as-is does not round-trip;
# the string only decodes back to this mask if mask.T is encoded instead.
lre=mask_to_rle(mask)
# The mask is resized for display only; `lre` above was computed before this resize.
mask=cv2.resize(mask,(1600,256))
plt.imshow(mask)
# Decode the string produced above using rle_to_mask's default shape.
lree=rle_to_mask(lre)

I applied some augmentation techniques to the data for the minority classes. After augmenting a mask, encoding it to RLE and decoding it again, I don't get the same mask back. I don't know whether mask_to_rle is doing something wrong.


Solution

  • Actually, I found the answer.

    Here,

    lre=mask_to_rle(mask.T)
    

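    The augmented mask should be transposed before encoding it again: mask_to_rle flattens its input row by row, whereas rle_to_mask returns the transposed array (reshape(shape).T), so only the transposed mask produces a string that decodes back to the original.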