I'm working on a semantic segmentation task for the Severstal steel defect detection dataset. These are my RLE encoding and decoding functions:
import os

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def mask_to_rle(mask):
    """
    params: mask - numpy array
    returns: run-length encoding string (pairs of start & length)
    """
    # turn the 2-dimensional array into a 1-dimensional series of pixels,
    # for example:
    # [[1. 1. 0.]
    #  [0. 0. 0.]  --> [1. 1. 0. 0. 0. 0. 1. 0. 0.]
    #  [1. 0. 0.]]
    flat = mask.flatten()
    # we find consecutive runs by comparing the mask against a version of
    # itself that is shifted by one pixel; pad with a zero on each side so
    # runs touching the borders are detected as well
    padded = np.concatenate([[0], flat, [0]])
    # indices where the shifted arrays differ, i.e. where a run starts or ends
    runs = np.where(padded[1:] != padded[:-1])[0]
    # indices start at 0, pixel numbers start at 1
    runs += 1
    # elements at even positions (0, 2, ...) are run starts,
    # elements at odd positions are one past the end of each run;
    # subtract the former from the latter to get the run lengths
    runs[1::2] -= runs[0::2]
    # convert the array to a space-delimited string
    return ' '.join(str(x) for x in runs)
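As a quick sanity check, here is a tiny made-up mask matching the example in the docstring:

demo = np.array([[1, 1, 0],
                 [0, 0, 0],
                 [1, 0, 0]], dtype=np.uint8)
print(mask_to_rle(demo))  # '1 2 7 1' -> a run of 2 pixels starting at pixel 1, a run of 1 starting at pixel 7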
def rle_to_mask(rle, shape=(1600, 256)):
    '''
    params: rle - run-length encoding string (pairs of start & length)
            shape - (width, height) of the mask to build
    returns: numpy array of shape (height, width)
    '''
    # the incoming string is space-delimited
    runs = np.asarray([int(run) for run in rle.split(' ')])
    # the inverse of the encoding step: add the lengths to the starts
    # so that elements at odd positions become the run ends
    runs[1::2] += runs[0::2]
    # pixel numbers start at 1, indices start at 0
    runs -= 1
    # extract the starting and ending indices at even and odd positions, respectively
    run_starts, run_ends = runs[0::2], runs[1::2]
    # build the flat mask
    width, height = shape
    mask = np.zeros(width * height, dtype=np.uint8)
    for start, end in zip(run_starts, run_ends):
        mask[start:end] = 1
    # reshape to (width, height), then transpose to get the (height, width) image
    return mask.reshape(shape).T
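Decoding goes the other way. For example, with a hypothetical single-run encoding at the competition's default shape:

decoded = rle_to_mask('1 5', shape=(1600, 256))
print(decoded.shape)  # (256, 1600) -- height x width, the transpose of the shape argument
print(decoded.sum())  # 5 pixels set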
def augmentation_test(dataframe: pd.DataFrame):
    # pick a random record from the first 10 rows
    record = dataframe.loc[np.random.randint(10)]
    # read the image as grayscale and make sure it is 1600x256
    image = cv2.imread(os.path.join("./train_images", record['ImageId']), 0)
    image = cv2.resize(image, (1600, 256))
    # decode the ground-truth mask from its RLE string
    mask = rle_to_mask(record['EncodedPixels'])
    # apply the augmentation pipeline to the image and mask together
    transformed = transform(image=image, mask=mask)
    transformed_image = transformed['image']
    transformed_mask = transformed['mask']
    return transformed_image, transformed_mask
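transform is my augmentation pipeline and isn't shown above. A minimal sketch of what it could look like, assuming Albumentations and placeholder augmentations (not necessarily the ones I actually used):

import albumentations as A

# hypothetical pipeline -- substitute whatever `transform` really is
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])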
# augment a sample, encode the augmented mask to RLE and decode it again
img, mask = augmentation_test(train_df)
lre = mask_to_rle(mask)
mask = cv2.resize(mask, (1600, 256))
plt.imshow(mask)
lree = rle_to_mask(lre)
I applied some augmentation techniques to the data of the minority classes. After augmenting, encoding the mask into RLE and decoding it back does not give me the same mask. I don't know whether mask_to_rle is doing something wrong.
Actually, I found the answer. Here,

lre = mask_to_rle(mask.T)

The augmented mask has to be transposed before encoding it again. rle_to_mask builds a flat pixel buffer, reshapes it to (width, height) and then transposes it, so the array it returns is the transpose of that buffer's row-major layout. mask_to_rle, however, flattens row-major, so the mask must be transposed back before re-encoding; with that change the round trip matches.
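A minimal round-trip check with the functions above (this assumes the augmented mask is binary and non-empty, since an all-zero mask encodes to an empty string that rle_to_mask as written cannot parse):

reencoded = mask_to_rle(mask.T)
restored = rle_to_mask(reencoded)
print(np.array_equal(restored, mask))  # True once the transpose is applied before encoding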