Search code examples
pythonartificial-intelligence

Python: delete item from list when difference with previous appended item is small


I'm creating an image popularity algorithm that cuts a video (.mp4) into frames. With the help of AI, the program examines which frames probably display the most beautiful images; the result is expressed as a 'score'.

This works but I encounter a problem. Because certain frames in a video are very similar, I have many frames with (almost) the same score.

The end result is a list of [score, frame number] pairs. When, for example, 3 items in the list have almost identical frame numbers and therefore (almost) identical scores, I want to keep only the item with the highest score, so that the near-duplicates are removed.

It has something to do with this line: result.append((predict(pil_image, model), name))

Here is the code:

import os
import torch
import torchvision.models
import torchvision.transforms as transforms
from PIL import Image
import json
import cv2


def prepare_image(image):
    """Turn a PIL image into the tensor that is fed to the model.

    The image is converted to RGB when its mode is anything else, resized
    to 224x224, converted to a tensor, and given an extra leading
    dimension of size 1 via ``unsqueeze(0)``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert("RGB")
    pipeline = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
    ])
    return pipeline(rgb_image).unsqueeze(0)


def predict(image, model):
    """Score one PIL image with ``model`` and return the score as text.

    The image is preprocessed with ``prepare_image`` and run through the
    model with gradient tracking disabled. The single output value is
    rounded to two decimals, printed together with the module-level
    ``framesToDo`` progress counter, and returned as a string.
    """
    batch = prepare_image(image)
    with torch.no_grad():
        output = model(batch)
    rounded = round(output.detach().numpy().item(), 2)
    # framesToDo is a global maintained by the frame loop of the main script.
    print(f"Picture score: {rounded!s}  |  frames left: {framesToDo!s}")
    return str(rounded)

if __name__ == '__main__':
    # ResNet-50 whose final layer is replaced by a single-output linear
    # layer; the weights are loaded from disk onto the CPU.
    model = torchvision.models.resnet50()
    model.fc = torch.nn.Linear(in_features=2048, out_features=1)
    model.load_state_dict(torch.load('model/model-resnet50.pth', map_location=torch.device('cpu')))
    model.eval()

    result = []
    framestep = 500  # for Stackoverflow example

    # cv2.imwrite does not create missing directories, so make sure the
    # output folder exists before any frame is saved.
    os.makedirs('./frames_saved', exist_ok=True)

    # The folder ./video holds videos named 1.mp4 through 23.mp4
    # (range's upper bound is exclusive, hence 24).
    for i in range(1, 24):
        vidcap = cv2.VideoCapture('./video/' + str(i) + '.mp4')
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        # NOTE: framesToDo must stay a module-level name; predict() reads it
        # as a global when printing progress.
        framesToDo = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
        count = 0

        while count < total_frames:
            # Seek to the frame that `count` refers to. A plain read() only
            # advances by one frame, so without the seek the saved name
            # would not match the frame that is actually scored.
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, count)
            succes, vidcap_image = vidcap.read()
            if not succes:
                break

            name = str(i) + '_' + str(count)
            cv2.imwrite("./frames_saved/" + 'vid' + '_' + name + ".jpg", vidcap_image)  # save frame as jpg image
            framesToDo = framesToDo - framestep

            # OpenCV delivers BGR; PIL expects RGB.
            cv2_image = cv2.cvtColor(vidcap_image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(cv2_image)
            result.append((predict(pil_image, model), name))

            count += framestep  # 500 frames further

        vidcap.release()

    # predict() returns the score as a string; sort on its numeric value so
    # that e.g. "10.5" is not placed before "9.2".
    result.sort(key=lambda item: float(item[0]))
    print(result)

    with open('result.json', 'w') as filehandle:
        filehandle.write(json.dumps(result))




Solution

  • Since there is no reproducible example, you will need to adapt this to your problem. The snippet analyses the data of each frame: it skips unnecessary frames, updates the best value, and appends new values.

    MAX_FRAME_NUMBER_DIFF = 60
    MAX_SCORE_DIFF = 0.5
    
    current_frame = count
    current_score = predict(pil_image, model)
    data = (current_score, current_frame)
    
    if not results:
        results.append(data)
    else:
        last_score, last_frame = results[-1]
        is_similar_frame = current_frame - last_frame <= MAX_FRAME_NUMBER_DIFF
        is_score_better = current_score > last_score
        is_score_way_better = current_score - last_score <= MAX_SCORE_DIFF
    
        if is_similar_frame:
            if is_score_better:
                if is_score_way_better: # if diff between current score and previous score bigger than MAX_SCORE_DIFF
                    results.append(data)
                else: # current score better than previous but not so better
                    results[-1] = data # update last value
            else: # current score not better than previous
                continue # skip this one
        else: # if not similar frames
            results.append(data)