Search code examples
python pytorch tensor

How to convert a frame (NumPy array) to a PyTorch tensor


I need to convert a video frame (which is a NumPy array) to a PyTorch tensor, perform some operations on it, and convert it back, but I'm struggling.

So, I have a frame returned from video_capture.read() and, as I understand it, it's a NumPy array. First, I convert it to a tensor and check that it looks correct (sorry, I can't add photos for some reason). Then I analyze it (no errors there) and try to rotate it, and that's where the problems start:

  1. frame.show() shows the tensor in different colours, looks super wrong
  2. self.check_tensor(rotated_tensor) after rotation shows just a black screen

Can somebody please help me fix this? I'm so exhausted; ChatGPT confuses me even more and I don't understand anything... I guess the colour problem is related to how I convert the tensor to a PIL image, but I tried several changes (the commented-out lines) and nothing helped. Also, is there a way to avoid converting the tensor to a PIL image before the rotation? Can't I just rotate a tensor directly?

def tensor_to_image(tensor):
    """Convert a batched CHW image tensor to a PIL image with channels 0 and 2 swapped.

    The tensor is multiplied by 255, cast to uint8, stripped of its batch
    dimension, moved to channels-last layout and passed through
    ``cv2.COLOR_BGR2RGB`` before being wrapped in a PIL image.
    """
    # Scale and cast to uint8.
    # NOTE(review): this assumes values in [0, 1]; the tensor built in
    # analyze_video is a plain .float() of the frame (presumably 0..255), so
    # multiplying by 255 there would overflow uint8 — verify against caller.
    tensor = (tensor * 255).byte()
    # Drop the leading batch dimension, then CHW -> HWC.
    tensor = tensor.squeeze(0)
    tensor = tensor.permute(1, 2, 0)
    # NOTE(review): this PIL image is immediately turned back into an array on
    # the next line, so the round-trip looks redundant.
    image = Image.fromarray(np.array(tensor).astype(np.uint8))
    # Swap the first and last channels (BGR -> RGB).
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
    image = Image.fromarray(np.asarray(image))
    return image

def rotate_tensor(frame_tensor, landmarks):
    """Rotate a batched image tensor by the angle computed from ``landmarks``.

    Returns a new tensor with a leading batch dimension holding the rotated
    image, or ``frame_tensor`` unchanged when the computed angle is NaN.
    """
    roll = calc_face_angle(landmarks)
    # NOTE(review): `tf` is presumably torchvision.transforms.functional. Per
    # the torchvision docs, to_pil_image scales float tensors by 255 before
    # casting to uint8, i.e. it expects values in [0, 1]. analyze_video passes
    # an un-normalised float tensor in OpenCV's channel order, which would
    # explain the wrong colours in frame.show() — confirm.
    frame = tf.to_pil_image(frame_tensor.squeeze(0))
    # Alternative conversion, currently disabled:
    #frame = tensor_to_image(frame_tensor)
    # Debug preview of the image before rotation.
    frame.show()

    if not np.isnan(roll):
        # expand=True grows the canvas so the rotated image is not cropped.
        rotated_frame = frame.rotate(roll, resample=Image.BICUBIC, expand=True)
    else:
        print("Failed to calculate face angle for rotation")
        return frame_tensor

    #rotated_tensor = tf.to_tensor(rotated_frame).unsqueeze(0)
    
    # NOTE(review): per the torchvision docs, ToTensor yields floats in
    # [0, 1], so the returned tensor is on a different scale from the
    # un-normalised input — confirm callers expect that.
    transform = transforms.ToTensor()  # Use torchvision to convert to a tensor
    rotated_tensor = transform(rotated_frame).unsqueeze(0)
    return rotated_tensor
    

def check_tensor(self, frame_tensor):
    """Show a batched CHW image tensor in an OpenCV window until a key is pressed."""
    # Drop the batch dimension, move channels last and cast to uint8.
    # NOTE(review): .byte() casts without rescaling, so a tensor holding values
    # in [0, 1] truncates to 0/1 and shows as an almost black image; this only
    # displays correctly for values already in 0..255.
    frame_numpy = frame_tensor.squeeze(0).permute(1, 2, 0).byte().numpy()
    #frame_numpy = cv2.cvtColor(frame_numpy, cv2.COLOR_RGB2BGR)

    cv2.imshow("Frame", frame_numpy)
    # Block until any key is pressed, then close the window.
    cv2.waitKey(0)   
    cv2.destroyAllWindows() 


def analyze_video(self, video_path):
    """Read the first frame of ``video_path``, analyse it, rotate it and display it."""
    video_capture = cv2.VideoCapture(video_path)
    # NOTE(review): the capture is never released in the code shown here.

    # Only a single frame is processed.
    for i in range(1):
        ret, frame = video_capture.read()
        if not ret:
            break

        # Convert the frame to a tensor.
        # NOTE(review): the values are cast to float but not divided by 255,
        # and the channel order is left as delivered by OpenCV (presumably
        # BGR); downstream helpers that expect RGB in [0, 1] will misbehave.
        frame_tensor = torch.from_numpy(frame).float()
        # HWC -> CHW, then add a leading batch dimension.
        frame_tensor = frame_tensor.permute(2, 0, 1).unsqueeze(0)
        #frame_tensor = frame_tensor[:, [2, 1, 0], :, :]

        self.check_tensor(frame_tensor)

  
        orig_prediction = self.analyze_frame(frame_tensor)
   
        # `im` is presumably the module that defines rotate_tensor — verify.
        rotated_tensor = im.rotate_tensor(frame_tensor, orig_prediction.head())
        self.check_tensor(rotated_tensor)

Solution

  • This is how you can do it:

    def frame_to_tensor(frame):
        """Turn an OpenCV BGR frame into a batched RGB float tensor in [0, 1].

        Args:
            frame: Image array as returned by ``cv2.VideoCapture.read()``
                (height x width x channels, BGR order).

        Returns:
            A float tensor of shape (1, C, H, W) with RGB channels, scaled by
            1/255.
        """
        # OpenCV delivers BGR; the rest of the pipeline works in RGB.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # HWC -> CHW as float.
        chw = torch.from_numpy(rgb).float().permute(2, 0, 1)
        # Prepend the batch dimension and normalise.
        batched = chw.unsqueeze(0)
        return batched / 255.0
    
    def tensor_to_frame(tensor):
        """Convert a batched RGB float tensor in [0, 1] to an OpenCV BGR frame.

        Args:
            tensor: Image tensor of shape (1, C, H, W) or (C, H, W) with RGB
                channels and values nominally in [0, 1].

        Returns:
            A uint8 NumPy array of shape (H, W, C) in BGR order, ready for
            ``cv2.imshow`` / ``cv2.imwrite``.
        """
        # Detach and move to the CPU so that .numpy() also works for tensors
        # that require grad or live on another device.
        frame = tensor.detach().cpu()
        # Remove batch dimension and move channels last
        frame = frame.squeeze(0).permute(1, 2, 0)
        # Scale back to [0, 255]. Clamp first: values outside [0, 1] (e.g. from
        # interpolation overshoot or a model output) would otherwise wrap
        # around in the uint8 cast. Round instead of truncating so that k/255
        # maps back to k despite floating-point error.
        frame = (frame.clamp(0, 1) * 255).round().byte().numpy()
        # Convert RGB to BGR for OpenCV
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        return frame
    
    def rotate_tensor(tensor, angle):
        """Rotate a batched image tensor by ``angle`` via a PIL round-trip.

        Args:
            tensor: RGB image tensor with a leading batch dimension and values
                in [0, 1].
            angle: Rotation angle passed straight to ``PIL.Image.rotate``.

        Returns:
            The rotated image as a tensor with a leading batch dimension. The
            canvas is expanded to fit the whole rotated image.
        """
        # to_pil_image expects an RGB tensor in [0, 1] without the batch dim.
        pil_image = tf.to_pil_image(tensor.squeeze(0))
        pil_rotated = pil_image.rotate(angle, resample=Image.BICUBIC, expand=True)
        # Back to a tensor, restoring the batch dimension.
        rotated_tensor = tf.to_tensor(pil_rotated)
        return rotated_tensor.unsqueeze(0)
    
    # Usage in your analyze_video:
    ret, frame = video_capture.read()
    # NOTE: check `ret` before using `frame`; keep the `if not ret: break`
    # guard from your loop.
    frame_tensor = frame_to_tensor(frame)
    # Do your analysis...
    # `angle` is the rotation to apply, e.g. the roll you compute from the
    # detected landmarks.
    rotated_tensor = rotate_tensor(frame_tensor, angle)
    # Display/save
    output_frame = tensor_to_frame(rotated_tensor)
    cv2.imshow("Frame", output_frame)
    # imshow only renders once the GUI event loop runs, so wait for a key
    # press and then close the window.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    

    Key points:

    • Always handle color space conversion (BGR ↔ RGB) when moving between OpenCV and PyTorch.
    • Keep tensor values normalized (0-1) during processing.
    • Scale back to 0-255 only for display/saving.
    • Maintain consistent channel ordering (HWC vs CHW).