Forcing PyTorch to use the GPU

I've recently followed a tutorial here: https://www.provideocoalition.com/automatic-rotoscopingfor-free/

I ended up with a working bit of code that generates masks outlining interesting objects.

But now, I want to run it on my GPU, since the CPU is way too slow.

I have CUDA installed and all, but PyTorch refuses to use it. I've tried most of the usual tricks, like setting torch.device, but to no avail; PyTorch keeps showing 0% GPU usage.

Here's the code:

    from PIL import Image
    import torch
    import torchvision.transforms as T
    from torchvision import models
    import numpy as np
    
    # Lazily-initialised segmentation model; populated by getRotoModel().
    fcn = None


    # Select the GPU when CUDA is usable, otherwise fall back to the CPU so the
    # script still runs (and reports what it picked) on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)
    print()

    if device.type == 'cuda':
        # Make GPU 0 the current CUDA device; only valid when CUDA is available.
        torch.cuda.set_device(0)
        print(torch.cuda.get_device_name(0))
        print('Memory Usage:')
        print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
        # memory_reserved() replaces the deprecated memory_cached().
        print('Cached:', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')
    
    
    def getRotoModel():
        """Load the pretrained DeepLabV3-ResNet101 model into the global ``fcn``.

        The model is put into evaluation mode before it is stored.
        """
        global fcn
        # Alternative model kept for reference:
        #fcn = models.segmentation.fcn_resnet101(pretrained=True).eval()
        model = models.segmentation.deeplabv3_resnet101(pretrained=True)
        model.eval()
        fcn = model
    
    
    # Define the helper function
    def decode_segmap(image, nc=21):
        """Turn an array of class labels into an RGB colour image.

        Each label ``0 .. nc-1`` found in ``image`` is painted with the matching
        row of the colour table below; any other label stays black.

        Args:
            image: Array of integer class labels.
            nc: Number of classes (rows of the colour table) to paint.

        Returns:
            A ``uint8`` array with the three colour channels stacked on axis 2.
        """
        palette = np.array([
            (0, 0, 0),  # 0=background
            # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
            (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
            # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
            (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
            # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
            (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
            # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
            (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
        ])

        # One zero-filled uint8 plane per colour channel, shaped like the input.
        red, green, blue = (np.zeros_like(image).astype(np.uint8) for _ in range(3))

        for label in range(nc):
            mask = image == label
            # Unpack the (R, G, B) row and paint every pixel carrying this label.
            red[mask], green[mask], blue[mask] = palette[label]

        return np.stack([red, green, blue], axis=2)
    
    def createMatte(filename, matteName, size):
        """Segment the image at ``filename`` and save a colour-coded matte.

        The image is resized with ``T.Resize(size)``, converted to a tensor and
        normalised, then passed through the global model ``fcn`` (loaded on
        first use via ``getRotoModel``). The per-pixel argmax over the model's
        ``'out'`` scores is coloured by ``decode_segmap`` and written to
        ``matteName``.

        Note: neither the model nor the input tensor is moved to another device
        in this function, so inference runs wherever they were created.

        Args:
            filename: Path of the source image.
            matteName: Path the matte image is saved to.
            size: Target size forwarded to ``T.Resize``.
        """
        trf = T.Compose([T.Resize(size),
                         T.ToTensor(),
                         T.Normalize(mean = [0.485, 0.456, 0.406],
                                     std = [0.229, 0.224, 0.225])])
        # Close the file handle promptly, and force three channels: Normalize
        # is given 3-element mean/std, so input with another channel count
        # (e.g. RGBA or greyscale) would not match.
        with Image.open(filename) as img:
            inp = trf(img.convert('RGB')).unsqueeze(0)
        if fcn is None:
            getRotoModel()
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            out = fcn(inp)['out']
        # Drop the batch dimension, then pick the highest-scoring class per pixel.
        om = torch.argmax(out.squeeze(), dim=0).cpu().numpy()
        rgb = decode_segmap(om)
        Image.fromarray(rgb).save(matteName)

What could I do? Thanks.

Solution

If everything is set up correctly, you just have to move the tensors you want to process on the GPU to the GPU. This includes the model, since its weights are tensors too. You can try this to make sure it works in general:

# Sanity check: put a tensor on the GPU and run an operation on it.
import torch
t = torch.tensor([1.0]) # Create a tensor holding a single 1.0
t = t.cuda() # Move t to the GPU
print(t) # Should print something like tensor([1.], device='cuda:0')
print(t.mean()) # Test an operation just to be sure

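You already have a device variable, so instead of .cuda() you can just use .to(device). That is also the preferable way to do it, because you can then switch between CPU and GPU by setting one variable. For the code above, that means moving both the model and the input, e.g. fcn.to(device) after loading the model and inp = inp.to(device) before calling it.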