I've recently followed a tutorial here: https://www.provideocoalition.com/automatic-rotoscopingfor-free/
I ended up with a working piece of code that generates masks outlining the objects of interest.
But now, I want to run it on my GPU, since the CPU is way too slow.
I have CUDA installed, but PyTorch refuses to use it. I've tried the usual tricks, such as setting torch.device, but to no avail; PyTorch still shows no GPU usage.
Here's the code:
from PIL import Image
import torch
import torchvision.transforms as T
from torchvision import models
import numpy as np
# Segmentation model, loaded lazily on first use by getRotoModel().
fcn = None

# Pick the GPU when CUDA is usable and fall back to the CPU otherwise, so the
# script no longer crashes at import time on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()
if device.type == 'cuda':
    # Make GPU 0 the current CUDA device, as the original script did.
    torch.cuda.set_device(0)
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    # memory_cached() is deprecated; memory_reserved() is its replacement.
    print('Cached:', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')
def getRotoModel():
    """Load the pretrained DeepLabV3-ResNet101 model into the global ``fcn``.

    The model is put in eval mode and moved to the module-level ``device``.
    Without that move the weights stay on the CPU and inference never runs
    on the GPU, whatever ``device`` is set to.
    """
    global fcn
    # Alternative model: models.segmentation.fcn_resnet101(pretrained=True)
    model = models.segmentation.deeplabv3_resnet101(pretrained=True)
    fcn = model.eval().to(device)


def decode_segmap(image, nc=21):
    """Convert an array of class indices into an RGB colour image.

    Args:
        image: NumPy array of class indices; expected to be 2-D (H, W), as
            produced by ``createMatte``.
        nc: Number of classes to colour, counted from index 0. Must not
            exceed the 21 entries of the palette below.

    Returns:
        ``uint8`` array with the colour channels stacked along axis 2.
        Elements whose class index is ``>= nc`` are left black.
    """
    label_colors = np.array([(0, 0, 0),  # 0=background
        # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
        (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
        # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
        (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
        # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
        (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
        # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
        (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for label in range(nc):
        idx = image == label
        r[idx] = label_colors[label, 0]
        g[idx] = label_colors[label, 1]
        b[idx] = label_colors[label, 2]
    return np.stack([r, g, b], axis=2)


def createMatte(filename, matteName, size):
    """Segment the image at ``filename`` and save a colour-coded matte.

    Args:
        filename: Path of the input image.
        matteName: Path the resulting matte image is written to.
        size: Size argument forwarded to ``torchvision.transforms.Resize``.
    """
    trf = T.Compose([T.Resize(size),
                     T.ToTensor(),
                     T.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
    # Close the file promptly, and force three channels so the 3-channel
    # Normalize above also works for greyscale, palette and RGBA inputs.
    with Image.open(filename) as img:
        inp = trf(img.convert('RGB')).unsqueeze(0)
    if fcn is None:
        getRotoModel()
    # The input must live on the same device as the model's weights.
    inp = inp.to(device)
    # Inference only: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        out = fcn(inp)['out']
    # Drop only the batch axis, then take the highest-scoring class per pixel.
    om = torch.argmax(out.squeeze(0), dim=0).cpu().numpy()
    rgb = decode_segmap(om)
    im = Image.fromarray(rgb)
    im.save(matteName)
What could I do? Thanks.
If everything is set up correctly, you just have to move the tensors you want to process on the GPU to the GPU; that means the model (its weights are tensors too) as well as its inputs. You can try this to make sure it works in general:
import torch
# Build a one-element tensor and place it on the GPU in a single step.
t = torch.tensor([1.0]).cuda()
# Expected output resembles: tensor([1.], device='cuda:0')
print(t)
# Run a reduction too, to confirm that computation on the GPU works.
print(t.mean())
You already have a `device` variable, so instead of `.cuda()` you can just use `.to(device)`. That is also the preferable way to do it, because you can then switch between CPU and GPU by setting one variable.