I want to measure the inference time of a single layer in AlexNet. The code below measures the inference time of the first fully connected layer of AlexNet as the batch size changes, and I have a few questions about it.
Given the following code:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import time
from tqdm import tqdm
class AlexNet(nn.Module):
    """AlexNet-style network instrumented to time its first linear layer.

    The module defines five convolution layers, a 6x6 adaptive average
    pool and three fully connected layers (9216 -> 4096 -> 4096 -> 1000).
    It exposes ``time()`` instead of ``forward()``: the method runs the
    whole network but only reports how long ``fc1`` took.
    """

    def __init__(self):
        super().__init__()
        # Stateless layers shared between several stages of the network.
        self.relu = nn.ReLU(inplace=True)
        self.maxpool2D = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.adaptive_avg_polling = nn.AdaptiveAvgPool2d((6, 6))
        self.dropout = nn.Dropout(p=0.5)

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        # Classifier head; fc1 is the layer being benchmarked.
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def time(self, x):
        """Run the network on ``x`` and return the seconds spent in ``fc1``.

        Args:
            x: input batch with 3 channels in dimension 1 (``conv1``
                expects 3 input channels); the spatial size is free because
                the adaptive pool always yields 6x6 feature maps.

        Returns:
            Elapsed time of the ``fc1`` call in seconds, as a float.

        Note:
            Dropout stays active unless the caller has put the module in
            ``eval()`` mode; this method does not change the module's mode.
        """
        on_gpu = x.is_cuda

        # Inference timing should not pay for autograd bookkeeping.
        with torch.no_grad():
            x = self.maxpool2D(self.relu(self.conv1(x)))
            x = self.maxpool2D(self.relu(self.conv2(x)))
            x = self.relu(self.conv3(x))
            x = self.relu(self.conv4(x))
            x = self.maxpool2D(self.relu(self.conv5(x)))
            x = self.adaptive_avg_polling(x)
            x = x.view(x.size(0), -1)
            x = self.dropout(x)

            # CUDA kernels are launched asynchronously. Without a
            # synchronize on both sides of the call, the clock would only
            # capture the kernel launch, not the execution of fc1, and
            # still-pending work from the earlier layers could leak into
            # the measurement.
            if on_gpu:
                torch.cuda.synchronize(x.device)
            start1 = time.perf_counter()
            x = self.fc1(x)
            if on_gpu:
                torch.cuda.synchronize(x.device)
            finish1 = time.perf_counter()

            # The rest of the network still runs, as in the original
            # code; its output is discarded.
            x = self.dropout(self.relu(x))
            x = self.fc2(x)
            x = self.relu(x)
            x = self.fc3(x)

        return finish1 - start1
def layer_time(test_iter=10000, num_batch_sizes=10):
    """Benchmark ``AlexNet.time`` over a series of doubling batch sizes.

    For batch sizes 1, 2, 4, ... the function builds one random input
    batch of shape ``(batch_size, 3, 227, 227)``, calls ``net.time`` on it
    ``test_iter`` times and prints the summed result for that batch size.

    Args:
        test_iter: number of timed calls per batch size.
        num_batch_sizes: how many batch sizes to try; each one is twice
            the previous, starting from 1.
    """
    use_cuda = torch.cuda.is_available()
    print("use_cuda : ", use_cuda)
    device = torch.device("cuda:0" if use_cuda else "cpu")

    net = AlexNet().to(device)
    # eval() disables Dropout so the benchmark reflects inference behaviour.
    net.eval()

    batch_size = 1
    for _ in range(num_batch_sizes):
        # Allocate directly on the target device instead of going through
        # the legacy typed-tensor API (torch.cuda.FloatTensor).
        X = torch.randn(size=(batch_size, 3, 227, 227), device=device)

        # One untimed call so that one-time start-up costs (lazy CUDA
        # initialisation, algorithm selection, allocator growth) are not
        # charged to the first measured iteration.
        net.time(X)

        s = 0.0
        for _ in tqdm(range(test_iter)):
            s += net.time(X)
        print(s)
        batch_size *= 2


if __name__ == "__main__":
    layer_time()
I found a way to measure inference time while studying the PyTorch AMP (automatic mixed precision) documentation. The timing utilities below synchronize the GPU with the CPU before each timestamp is taken, so the inference time can be measured accurately.
import torch, time, gc
# Timing utilities
# Timestamp recorded by the most recent start_timer() call; None until then.
start_time = None


def start_timer():
    """Start a timing window and store its start in the global ``start_time``.

    Runs a garbage collection first and, when CUDA is available, empties
    the CUDA caching allocator, resets the peak-memory statistics and waits
    for all queued GPU work to finish, so that earlier work is not charged
    to the window.
    """
    global start_time
    gc.collect()
    # Guard the CUDA calls: on a CPU-only host they raise instead of
    # being no-ops, which would make the timer unusable there.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        # reset_max_memory_allocated() is deprecated in favour of this call.
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()


def end_timer():
    """Close the timing window opened by ``start_timer()``.

    Returns:
        Seconds elapsed since the last ``start_timer()`` call, as a float.

    Raises:
        RuntimeError: if ``start_timer()`` has not been called yet.
    """
    if start_time is None:
        raise RuntimeError("end_timer() called before start_timer()")
    # Wait for outstanding GPU kernels so their run time is included.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    end_time = time.perf_counter()
    return end_time - start_time
So I changed my code as follows:
import torch, time, gc
from tqdm import tqdm
import torch.nn as nn
import torch
# Timing utilities
# Timestamp recorded by the most recent start_timer() call; None until then.
start_time = None


def start_timer():
    """Start a timing window and store its start in the global ``start_time``.

    Runs a garbage collection first and, when CUDA is available, empties
    the CUDA caching allocator, resets the peak-memory statistics and waits
    for all queued GPU work to finish, so that earlier work is not charged
    to the window.
    """
    global start_time
    gc.collect()
    # Guard the CUDA calls: on a CPU-only host they raise instead of
    # being no-ops, which would make the timer unusable there.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        # reset_max_memory_allocated() is deprecated in favour of this call.
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()


def end_timer():
    """Close the timing window opened by ``start_timer()``.

    Returns:
        Seconds elapsed since the last ``start_timer()`` call, as a float.

    Raises:
        RuntimeError: if ``start_timer()`` has not been called yet.
    """
    if start_time is None:
        raise RuntimeError("end_timer() called before start_timer()")
    # Wait for outstanding GPU kernels so their run time is included.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    end_time = time.perf_counter()
    return end_time - start_time
class AlexNet(nn.Module):
    """AlexNet-style network instrumented to time its first linear layer.

    The module defines five convolution layers, a 6x6 adaptive average
    pool and three fully connected layers (9216 -> 4096 -> 4096 -> 1000).
    It exposes ``time()`` instead of ``forward()``: the method runs the
    whole network but only reports how long ``fc1`` took, using the
    ``start_timer()`` / ``end_timer()`` helpers defined in this file.
    """

    def __init__(self):
        super().__init__()
        # Stateless layers shared between several stages of the network.
        self.relu = nn.ReLU(inplace=True)
        self.maxpool2D = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.adaptive_avg_polling = nn.AdaptiveAvgPool2d((6, 6))
        self.dropout = nn.Dropout(p=0.5)

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        # Classifier head; fc1 is the layer being benchmarked.
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def time(self, x):
        """Run the network on ``x`` and return the seconds spent in ``fc1``.

        Args:
            x: input batch with 3 channels in dimension 1 (``conv1``
                expects 3 input channels); the spatial size is free because
                the adaptive pool always yields 6x6 feature maps.

        Returns:
            Whatever ``end_timer()`` reports for the window that encloses
            the ``fc1`` call (elapsed seconds).

        Note:
            Dropout stays active unless the caller has put the module in
            ``eval()`` mode; this method does not change the module's mode.
        """
        # Inference timing should not pay for autograd bookkeeping.
        with torch.no_grad():
            x = self.maxpool2D(self.relu(self.conv1(x)))
            x = self.maxpool2D(self.relu(self.conv2(x)))
            x = self.relu(self.conv3(x))
            x = self.relu(self.conv4(x))
            x = self.maxpool2D(self.relu(self.conv5(x)))
            x = self.adaptive_avg_polling(x)
            x = x.view(x.size(0), -1)
            x = self.dropout(x)

            # Check first linear layer inference time
            start_timer()
            x = self.fc1(x)
            result = end_timer()

            # The rest of the network still runs, as in the original
            # code; its output is discarded.
            x = self.dropout(self.relu(x))
            x = self.fc2(x)
            x = self.relu(x)
            x = self.fc3(x)

        return result
def layer_time(test_iter=1000, num_batch_sizes=10):
    """Benchmark ``AlexNet.time`` over a series of doubling batch sizes.

    For batch sizes 1, 2, 4, ... the function builds one random input
    batch of shape ``(batch_size, 3, 227, 227)``, calls ``net.time`` on it
    ``test_iter`` times and prints the summed result for that batch size.

    Args:
        test_iter: number of timed calls per batch size.
        num_batch_sizes: how many batch sizes to try; each one is twice
            the previous, starting from 1.
    """
    use_cuda = torch.cuda.is_available()
    print("use_cuda : ", use_cuda)
    device = torch.device("cuda:0" if use_cuda else "cpu")

    net = AlexNet().to(device)
    # eval() disables Dropout so the benchmark reflects inference behaviour.
    net.eval()

    batch_size = 1
    for _ in range(num_batch_sizes):
        # Allocate directly on the target device instead of going through
        # the legacy typed-tensor API (torch.cuda.FloatTensor).
        X = torch.randn(size=(batch_size, 3, 227, 227), device=device)

        # One untimed call so that one-time start-up costs (lazy CUDA
        # initialisation, algorithm selection, allocator growth) are not
        # charged to the first measured iteration.
        net.time(X)

        s = 0.0
        for _ in tqdm(range(test_iter)):
            s += net.time(X)
        print(s)
        batch_size *= 2


if __name__ == "__main__":
    layer_time()