How to ensure all PyTorch code fully utilises GPU on Google Colab

I'm new to PyTorch and have been working through some CIFAR10 tutorials, specifically on Google Colab, because I don't personally own a GPU to experiment with yet.

I have successfully trained my neural network, but I'm not sure whether my code actually uses Colab's GPU, because training on Colab is not much faster than on my 2014 MacBook Pro (which has no GPU).

I checked, and my notebook is indeed running on a Tesla K80, yet training is still very slow for some reason. So I suspect my code is missing the GPU-specific parts, but I can't work out which part that would be.
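For what it's worth, the check I ran to confirm the assigned GPU was roughly the following (I don't remember the exact cell; !nvidia-smi reports the same thing):

import torch
print(torch.cuda.is_available())      # prints True on a GPU runtime
print(torch.cuda.get_device_name(0))  # prints the GPU model, e.g. 'Tesla K80'

And here is my full notebook code: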

# install PyTorch 0.4.0 (old Colab recipe: pick the wheel matching the runtime's Python ABI and CUDA availability)
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision

import torch
import torch.nn as nn
from torch.optim import Adam
from torchvision import transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# hyperparameters
n_epochs = 50
n_batch_size = 200
n_display_step = 200
n_learning_rate = 1e-3
n_download_cifar = True

# import cifar
# more about cifar https://www.cs.toronto.edu/~kriz/cifar.html

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = datasets.CIFAR10(
                    root="../datasets/cifar", 
                    train=True, 
                    transform=transform,
                    download=n_download_cifar)
test_dataset = datasets.CIFAR10(
                    root="../datasets/cifar", 
                    train=False, 
                    transform=transform)

# create data loader
train_loader = DataLoader(train_dataset, batch_size=n_batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=n_batch_size, shuffle=False)

# build CNN
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        # (3, 32, 32)
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2))

        # (32, 16, 16)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 16, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2))

        # (16, 8, 8)
        self.out = nn.Linear(16 * 8 * 8, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        out = self.out(x)
        return out

net = CNN()
net.to(device)  # move all model parameters onto the selected device (GPU when available)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(net.parameters(), lr=n_learning_rate)

def get_accuracy(model, loader):
    model.eval()
    n_samples = 0
    n_correct = 0

    with torch.no_grad():
        for step, (x, y) in enumerate(loader):
            x, y = x.to(device), y.to(device)  # move the evaluation batch onto the same device as the model
            out = model(x)
            _, pred = torch.max(out, 1)
            n_samples += y.size(0)
            n_correct += (pred == y).sum().item()

    return n_correct / n_samples


def train(model, criterion, optimizer, epochs, train_loader, test_loader):
    for epoch in range(epochs):
        for step, (x, y) in enumerate(train_loader):
            model.train()
            x, y = x.to(device), y.to(device)  # move the training batch onto the same device as the model
            out = model(x)
            loss = criterion(out, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % n_display_step == 0:
                # note: this evaluates accuracy over the entire train and test sets each time it prints
                print("Epoch {:2d} Loss {:.4f} Accuracy (Train | Test) {:.4f} {:.4f}".format(epoch, loss.item(), get_accuracy(model, train_loader), get_accuracy(model, test_loader)))

train(net, criterion, optimizer, n_epochs, train_loader, test_loader)

Your code looks fine to me. I ran it on my MacBook, on a machine with a GPU, and on Google Colab, and compared the training times; my experiments suggest that your code does make proper use of the GPU.
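If you want a quick sanity check yourself, you can print where the parameters and a batch actually live. This is just a sketch that reuses the net, device and train_loader variables from your code:

print(next(net.parameters()).device)  # should print cuda:0 when the model is on the GPU
x, y = next(iter(train_loader))
print(x.to(device).device)            # your training loop moves every batch like this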

You could try running the snippet from  to see how much GPU RAM Google has allocated to you; my guess is that you were given only about 5% of the GPU.
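The link seems to have gone missing above, but the snippet I have in mind is along these lines (a sketch that assumes the gputil, psutil and humanize packages, installed with pip in a Colab cell):

!pip install -q gputil psutil humanize
import os
import psutil
import humanize
import GPUtil

gpu = GPUtil.getGPUs()[0]              # the single GPU Colab assigns to the runtime
process = psutil.Process(os.getpid())
print("Gen RAM Free:", humanize.naturalsize(psutil.virtual_memory().available),
      "| Proc size:", humanize.naturalsize(process.memory_info().rss))
print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(
      gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil * 100, gpu.memoryTotal))

If the "GPU RAM Free" figure comes back very low, that would match my guess above.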

Regards,

Rex