MNIST with PyTorch#

The following code example is based on Mikhail Klassen’s article Tensorflow vs. PyTorch by example.

First, you need to install PyTorch and torchvision in a new Anaconda environment.
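For example, assuming a conda-based workflow (check pytorch.org for the exact command matching your platform and CUDA version):

conda create -n pytorch-env python
conda activate pytorch-env
conda install pytorch torchvision -c pytorch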

Setup#

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import ExponentialLR

# Use a GPU for training if one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Define settings#

# Random seed for reproducibility
seed = 42
torch.manual_seed(seed)

# Save the model at the end?
save_model = False

# Batch sizes for training and testing
batch_size = 64
test_batch_size = 14

# Number of training epochs (2 for a quick demo; 10 is a more typical choice)
n_epochs = 2

# Learning rate (1.0 is the PyTorch default for the Adadelta optimizer used below)
learning_rate = 1.0

# Decay rate for adjusting the learning rate
gamma = 0.7

# How many batches before logging training status
log_interval = 10

# Number of target classes in the MNIST data
num_classes = 10

train_kwargs = {'batch_size': batch_size}
test_kwargs = {'batch_size': test_batch_size}

# CUDA settings
if torch.cuda.is_available():
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

Data preparation#

# The scaled mean and standard deviation of the MNIST dataset (precalculated)
data_mean = 0.1307
data_std = 0.3081

# Convert input images to tensors and normalize
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_mean,), (data_std,))
])

# Get the MNIST data from torchvision
dataset1 = datasets.MNIST('../data', train=True, download=True,
                          transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
                          transform=transform)

# Define the data loaders that will handle fetching of data
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
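If you ever need to recompute those normalization statistics, a minimal sketch (it materializes the full training set in memory, roughly 180 MB, and assumes the data has already been downloaded):

# Stack all 60,000 training images into one tensor and take global stats
raw = datasets.MNIST('../data', train=True, download=True,
                     transform=transforms.ToTensor())
images = torch.stack([img for img, _ in raw])
print(images.mean().item(), images.std().item())  # ~0.1307, ~0.3081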

Model architecture#

# Define the architecture of the neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding='valid')
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding='valid')
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
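        # 9216 = 64 channels x 12 x 12: two valid 3x3 convs shrink 28x28 to 24x24,
        # and the 2x2 max-pool in forward() halves that to 12x12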
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        # Return log-probabilities; they pair with the NLL loss used in training
        output = F.log_softmax(x, dim=1)
        return output
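As a quick sanity check on those dimensions, you can trace a dummy batch through the untrained network (an illustrative snippet, not part of the training script):

# A single blank 28x28 "image" should come out as 10 log-probabilities
with torch.no_grad():
    print(Net()(torch.zeros(1, 1, 28, 28)).shape)  # torch.Size([1, 10])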

Model training#

def train(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # Negative log-likelihood of the correct class, given log-probabilities
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Send the model to the device (CPU or GPU)
model = Net().to(device)

# Define the optimizer to use for gradient descent
optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)

# Multiply the learning rate by gamma after every epoch
scheduler = ExponentialLR(optimizer, gamma=gamma)

# Train the model
for epoch in range(1, n_epochs + 1):
    train(model, device, train_loader, optimizer, epoch, log_interval)
    test(model, device, test_loader)
    scheduler.step()
Train Epoch: 1 [0/60000 (0%)]	Loss: 2.303437
Train Epoch: 1 [640/60000 (1%)]	Loss: 2.190133
Train Epoch: 1 [1280/60000 (2%)]	Loss: 1.982814
Train Epoch: 1 [1920/60000 (3%)]	Loss: 1.953468
Train Epoch: 1 [2560/60000 (4%)]	Loss: 1.761265
Train Epoch: 1 [3200/60000 (5%)]	Loss: 1.802745
Train Epoch: 1 [3840/60000 (6%)]	Loss: 1.733097
Train Epoch: 1 [4480/60000 (7%)]	Loss: 1.738909
Train Epoch: 1 [5120/60000 (9%)]	Loss: 1.756195
Train Epoch: 1 [5760/60000 (10%)]	Loss: 1.757859
Train Epoch: 1 [6400/60000 (11%)]	Loss: 1.696007
Train Epoch: 1 [7040/60000 (12%)]	Loss: 1.692416
Train Epoch: 1 [7680/60000 (13%)]	Loss: 1.700961
Train Epoch: 1 [8320/60000 (14%)]	Loss: 1.692403
Train Epoch: 1 [8960/60000 (15%)]	Loss: 1.642780
Train Epoch: 1 [9600/60000 (16%)]	Loss: 1.682384
Train Epoch: 1 [10240/60000 (17%)]	Loss: 1.699099
Train Epoch: 1 [10880/60000 (18%)]	Loss: 1.596182
Train Epoch: 1 [11520/60000 (19%)]	Loss: 1.640822
Train Epoch: 1 [12160/60000 (20%)]	Loss: 1.612970
Train Epoch: 1 [12800/60000 (21%)]	Loss: 1.628855
Train Epoch: 1 [13440/60000 (22%)]	Loss: 1.544742
Train Epoch: 1 [14080/60000 (23%)]	Loss: 1.615298
Train Epoch: 1 [14720/60000 (25%)]	Loss: 1.643510
Train Epoch: 1 [15360/60000 (26%)]	Loss: 1.599172
Train Epoch: 1 [16000/60000 (27%)]	Loss: 1.678295
Train Epoch: 1 [16640/60000 (28%)]	Loss: 1.624110
Train Epoch: 1 [17280/60000 (29%)]	Loss: 1.551333
Train Epoch: 1 [17920/60000 (30%)]	Loss: 1.505503
Train Epoch: 1 [18560/60000 (31%)]	Loss: 1.502764
Train Epoch: 1 [19200/60000 (32%)]	Loss: 1.528274
Train Epoch: 1 [19840/60000 (33%)]	Loss: 1.507777
Train Epoch: 1 [20480/60000 (34%)]	Loss: 1.532025
Train Epoch: 1 [21120/60000 (35%)]	Loss: 1.522650
Train Epoch: 1 [21760/60000 (36%)]	Loss: 1.484702
Train Epoch: 1 [22400/60000 (37%)]	Loss: 1.539056
Train Epoch: 1 [23040/60000 (38%)]	Loss: 1.553571
Train Epoch: 1 [23680/60000 (39%)]	Loss: 1.607744
Train Epoch: 1 [24320/60000 (41%)]	Loss: 1.488518
Train Epoch: 1 [24960/60000 (42%)]	Loss: 1.559851
Train Epoch: 1 [25600/60000 (43%)]	Loss: 1.543421
Train Epoch: 1 [26240/60000 (44%)]	Loss: 1.491874
Train Epoch: 1 [26880/60000 (45%)]	Loss: 1.533729
Train Epoch: 1 [27520/60000 (46%)]	Loss: 1.508406
Train Epoch: 1 [28160/60000 (47%)]	Loss: 1.540629
Train Epoch: 1 [28800/60000 (48%)]	Loss: 1.538574
Train Epoch: 1 [29440/60000 (49%)]	Loss: 1.521769
Train Epoch: 1 [30080/60000 (50%)]	Loss: 1.553346
Train Epoch: 1 [30720/60000 (51%)]	Loss: 1.567774
Train Epoch: 1 [31360/60000 (52%)]	Loss: 1.556850
Train Epoch: 1 [32000/60000 (53%)]	Loss: 1.536666
Train Epoch: 1 [32640/60000 (54%)]	Loss: 1.498510
Train Epoch: 1 [33280/60000 (55%)]	Loss: 1.551042
Train Epoch: 1 [33920/60000 (57%)]	Loss: 1.528805
Train Epoch: 1 [34560/60000 (58%)]	Loss: 1.543514
Train Epoch: 1 [35200/60000 (59%)]	Loss: 1.544061
Train Epoch: 1 [35840/60000 (60%)]	Loss: 1.529986
Train Epoch: 1 [36480/60000 (61%)]	Loss: 1.504250
Train Epoch: 1 [37120/60000 (62%)]	Loss: 1.529914
Train Epoch: 1 [37760/60000 (63%)]	Loss: 1.530004
Train Epoch: 1 [38400/60000 (64%)]	Loss: 1.502128
Train Epoch: 1 [39040/60000 (65%)]	Loss: 1.494294
Train Epoch: 1 [39680/60000 (66%)]	Loss: 1.524500
Train Epoch: 1 [40320/60000 (67%)]	Loss: 1.498563
Train Epoch: 1 [40960/60000 (68%)]	Loss: 1.504439
Train Epoch: 1 [41600/60000 (69%)]	Loss: 1.519205
Train Epoch: 1 [42240/60000 (70%)]	Loss: 1.509261
Train Epoch: 1 [42880/60000 (71%)]	Loss: 1.553779
Train Epoch: 1 [43520/60000 (72%)]	Loss: 1.533013
Train Epoch: 1 [44160/60000 (74%)]	Loss: 1.516619
Train Epoch: 1 [44800/60000 (75%)]	Loss: 1.552788
Train Epoch: 1 [45440/60000 (76%)]	Loss: 1.554292
Train Epoch: 1 [46080/60000 (77%)]	Loss: 1.548144
Train Epoch: 1 [46720/60000 (78%)]	Loss: 1.587070
Train Epoch: 1 [47360/60000 (79%)]	Loss: 1.528295
Train Epoch: 1 [48000/60000 (80%)]	Loss: 1.524065
Train Epoch: 1 [48640/60000 (81%)]	Loss: 1.496142
Train Epoch: 1 [49280/60000 (82%)]	Loss: 1.480355
Train Epoch: 1 [49920/60000 (83%)]	Loss: 1.519368
Train Epoch: 1 [50560/60000 (84%)]	Loss: 1.502452
Train Epoch: 1 [51200/60000 (85%)]	Loss: 1.509757
Train Epoch: 1 [51840/60000 (86%)]	Loss: 1.491970
Train Epoch: 1 [52480/60000 (87%)]	Loss: 1.481563
Train Epoch: 1 [53120/60000 (88%)]	Loss: 1.527318
Train Epoch: 1 [53760/60000 (90%)]	Loss: 1.478199
Train Epoch: 1 [54400/60000 (91%)]	Loss: 1.499248
Train Epoch: 1 [55040/60000 (92%)]	Loss: 1.497738
Train Epoch: 1 [55680/60000 (93%)]	Loss: 1.523645
Train Epoch: 1 [56320/60000 (94%)]	Loss: 1.566173
Train Epoch: 1 [56960/60000 (95%)]	Loss: 1.516700
Train Epoch: 1 [57600/60000 (96%)]	Loss: 1.518415
Train Epoch: 1 [58240/60000 (97%)]	Loss: 1.476856
Train Epoch: 1 [58880/60000 (98%)]	Loss: 1.478713
Train Epoch: 1 [59520/60000 (99%)]	Loss: 1.463038

Test set: Average loss: -0.9711, Accuracy: 9724/10000 (97%)

Train Epoch: 2 [0/60000 (0%)]	Loss: 1.482012
Train Epoch: 2 [640/60000 (1%)]	Loss: 1.484298
Train Epoch: 2 [1280/60000 (2%)]	Loss: 1.507437
Train Epoch: 2 [1920/60000 (3%)]	Loss: 1.493413
Train Epoch: 2 [2560/60000 (4%)]	Loss: 1.497554
Train Epoch: 2 [3200/60000 (5%)]	Loss: 1.479610
Train Epoch: 2 [3840/60000 (6%)]	Loss: 1.479470
Train Epoch: 2 [4480/60000 (7%)]	Loss: 1.478733
Train Epoch: 2 [5120/60000 (9%)]	Loss: 1.522738
Train Epoch: 2 [5760/60000 (10%)]	Loss: 1.486314
Train Epoch: 2 [6400/60000 (11%)]	Loss: 1.492833
Train Epoch: 2 [7040/60000 (12%)]	Loss: 1.508549
Train Epoch: 2 [7680/60000 (13%)]	Loss: 1.501037
Train Epoch: 2 [8320/60000 (14%)]	Loss: 1.478467
Train Epoch: 2 [8960/60000 (15%)]	Loss: 1.500617
Train Epoch: 2 [9600/60000 (16%)]	Loss: 1.501023
Train Epoch: 2 [10240/60000 (17%)]	Loss: 1.636965
Train Epoch: 2 [10880/60000 (18%)]	Loss: 1.461305
Train Epoch: 2 [11520/60000 (19%)]	Loss: 1.535370
Train Epoch: 2 [12160/60000 (20%)]	Loss: 1.540100
Train Epoch: 2 [12800/60000 (21%)]	Loss: 1.507719
Train Epoch: 2 [13440/60000 (22%)]	Loss: 1.505560
Train Epoch: 2 [14080/60000 (23%)]	Loss: 1.508344
Train Epoch: 2 [14720/60000 (25%)]	Loss: 1.533464
Train Epoch: 2 [15360/60000 (26%)]	Loss: 1.534683
Train Epoch: 2 [16000/60000 (27%)]	Loss: 1.507471
Train Epoch: 2 [16640/60000 (28%)]	Loss: 1.554044
Train Epoch: 2 [17280/60000 (29%)]	Loss: 1.475878
Train Epoch: 2 [17920/60000 (30%)]	Loss: 1.492778
Train Epoch: 2 [18560/60000 (31%)]	Loss: 1.499268
Train Epoch: 2 [19200/60000 (32%)]	Loss: 1.492610
Train Epoch: 2 [19840/60000 (33%)]	Loss: 1.535098
Train Epoch: 2 [20480/60000 (34%)]	Loss: 1.505886
Train Epoch: 2 [21120/60000 (35%)]	Loss: 1.499575
Train Epoch: 2 [21760/60000 (36%)]	Loss: 1.463617
Train Epoch: 2 [22400/60000 (37%)]	Loss: 1.475593
Train Epoch: 2 [23040/60000 (38%)]	Loss: 1.492989
Train Epoch: 2 [23680/60000 (39%)]	Loss: 1.503387
Train Epoch: 2 [24320/60000 (41%)]	Loss: 1.463551
Train Epoch: 2 [24960/60000 (42%)]	Loss: 1.479160
Train Epoch: 2 [25600/60000 (43%)]	Loss: 1.518775
Train Epoch: 2 [26240/60000 (44%)]	Loss: 1.487487
Train Epoch: 2 [26880/60000 (45%)]	Loss: 1.511166
Train Epoch: 2 [27520/60000 (46%)]	Loss: 1.515113
Train Epoch: 2 [28160/60000 (47%)]	Loss: 1.497111
Train Epoch: 2 [28800/60000 (48%)]	Loss: 1.528531
Train Epoch: 2 [29440/60000 (49%)]	Loss: 1.486960
Train Epoch: 2 [30080/60000 (50%)]	Loss: 1.492313
Train Epoch: 2 [30720/60000 (51%)]	Loss: 1.546178
Train Epoch: 2 [31360/60000 (52%)]	Loss: 1.505029
Train Epoch: 2 [32000/60000 (53%)]	Loss: 1.534682
Train Epoch: 2 [32640/60000 (54%)]	Loss: 1.504612
Train Epoch: 2 [33280/60000 (55%)]	Loss: 1.522709
Train Epoch: 2 [33920/60000 (57%)]	Loss: 1.461186
Train Epoch: 2 [34560/60000 (58%)]	Loss: 1.512066
Train Epoch: 2 [35200/60000 (59%)]	Loss: 1.533930
Train Epoch: 2 [35840/60000 (60%)]	Loss: 1.507137
Train Epoch: 2 [36480/60000 (61%)]	Loss: 1.511106
Train Epoch: 2 [37120/60000 (62%)]	Loss: 1.499031
Train Epoch: 2 [37760/60000 (63%)]	Loss: 1.500017
Train Epoch: 2 [38400/60000 (64%)]	Loss: 1.483651
Train Epoch: 2 [39040/60000 (65%)]	Loss: 1.469648
Train Epoch: 2 [39680/60000 (66%)]	Loss: 1.507555
Train Epoch: 2 [40320/60000 (67%)]	Loss: 1.476786
Train Epoch: 2 [40960/60000 (68%)]	Loss: 1.501535
Train Epoch: 2 [41600/60000 (69%)]	Loss: 1.497805
Train Epoch: 2 [42240/60000 (70%)]	Loss: 1.489715
Train Epoch: 2 [42880/60000 (71%)]	Loss: 1.535977
Train Epoch: 2 [43520/60000 (72%)]	Loss: 1.512499
Train Epoch: 2 [44160/60000 (74%)]	Loss: 1.477942
Train Epoch: 2 [44800/60000 (75%)]	Loss: 1.477015
Train Epoch: 2 [45440/60000 (76%)]	Loss: 1.519611
Train Epoch: 2 [46080/60000 (77%)]	Loss: 1.500612
Train Epoch: 2 [46720/60000 (78%)]	Loss: 1.493225
Train Epoch: 2 [47360/60000 (79%)]	Loss: 1.499400
Train Epoch: 2 [48000/60000 (80%)]	Loss: 1.497467
Train Epoch: 2 [48640/60000 (81%)]	Loss: 1.479307
Train Epoch: 2 [49280/60000 (82%)]	Loss: 1.477221
Train Epoch: 2 [49920/60000 (83%)]	Loss: 1.500800
Train Epoch: 2 [50560/60000 (84%)]	Loss: 1.524441
Train Epoch: 2 [51200/60000 (85%)]	Loss: 1.496813
Train Epoch: 2 [51840/60000 (86%)]	Loss: 1.504592
Train Epoch: 2 [52480/60000 (87%)]	Loss: 1.476948
Train Epoch: 2 [53120/60000 (88%)]	Loss: 1.503898
Train Epoch: 2 [53760/60000 (90%)]	Loss: 1.479251
Train Epoch: 2 [54400/60000 (91%)]	Loss: 1.462188
Train Epoch: 2 [55040/60000 (92%)]	Loss: 1.492595
Train Epoch: 2 [55680/60000 (93%)]	Loss: 1.492745
Train Epoch: 2 [56320/60000 (94%)]	Loss: 1.476812
Train Epoch: 2 [56960/60000 (95%)]	Loss: 1.463375
Train Epoch: 2 [57600/60000 (96%)]	Loss: 1.506456
Train Epoch: 2 [58240/60000 (97%)]	Loss: 1.462169
Train Epoch: 2 [58880/60000 (98%)]	Loss: 1.461197
Train Epoch: 2 [59520/60000 (99%)]	Loss: 1.461150

Test set: Average loss: -0.9804, Accuracy: 9804/10000 (98%)
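Note the effect of the scheduler: with gamma = 0.7, the learning rate is multiplied by 0.7 after every epoch, so after the two epochs above it has decayed from 1.0 to 1.0 * 0.7**2 ≈ 0.49. You can confirm this after training:

# The scheduler has stepped once per epoch
print(scheduler.get_last_lr())  # [0.48999999999999994]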
if save_model:
    torch.save(model.state_dict(), "mnist_cnn_pytorch.ckpt")
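To use the checkpoint later, recreate the model and restore its parameters (a sketch, assuming the Net class defined above is in scope):

# Rebuild the architecture, then load the saved state dict into it
model = Net().to(device)
model.load_state_dict(torch.load("mnist_cnn_pytorch.ckpt", map_location=device))
model.eval()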