MNIST with PyTorch#
The following code example is based on Mikhail Klassen’s article "Tensorflow vs. PyTorch by example".
First, you need to install PyTorch in a new Anaconda environment.
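After installing, a quick sanity check (not part of the original example) confirms the installed version and whether a CUDA-capable GPU is visible:

import torch

# Print the installed PyTorch version and whether a GPU can be used
print(torch.__version__)
print("CUDA available:", torch.cuda.is_available())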
Setup#
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import ExponentialLR
# Use a GPU for training if one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Define settings#
# Random seed for reproducibility
seed = 42
torch.manual_seed(seed)
# Save the model at the end?
save_model = False
# Batch sizes for training and testing
batch_size = 64
test_batch_size = 14
# Training epochs (set low here to keep the demo fast; 10 or more is typical)
n_epochs = 2
# Learning rate
learning_rate = 1.0
# Decay rate for adjusting the learning rate
gamma = 0.7
# How many batches before logging training status
log_interval = 10
# Number of target classes in the MNIST data
num_classes = 10
train_kwargs = {'batch_size': batch_size}
test_kwargs = {'batch_size': test_batch_size}
# CUDA settings
if torch.cuda.is_available():
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)
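Note that torch.manual_seed alone does not guarantee bitwise-identical runs on a GPU. If exact reproducibility matters, the following optional cuDNN settings (a sketch; they can slow training down) can be added alongside the seed:

# Optional: trade speed for reproducibility on CUDA backends
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False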
Data preparation#
# The scaled mean and standard deviation of the MNIST dataset (precalculated)
data_mean = 0.1307
data_std = 0.3081
# Convert input images to tensors and normalize
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_mean,), (data_std,))
])
# Get the MNIST data from torchvision
dataset1 = datasets.MNIST('../data', train=True, download=True,
                          transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
                          transform=transform)
# Define the data loaders that will handle fetching of data
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
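To check that the loaders are wired up correctly, it helps to pull a single batch and inspect its shape; with the settings above, each training batch should hold 64 single-channel 28x28 images (this check is an addition, not part of the original example):

# Fetch one batch to verify tensor shapes
images, labels = next(iter(train_loader))
print(images.shape)  # expected: torch.Size([64, 1, 28, 28])
print(labels.shape)  # expected: torch.Size([64])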
Model architecture#
# Define the architecture of the neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding='valid')
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding='valid')
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        # Return log-probabilities: the nll_loss used below expects
        # log-probabilities, so log_softmax (not softmax) is the
        # correct final activation here
        output = F.log_softmax(x, dim=1)
        return output
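The 9216 input features of fc1 follow directly from the layer shapes: each 28x28 image shrinks to 26x26 after the first unpadded 3x3 convolution, to 24x24 after the second, and to 12x12 after 2x2 max pooling, leaving 64 * 12 * 12 = 9216 values per image. A forward pass with a dummy input (a quick sketch, not part of the original example) confirms the wiring:

# Sanity-check the architecture with a random single-image batch
net = Net()
dummy = torch.randn(1, 1, 28, 28)
print(net(dummy).shape)  # expected: torch.Size([1, 10])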
Model training#
def train(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # The model returns log-probabilities, so use the
        # negative log-likelihood loss
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
# Send the model to the device (CPU or GPU)
model = Net().to(device)
# Define the optimizer to use for gradient descent
optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
# Multiply the learning rate by gamma after every epoch
scheduler = ExponentialLR(optimizer, gamma=gamma)
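With gamma = 0.7, each call to scheduler.step() at the end of an epoch multiplies the learning rate by 0.7, so the rate follows lr * gamma**epoch (1.0, 0.7, 0.49, ...). A small sketch of the decay, independent of the training loop:

# Illustrate the exponential decay of the learning rate
for i in range(4):
    print(i, round(learning_rate * gamma**i, 4))  # 1.0, 0.7, 0.49, 0.343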
# Train the model
for epoch in range(1, n_epochs + 1):
    train(model, device, train_loader, optimizer, epoch, log_interval)
    test(model, device, test_loader)
    scheduler.step()
Train Epoch: 1 [0/60000 (0%)] Loss: 2.303437
Train Epoch: 1 [640/60000 (1%)] Loss: 2.190133
Train Epoch: 1 [1280/60000 (2%)] Loss: 1.982814
Train Epoch: 1 [1920/60000 (3%)] Loss: 1.953468
Train Epoch: 1 [2560/60000 (4%)] Loss: 1.761265
Train Epoch: 1 [3200/60000 (5%)] Loss: 1.802745
Train Epoch: 1 [3840/60000 (6%)] Loss: 1.733097
Train Epoch: 1 [4480/60000 (7%)] Loss: 1.738909
Train Epoch: 1 [5120/60000 (9%)] Loss: 1.756195
Train Epoch: 1 [5760/60000 (10%)] Loss: 1.757859
Train Epoch: 1 [6400/60000 (11%)] Loss: 1.696007
Train Epoch: 1 [7040/60000 (12%)] Loss: 1.692416
Train Epoch: 1 [7680/60000 (13%)] Loss: 1.700961
Train Epoch: 1 [8320/60000 (14%)] Loss: 1.692403
Train Epoch: 1 [8960/60000 (15%)] Loss: 1.642780
Train Epoch: 1 [9600/60000 (16%)] Loss: 1.682384
Train Epoch: 1 [10240/60000 (17%)] Loss: 1.699099
Train Epoch: 1 [10880/60000 (18%)] Loss: 1.596182
Train Epoch: 1 [11520/60000 (19%)] Loss: 1.640822
Train Epoch: 1 [12160/60000 (20%)] Loss: 1.612970
Train Epoch: 1 [12800/60000 (21%)] Loss: 1.628855
Train Epoch: 1 [13440/60000 (22%)] Loss: 1.544742
Train Epoch: 1 [14080/60000 (23%)] Loss: 1.615298
Train Epoch: 1 [14720/60000 (25%)] Loss: 1.643510
Train Epoch: 1 [15360/60000 (26%)] Loss: 1.599172
Train Epoch: 1 [16000/60000 (27%)] Loss: 1.678295
Train Epoch: 1 [16640/60000 (28%)] Loss: 1.624110
Train Epoch: 1 [17280/60000 (29%)] Loss: 1.551333
Train Epoch: 1 [17920/60000 (30%)] Loss: 1.505503
Train Epoch: 1 [18560/60000 (31%)] Loss: 1.502764
Train Epoch: 1 [19200/60000 (32%)] Loss: 1.528274
Train Epoch: 1 [19840/60000 (33%)] Loss: 1.507777
Train Epoch: 1 [20480/60000 (34%)] Loss: 1.532025
Train Epoch: 1 [21120/60000 (35%)] Loss: 1.522650
Train Epoch: 1 [21760/60000 (36%)] Loss: 1.484702
Train Epoch: 1 [22400/60000 (37%)] Loss: 1.539056
Train Epoch: 1 [23040/60000 (38%)] Loss: 1.553571
Train Epoch: 1 [23680/60000 (39%)] Loss: 1.607744
Train Epoch: 1 [24320/60000 (41%)] Loss: 1.488518
Train Epoch: 1 [24960/60000 (42%)] Loss: 1.559851
Train Epoch: 1 [25600/60000 (43%)] Loss: 1.543421
Train Epoch: 1 [26240/60000 (44%)] Loss: 1.491874
Train Epoch: 1 [26880/60000 (45%)] Loss: 1.533729
Train Epoch: 1 [27520/60000 (46%)] Loss: 1.508406
Train Epoch: 1 [28160/60000 (47%)] Loss: 1.540629
Train Epoch: 1 [28800/60000 (48%)] Loss: 1.538574
Train Epoch: 1 [29440/60000 (49%)] Loss: 1.521769
Train Epoch: 1 [30080/60000 (50%)] Loss: 1.553346
Train Epoch: 1 [30720/60000 (51%)] Loss: 1.567774
Train Epoch: 1 [31360/60000 (52%)] Loss: 1.556850
Train Epoch: 1 [32000/60000 (53%)] Loss: 1.536666
Train Epoch: 1 [32640/60000 (54%)] Loss: 1.498510
Train Epoch: 1 [33280/60000 (55%)] Loss: 1.551042
Train Epoch: 1 [33920/60000 (57%)] Loss: 1.528805
Train Epoch: 1 [34560/60000 (58%)] Loss: 1.543514
Train Epoch: 1 [35200/60000 (59%)] Loss: 1.544061
Train Epoch: 1 [35840/60000 (60%)] Loss: 1.529986
Train Epoch: 1 [36480/60000 (61%)] Loss: 1.504250
Train Epoch: 1 [37120/60000 (62%)] Loss: 1.529914
Train Epoch: 1 [37760/60000 (63%)] Loss: 1.530004
Train Epoch: 1 [38400/60000 (64%)] Loss: 1.502128
Train Epoch: 1 [39040/60000 (65%)] Loss: 1.494294
Train Epoch: 1 [39680/60000 (66%)] Loss: 1.524500
Train Epoch: 1 [40320/60000 (67%)] Loss: 1.498563
Train Epoch: 1 [40960/60000 (68%)] Loss: 1.504439
Train Epoch: 1 [41600/60000 (69%)] Loss: 1.519205
Train Epoch: 1 [42240/60000 (70%)] Loss: 1.509261
Train Epoch: 1 [42880/60000 (71%)] Loss: 1.553779
Train Epoch: 1 [43520/60000 (72%)] Loss: 1.533013
Train Epoch: 1 [44160/60000 (74%)] Loss: 1.516619
Train Epoch: 1 [44800/60000 (75%)] Loss: 1.552788
Train Epoch: 1 [45440/60000 (76%)] Loss: 1.554292
Train Epoch: 1 [46080/60000 (77%)] Loss: 1.548144
Train Epoch: 1 [46720/60000 (78%)] Loss: 1.587070
Train Epoch: 1 [47360/60000 (79%)] Loss: 1.528295
Train Epoch: 1 [48000/60000 (80%)] Loss: 1.524065
Train Epoch: 1 [48640/60000 (81%)] Loss: 1.496142
Train Epoch: 1 [49280/60000 (82%)] Loss: 1.480355
Train Epoch: 1 [49920/60000 (83%)] Loss: 1.519368
Train Epoch: 1 [50560/60000 (84%)] Loss: 1.502452
Train Epoch: 1 [51200/60000 (85%)] Loss: 1.509757
Train Epoch: 1 [51840/60000 (86%)] Loss: 1.491970
Train Epoch: 1 [52480/60000 (87%)] Loss: 1.481563
Train Epoch: 1 [53120/60000 (88%)] Loss: 1.527318
Train Epoch: 1 [53760/60000 (90%)] Loss: 1.478199
Train Epoch: 1 [54400/60000 (91%)] Loss: 1.499248
Train Epoch: 1 [55040/60000 (92%)] Loss: 1.497738
Train Epoch: 1 [55680/60000 (93%)] Loss: 1.523645
Train Epoch: 1 [56320/60000 (94%)] Loss: 1.566173
Train Epoch: 1 [56960/60000 (95%)] Loss: 1.516700
Train Epoch: 1 [57600/60000 (96%)] Loss: 1.518415
Train Epoch: 1 [58240/60000 (97%)] Loss: 1.476856
Train Epoch: 1 [58880/60000 (98%)] Loss: 1.478713
Train Epoch: 1 [59520/60000 (99%)] Loss: 1.463038
Test set: Average loss: -0.9711, Accuracy: 9724/10000 (97%)
Train Epoch: 2 [0/60000 (0%)] Loss: 1.482012
Train Epoch: 2 [640/60000 (1%)] Loss: 1.484298
Train Epoch: 2 [1280/60000 (2%)] Loss: 1.507437
Train Epoch: 2 [1920/60000 (3%)] Loss: 1.493413
Train Epoch: 2 [2560/60000 (4%)] Loss: 1.497554
Train Epoch: 2 [3200/60000 (5%)] Loss: 1.479610
Train Epoch: 2 [3840/60000 (6%)] Loss: 1.479470
Train Epoch: 2 [4480/60000 (7%)] Loss: 1.478733
Train Epoch: 2 [5120/60000 (9%)] Loss: 1.522738
Train Epoch: 2 [5760/60000 (10%)] Loss: 1.486314
Train Epoch: 2 [6400/60000 (11%)] Loss: 1.492833
Train Epoch: 2 [7040/60000 (12%)] Loss: 1.508549
Train Epoch: 2 [7680/60000 (13%)] Loss: 1.501037
Train Epoch: 2 [8320/60000 (14%)] Loss: 1.478467
Train Epoch: 2 [8960/60000 (15%)] Loss: 1.500617
Train Epoch: 2 [9600/60000 (16%)] Loss: 1.501023
Train Epoch: 2 [10240/60000 (17%)] Loss: 1.636965
Train Epoch: 2 [10880/60000 (18%)] Loss: 1.461305
Train Epoch: 2 [11520/60000 (19%)] Loss: 1.535370
Train Epoch: 2 [12160/60000 (20%)] Loss: 1.540100
Train Epoch: 2 [12800/60000 (21%)] Loss: 1.507719
Train Epoch: 2 [13440/60000 (22%)] Loss: 1.505560
Train Epoch: 2 [14080/60000 (23%)] Loss: 1.508344
Train Epoch: 2 [14720/60000 (25%)] Loss: 1.533464
Train Epoch: 2 [15360/60000 (26%)] Loss: 1.534683
Train Epoch: 2 [16000/60000 (27%)] Loss: 1.507471
Train Epoch: 2 [16640/60000 (28%)] Loss: 1.554044
Train Epoch: 2 [17280/60000 (29%)] Loss: 1.475878
Train Epoch: 2 [17920/60000 (30%)] Loss: 1.492778
Train Epoch: 2 [18560/60000 (31%)] Loss: 1.499268
Train Epoch: 2 [19200/60000 (32%)] Loss: 1.492610
Train Epoch: 2 [19840/60000 (33%)] Loss: 1.535098
Train Epoch: 2 [20480/60000 (34%)] Loss: 1.505886
Train Epoch: 2 [21120/60000 (35%)] Loss: 1.499575
Train Epoch: 2 [21760/60000 (36%)] Loss: 1.463617
Train Epoch: 2 [22400/60000 (37%)] Loss: 1.475593
Train Epoch: 2 [23040/60000 (38%)] Loss: 1.492989
Train Epoch: 2 [23680/60000 (39%)] Loss: 1.503387
Train Epoch: 2 [24320/60000 (41%)] Loss: 1.463551
Train Epoch: 2 [24960/60000 (42%)] Loss: 1.479160
Train Epoch: 2 [25600/60000 (43%)] Loss: 1.518775
Train Epoch: 2 [26240/60000 (44%)] Loss: 1.487487
Train Epoch: 2 [26880/60000 (45%)] Loss: 1.511166
Train Epoch: 2 [27520/60000 (46%)] Loss: 1.515113
Train Epoch: 2 [28160/60000 (47%)] Loss: 1.497111
Train Epoch: 2 [28800/60000 (48%)] Loss: 1.528531
Train Epoch: 2 [29440/60000 (49%)] Loss: 1.486960
Train Epoch: 2 [30080/60000 (50%)] Loss: 1.492313
Train Epoch: 2 [30720/60000 (51%)] Loss: 1.546178
Train Epoch: 2 [31360/60000 (52%)] Loss: 1.505029
Train Epoch: 2 [32000/60000 (53%)] Loss: 1.534682
Train Epoch: 2 [32640/60000 (54%)] Loss: 1.504612
Train Epoch: 2 [33280/60000 (55%)] Loss: 1.522709
Train Epoch: 2 [33920/60000 (57%)] Loss: 1.461186
Train Epoch: 2 [34560/60000 (58%)] Loss: 1.512066
Train Epoch: 2 [35200/60000 (59%)] Loss: 1.533930
Train Epoch: 2 [35840/60000 (60%)] Loss: 1.507137
Train Epoch: 2 [36480/60000 (61%)] Loss: 1.511106
Train Epoch: 2 [37120/60000 (62%)] Loss: 1.499031
Train Epoch: 2 [37760/60000 (63%)] Loss: 1.500017
Train Epoch: 2 [38400/60000 (64%)] Loss: 1.483651
Train Epoch: 2 [39040/60000 (65%)] Loss: 1.469648
Train Epoch: 2 [39680/60000 (66%)] Loss: 1.507555
Train Epoch: 2 [40320/60000 (67%)] Loss: 1.476786
Train Epoch: 2 [40960/60000 (68%)] Loss: 1.501535
Train Epoch: 2 [41600/60000 (69%)] Loss: 1.497805
Train Epoch: 2 [42240/60000 (70%)] Loss: 1.489715
Train Epoch: 2 [42880/60000 (71%)] Loss: 1.535977
Train Epoch: 2 [43520/60000 (72%)] Loss: 1.512499
Train Epoch: 2 [44160/60000 (74%)] Loss: 1.477942
Train Epoch: 2 [44800/60000 (75%)] Loss: 1.477015
Train Epoch: 2 [45440/60000 (76%)] Loss: 1.519611
Train Epoch: 2 [46080/60000 (77%)] Loss: 1.500612
Train Epoch: 2 [46720/60000 (78%)] Loss: 1.493225
Train Epoch: 2 [47360/60000 (79%)] Loss: 1.499400
Train Epoch: 2 [48000/60000 (80%)] Loss: 1.497467
Train Epoch: 2 [48640/60000 (81%)] Loss: 1.479307
Train Epoch: 2 [49280/60000 (82%)] Loss: 1.477221
Train Epoch: 2 [49920/60000 (83%)] Loss: 1.500800
Train Epoch: 2 [50560/60000 (84%)] Loss: 1.524441
Train Epoch: 2 [51200/60000 (85%)] Loss: 1.496813
Train Epoch: 2 [51840/60000 (86%)] Loss: 1.504592
Train Epoch: 2 [52480/60000 (87%)] Loss: 1.476948
Train Epoch: 2 [53120/60000 (88%)] Loss: 1.503898
Train Epoch: 2 [53760/60000 (90%)] Loss: 1.479251
Train Epoch: 2 [54400/60000 (91%)] Loss: 1.462188
Train Epoch: 2 [55040/60000 (92%)] Loss: 1.492595
Train Epoch: 2 [55680/60000 (93%)] Loss: 1.492745
Train Epoch: 2 [56320/60000 (94%)] Loss: 1.476812
Train Epoch: 2 [56960/60000 (95%)] Loss: 1.463375
Train Epoch: 2 [57600/60000 (96%)] Loss: 1.506456
Train Epoch: 2 [58240/60000 (97%)] Loss: 1.462169
Train Epoch: 2 [58880/60000 (98%)] Loss: 1.461197
Train Epoch: 2 [59520/60000 (99%)] Loss: 1.461150
Test set: Average loss: -0.9804, Accuracy: 9804/10000 (98%)
Save the model#
# Save the trained weights if requested
if save_model:
    torch.save(model.state_dict(), "mnist_cnn_pytorch.ckpt")
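If the checkpoint was written, the weights can later be restored into a fresh Net instance (a minimal reload sketch; model.eval() disables the dropout layers for inference):

# Restore the saved weights into a new model for inference
model = Net().to(device)
model.load_state_dict(torch.load("mnist_cnn_pytorch.ckpt", map_location=device))
model.eval()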