
Transfer Learning - PyTorch Beginner 15

12 Feb 2020

Learn all the basics you need to get started with this deep learning framework! In this part we will learn about transfer learning and how this can be implemented in PyTorch.

We will learn:

- What is Transfer Learning
- Use the pretrained ResNet-18 model
- Apply transfer learning to classify ants and bees
- Exchange the last fully connected layer
- Try 2 methods: Finetune the whole network or train only the last layer (a short sketch of the difference follows this list)
- Evaluate the results
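Before walking through the full script, here is the core difference between the two methods in isolation: both replace the final fully connected layer of ResNet-18 with a fresh 2-class layer, but only the feature-extractor variant freezes the rest of the network. This is just a minimal sketch using the torchvision model API; the variable names model_ft and model_fixed are illustrative.

```python
import torch.nn as nn
from torchvision import models

# Method 1: finetune the whole network -> every parameter stays trainable
model_ft = models.resnet18(pretrained=True)
model_ft.fc = nn.Linear(model_ft.fc.in_features, 2)  # new 2-class head

# Method 2: fixed feature extractor -> freeze the backbone, train only the new head
model_fixed = models.resnet18(pretrained=True)
for param in model_fixed.parameters():
    param.requires_grad = False                        # no gradients for the backbone
model_fixed.fc = nn.Linear(model_fixed.fc.in_features, 2)  # new layer: requires_grad=True by default
```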

All code from this course can be found on GitHub.

Transfer Learning in PyTorch

The dataset (the hymenoptera ants/bees images) can be downloaded from the official PyTorch tutorial site.
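If you prefer to fetch it from a script, a small download helper like the one below works; the URL is the one used by the official PyTorch transfer learning tutorial, and the target folder matches the data_dir used further down. Both are assumptions you may need to adjust for your setup.

```python
import os
import urllib.request
import zipfile

# URL from the official PyTorch transfer learning tutorial (assumed still valid)
URL = "https://download.pytorch.org/tutorial/hymenoptera_data.zip"

os.makedirs("data", exist_ok=True)
zip_path = os.path.join("data", "hymenoptera_data.zip")

if not os.path.isdir("data/hymenoptera_data"):
    urllib.request.urlretrieve(URL, zip_path)      # download the archive
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall("data")                 # creates data/hymenoptera_data/train and /val
```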

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

mean = np.array([0.5, 0.5, 0.5])
std = np.array([0.25, 0.25, 0.25])

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}

data_dir = 'data/hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=0)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(class_names)

def imshow(inp, title):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    plt.title(title)
    plt.show()

# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

#### Finetuning the convnet ####
# Load a pretrained model and reset final fully connected layer.
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model.fc = nn.Linear(num_ftrs, 2)
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001)

# StepLR decays the learning rate of each parameter group by gamma every step_size epochs
# Decay LR by a factor of 0.1 every 7 epochs
# Learning rate scheduling should be applied after the optimizer's update
# e.g., you should write your code this way:
# for epoch in range(100):
#     train(...)
#     validate(...)
#     scheduler.step()
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=25)

#### ConvNet as fixed feature extractor ####
# Here, we need to freeze all the network except the final layer.
# We need to set requires_grad == False to freeze the parameters so that the
# gradients are not computed in backward()
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of the final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=25)
```
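The training loop above already prints the validation loss and accuracy for every epoch, which is how the two approaches are compared. If you additionally want to look at individual predictions of the trained model, a small helper along these lines can be used; it reuses the dataloaders, class_names, device and imshow defined earlier, and the name visualize_predictions is just an illustrative choice.

```python
def visualize_predictions(model, num_images=6):
    # Show a few validation images together with the predicted class name
    model.eval()
    images_shown = 0
    with torch.no_grad():
        for inputs, _ in dataloaders['val']:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for i in range(inputs.size(0)):
                imshow(inputs.cpu().data[i],
                       title='predicted: {}'.format(class_names[preds[i]]))
                images_shown += 1
                if images_shown == num_images:
                    return

visualize_predictions(model_conv)
```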