Softmax And Cross Entropy - PyTorch Beginner 11

14 Jan 2020

Learn all the basics you need to get started with this deep learning framework! In this part we learn about the softmax function and the cross-entropy loss function. Softmax and cross entropy are popular functions used in neural nets, especially in multiclass classification problems. We look at the math behind these functions, at when and how to use them in PyTorch, and at the differences between multiclass and binary classification problems.
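For reference, the two standard definitions that the code below implements (x is a vector of raw scores, y a one-hot target, and y-hat the predicted probability vector):

    \mathrm{softmax}(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}}

    \mathrm{CE}(y, \hat{y}) = -\sum_i y_i \log(\hat{y}_i)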

All code from this course can be found on GitHub.

Softmax and Cross Entropy in PyTorch

import torch
import torch.nn as nn
import numpy as np

#
#        -> 2.0              -> 0.65
# Linear -> 1.0  -> Softmax  -> 0.25  -> CrossEntropy(y, y_hat)
#        -> 0.1              -> 0.1
#
#     scores (logits)     probabilities
#                           sum = 1.0
#

# Softmax applies the exponential function to each element, and normalizes
# by dividing by the sum of all these exponentials
# -> squashes the output to be between 0 and 1 = probability
# sum of all probabilities is 1
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)  # computed along the first axis
print('softmax torch:', outputs)

# Cross entropy
# Cross-entropy loss, or log loss, measures the performance of a classification model
# whose output is a probability value between 0 and 1.
# -> loss increases as the predicted probability diverges from the actual label
def cross_entropy(actual, predicted):
    EPS = 1e-15
    predicted = np.clip(predicted, EPS, 1 - EPS)
    loss = -np.sum(actual * np.log(predicted))
    return loss  # / float(predicted.shape[0])

# y must be one hot encoded
# if class 0: [1 0 0]
# if class 1: [0 1 0]
# if class 2: [0 0 1]
Y = np.array([1, 0, 0])
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

# CrossEntropyLoss in PyTorch (applies Softmax)
# nn.LogSoftmax + nn.NLLLoss
# NLLLoss = negative log likelihood loss
loss = nn.CrossEntropyLoss()
# loss(input, target)

# target is of size nSamples = 1
# each element has class label: 0, 1, or 2
# Y (=target) contains class labels, not one-hot
Y = torch.tensor([0])

# input is of size nSamples x nClasses = 1 x 3
# y_pred (=input) must be raw, unnormalized scores (logits) for each class, not softmax
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')

# get predictions (index of the highest score)
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')

# allows batch loss for multiple samples
# target is of size nBatch = 3
# each element has class label: 0, 1, or 2
Y = torch.tensor([2, 0, 1])

# input is of size nBatch x nClasses = 3 x 3
# Y_pred are logits (not softmax)
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9],   # predict class 2
     [1.2, 0.1, 0.3],   # predict class 0
     [0.3, 2.2, 0.2]])  # predict class 1

Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1],
     [0.1, 0.3, 1.5],
     [1.2, 0.2, 0.5]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'Batch Loss1: {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')

# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y}, Y_pred1: {predictions1}, Y_pred2: {predictions2}')

# Binary classification
class NeuralNet1(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # sigmoid at the end
        y_pred = torch.sigmoid(out)
        return y_pred

model = NeuralNet1(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()
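To make the binary case concrete, here is a minimal usage sketch (not part of the original code) that reuses the NeuralNet1 model and BCELoss criterion defined above; the batch size of 4 and the random inputs are just assumptions for illustration:

# minimal sketch (illustrative only): dummy batch of 4 flattened 28x28 images
x = torch.rand(4, 28*28)
# BCELoss expects float targets between 0 and 1, with the same shape as the model output
y = torch.tensor([[1.0], [0.0], [1.0], [0.0]])

y_pred = model(x)         # sigmoid is already applied inside forward()
l = criterion(y_pred, y)  # probabilities vs. float targets
print(f'BCE loss: {l.item():.4f}')

Note that nn.BCELoss expects probabilities, which is why the model applies torch.sigmoid itself. PyTorch also provides nn.BCEWithLogitsLoss, which combines the sigmoid and the BCE loss in one step; with that criterion the model would return the raw output instead.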
# Multiclass problem
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out

model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # (applies Softmax)
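And the same kind of sketch (again illustrative, not from the original code, with a dummy batch) for the multiclass model defined above: the raw logits go straight into CrossEntropyLoss together with plain class labels.

# minimal sketch (illustrative only): dummy batch of 4 flattened 28x28 images
x = torch.rand(4, 28*28)
y = torch.tensor([0, 2, 1, 0])  # class labels, NOT one-hot encoded

logits = model(x)               # raw scores of shape (4, 3); no softmax in forward()
l = criterion(logits, y)        # CrossEntropyLoss applies LogSoftmax + NLLLoss internally
print(f'Cross-entropy loss: {l.item():.4f}')

# predicted classes = index of the highest logit
_, predictions = torch.max(logits, 1)
print(predictions)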