Softmax And Cross Entropy - PyTorch Beginner 11

Learn all the basics you need to get started with this deep learning framework! In this part we learn about the softmax function and the cross-entropy loss function. Softmax and cross-entropy are popular functions used in neural nets, especially in multiclass classification problems. Learn the math behind these functions, when and how to use them in PyTorch, and the differences between multiclass and binary classification problems.
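
As a quick refresher before the code, these are the standard definitions that the code below implements, where x are the raw scores (logits), y_hat the predicted probabilities, and y the one-hot encoded label:

softmax(x_i) = exp(x_i) / Σ_j exp(x_j)

CrossEntropy(y, y_hat) = -Σ_i y_i · log(y_hat_i)

For example, the raw scores [2.0, 1.0, 0.1] become roughly the probabilities [0.66, 0.24, 0.10] after softmax (they sum to 1), and with a one-hot label the cross-entropy loss is simply the negative log of the probability assigned to the correct class.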

All code from this course can be found on GitHub.

Softmax and Cross Entropy in PyTorch

import torch
import torch.nn as nn
import numpy as np

#
#        -> 2.0              -> 0.65
# Linear -> 1.0  -> Softmax  -> 0.25  -> CrossEntropy(y, y_hat)
#        -> 0.1              -> 0.1
#
#     scores (logits)    probabilities
#                          sum = 1.0
#

# Softmax applies the exponential function to each element, and normalizes
# by dividing by the sum of all these exponentials
# -> squashes the output to be between 0 and 1 = probability
# -> sum of all probabilities is 1
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)  # computed along the first axis
print('softmax torch:', outputs)

# Cross entropy
# Cross-entropy loss, or log loss, measures the performance of a classification model
# whose output is a probability value between 0 and 1.
# -> loss increases as the predicted probability diverges from the actual label
def cross_entropy(actual, predicted):
    EPS = 1e-15
    predicted = np.clip(predicted, EPS, 1 - EPS)
    loss = -np.sum(actual * np.log(predicted))
    return loss  # / float(predicted.shape[0])

# y must be one hot encoded
# if class 0: [1 0 0]
# if class 1: [0 1 0]
# if class 2: [0 0 1]
Y = np.array([1, 0, 0])
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

# CrossEntropyLoss in PyTorch (applies Softmax)
# nn.CrossEntropyLoss = nn.LogSoftmax + nn.NLLLoss
# NLLLoss = negative log likelihood loss
loss = nn.CrossEntropyLoss()
# loss(input, target)

# target is of size nSamples = 1
# each element has class label: 0, 1, or 2
# Y (=target) contains class labels, not one-hot vectors
Y = torch.tensor([0])

# input is of size nSamples x nClasses = 1 x 3
# Y_pred (=input) must be raw, unnormalized scores (logits) for each class, not softmax probabilities
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')

# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')

# allows batch loss for multiple samples

# target is of size nBatch = 3
# each element has class label: 0, 1, or 2
Y = torch.tensor([2, 0, 1])

# input is of size nBatch x nClasses = 3 x 3
# Y_pred are logits (not softmax probabilities)
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9],   # predict class 2
     [1.2, 0.1, 0.3],   # predict class 0
     [0.3, 2.2, 0.2]])  # predict class 1

Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1],
     [0.1, 0.3, 1.5],
     [1.2, 0.2, 0.5]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'Batch Loss1: {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')

# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y}, Y_pred1: {predictions1}, Y_pred2: {predictions2}')

# Binary classification
class NeuralNet1(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # sigmoid at the end
        y_pred = torch.sigmoid(out)
        return y_pred

model = NeuralNet1(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()

# Multiclass problem
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out

model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # (applies Softmax)
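
To show how the multiclass criterion would plug into training, here is a minimal sketch of a single training step. The Sequential model mirrors the architecture of NeuralNet2 above; the dummy data, batch size, optimizer, and learning rate are illustrative assumptions, not part of the original lesson:

import torch
import torch.nn as nn

# same architecture as NeuralNet2 above, written as a Sequential for brevity
model = nn.Sequential(
    nn.Linear(28*28, 5),
    nn.ReLU(),
    nn.Linear(5, 3),   # outputs raw logits, no softmax at the end
)
criterion = nn.CrossEntropyLoss()                         # applies Softmax internally
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # assumed optimizer and learning rate

# dummy batch: 4 flattened 28x28 "images" with class labels 0, 1, or 2
x = torch.randn(4, 28*28)
y = torch.tensor([0, 2, 1, 0])

logits = model(x)            # shape: nBatch x nClasses
l = criterion(logits, y)     # target holds class indices, not one-hot vectors
l.backward()                 # backpropagate
optimizer.step()             # update weights
optimizer.zero_grad()        # reset gradients for the next iteration
print(f'loss: {l.item():.4f}')

The key point carries over from the code above: the model returns raw logits and the target tensor contains class indices; nn.CrossEntropyLoss handles the softmax internally.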
