# To install Pytorch on your machine
#!pip install torch
import numpy as np
# import Pytorch in Python
import torch
import torch.nn as nn
from torchviz import make_dot
The tensor data structure is almost exactly like a NumPy array. A key difference is that tensors can also be loaded onto GPUs.
data = [[1, 2],[3, 4]]
np_array = np.array(data)
np_array
array([[1, 2], [3, 4]])
tensor = torch.from_numpy(np_array)
tensor
tensor([[1, 2], [3, 4]])
tensor.shape
torch.Size([2, 2])
# Check which device it's on
tensor.device
device(type='cpu')
# Cast back into numpy array
np_array_back = tensor.detach().numpy() # .detach() is needed if the tensor requires grad; add .cpu() first if it lives on a GPU
np_array_back
array([[1, 2], [3, 4]])
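Since tensors can live on GPUs, here is a minimal sketch of moving this tensor to a GPU when one is available (assuming a CUDA-capable machine; on a CPU-only machine it simply stays where it is):
# Move the tensor to a GPU if one is available, otherwise keep it on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor_on_device = tensor.to(device)
tensor_on_device.device
# To convert back to numpy from a GPU tensor, bring it to the CPU first: tensor_on_device.cpu().numpy()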
# Using the example from Computation graph #1 yesterday
# Input data
xyz = torch.tensor([-2.0, 5.0, -4.0], requires_grad=True)
# requires_grad=True tells Pytorch that we want to backprop through these variables
# forward pass calculates the function
# f = (x+y)*z
f = (xyz[0]+xyz[1])*xyz[2]
f
tensor(-12., grad_fn=<MulBackward0>)
f.backward() #Tell Pytorch to do the backwards pass through the computation graph
# Partial derivatives of f with respect to each element
# in vector xyz
print(xyz.grad)
tensor([-4., -4., 3.])
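To sanity-check these numbers, the partial derivatives can be worked out by hand: df/dx = z, df/dy = z, df/dz = x + y. A quick check against xyz.grad (a minimal sketch, using only the input values defined above):
# Analytic partials of f = (x + y) * z: df/dx = z, df/dy = z, df/dz = x + y
x, y, z = -2.0, 5.0, -4.0
manual_grad = torch.tensor([z, z, x + y])
torch.allclose(xyz.grad, manual_grad) # expected: True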
# Slight tweak on computation graph #1
xyz = torch.tensor([-2.0, 5.0, -4.0, 1.0], requires_grad=True)
f2 = 3*(xyz[0]+xyz[1])*xyz[2]
f2
tensor(-36., grad_fn=<MulBackward0>)
f2.backward()
print(xyz.grad)
tensor([-12., -12., 9., 0.])
# Note: the brew command below is macOS-specific; install graphviz another way on other platforms
#!brew install graphviz
# # Visualize the computation graph
# # Blue is the shape of the input tensor, xyz
# # Green is the output
# make_dot(f2, params={'xyz': xyz})
# Same toy data as HW3
NUM_CLASSES = 2 #let's do binary logistic regression again
X_train = torch.tensor([[1.0, 0.0, 0.0],
                        [0.0, 1.0, 1.0],
                        [1.0, 1.0, 0.0],
                        [0.0, 0.0, 1.0]])
Y_train = torch.tensor([0, 1, 0, 1])
num_examples = X_train.shape[0]
num_words = X_train.shape[1]
assert X_train.shape[0] == Y_train.shape[0]
# Create a linear layer
theta = nn.Linear(num_words, NUM_CLASSES)
out = theta(X_train)
out.shape
torch.Size([4, 2])
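Under the hood, nn.Linear just stores a weight matrix and a bias vector and applies an affine map. A quick sketch (reusing only the theta layer defined above) confirming that out equals X_train @ weight.T + bias:
# nn.Linear(num_words, NUM_CLASSES) holds a (NUM_CLASSES, num_words) weight and a (NUM_CLASSES,) bias
theta.weight.shape, theta.bias.shape
# Manually reproduce the layer's output: X @ W^T + b
manual_out = X_train @ theta.weight.T + theta.bias
torch.allclose(out, manual_out) # expected: True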
# Logistic/softmax function via log-softmax (the log is for numerical stability)
log_softmax = nn.LogSoftmax(dim=1)
log_probs = log_softmax(out)
# Get back to numpy
log_probs_numpy = log_probs.detach().numpy()
log_probs_numpy
array([[-1.941936  , -0.15481459],
       [-0.5606183 , -0.84596276],
       [-1.4656038 , -0.26258427],
       [-0.8536415 , -0.5548843 ]], dtype=float32)
# How do we get probabilities out?
np.exp(log_probs_numpy)
array([[0.143426  , 0.856574  ],
       [0.570856  , 0.429144  ],
       [0.23093851, 0.76906157],
       [0.42586133, 0.5741387 ]], dtype=float32)
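As a sanity check (a small sketch using only objects defined above), exponentiating the log-softmax output should match applying softmax directly, and each row should sum to 1:
# exp(log_softmax(out)) should equal softmax(out), and each row is a proper probability distribution
probs = torch.softmax(out, dim=1)
torch.allclose(probs, torch.exp(log_probs)) # expected: True
probs.sum(dim=1) # expected: tensor([1., 1., 1., 1.])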
class BinaryLogisticRegressionModel(nn.Module):
    """
    Pytorch implementation for binary logistic regression
    """
    def __init__(self, num_words, num_classes):
        super().__init__()
        # Create the network architecture ("stack the legos")
        self.theta = nn.Linear(num_words, num_classes)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, X):
        out = self.theta(X)
        log_probs = self.log_softmax(out)
        return log_probs

    def train_model(self, X, Y, loss_fn, optimizer, num_iterations):
        """
        Training our binary logistic regression model
        Note: Here, we are not using mini-batches, we are going through the entire
        dataset in every iteration (not recommended beyond toy examples)
        """
        self.train() # tells nn.Module it's in training mode
                     # (important when we get to things like dropout)
        for t in range(num_iterations):
            # Forward pass
            pred = self.forward(X)
            loss = loss_fn(pred, Y)
            # Backprop
            optimizer.zero_grad() # clears the gradients from the previous iteration
                                  # this step is important because otherwise Pytorch will
                                  # *accumulate* gradients across iterations (all backwards passes)
            loss.backward() # calculate gradients from the forward step
            optimizer.step() # gradient descent update equation
            loss_value = loss.item() # .item() extracts a plain Python number from the tensor
            print(f"Iteration={t}, Loss={loss_value}")

    def predict(self, X):
        self.eval() # tells nn.Module it's NOT in training mode
                    # (important when we get to things like dropout)
        pred_log_probs = self.forward(X)
        log_pred_pos_class = pred_log_probs[:, 1].detach().numpy() # keep only the positive class
        pred_pos_class = np.exp(log_pred_pos_class) # exp to undo the log
        # decision threshold
        y_pred = np.zeros(X.shape[0])
        y_pred[pred_pos_class >= 0.5] = 1
        return y_pred, pred_pos_class
LEARNING_RATE = 1e-3
NUMBER_ITERATIONS = 10
loss_fn = nn.NLLLoss() # choose our loss function
# Wouldn't expect loss to improve dramatically each iteration b/c we're not
# using mini-batches here
model = BinaryLogisticRegressionModel(num_words, NUM_CLASSES) # initialize
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE) #stochastic gradient descent
model.train_model(X_train, Y_train, loss_fn, optimizer, NUMBER_ITERATIONS)
Iteration=0, Loss=0.7447301745414734
Iteration=1, Loss=0.744430661201477
Iteration=2, Loss=0.7441313862800598
Iteration=3, Loss=0.7438322901725769
Iteration=4, Loss=0.7435333728790283
Iteration=5, Loss=0.7432346343994141
Iteration=6, Loss=0.7429361343383789
Iteration=7, Loss=0.7426378130912781
Iteration=8, Loss=0.742339551448822
Iteration=9, Loss=0.7420416474342346
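A side note on the loss: pairing LogSoftmax with nn.NLLLoss is equivalent to applying nn.CrossEntropyLoss directly to the raw linear-layer outputs (logits). A minimal sketch of the equivalence, reusing out and log_probs from the earlier cells:
# nn.CrossEntropyLoss on raw scores == nn.NLLLoss on log-softmax outputs
ce = nn.CrossEntropyLoss()(out, Y_train)
nll = nn.NLLLoss()(log_probs, Y_train)
torch.allclose(ce, nll) # expected: True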
# Inference time (predict on a single example)
X_test = torch.tensor([[1.0, 0.0, 0.0]])
y_pred, pred_pos_class = model.predict(X_test)
pred_pos_class
array([0.42864218], dtype=float32)
y_pred
array([0.])
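For completeness, the same predict call can be run over the full training set to see how many of the four toy examples the (barely trained) model gets right; a minimal sketch:
# Accuracy on the toy training set (only 10 full-batch SGD steps, so don't expect much)
y_pred_train, _ = model.predict(X_train)
train_acc = (y_pred_train == Y_train.numpy()).mean()
train_acc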
# print(f"Model structure: {model}\n")
# for name, param in model.named_parameters():
# print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")
# make_dot(model.forward(X_train))
# Addmm is matrix multiplication plus bias add (the linear layer)
# Blue: thetas (weight and bias term)
# Green: model output log-probabilities (num_examples, num_classes)
class BasicFeedForwardModel(nn.Module):
    """
    Pytorch implementation for a single-hidden-layer deep learning model
    """
    def __init__(self, num_words, num_classes, hidden_dim1):
        super().__init__()
        # Create the network architecture ("stack the legos")
        ###### UPDATE FROM LOGREG ######
        self.hidden1 = nn.Linear(num_words, hidden_dim1)
        self.theta = nn.Linear(hidden_dim1, num_classes)
        #################################
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, X):
        ###### UPDATE FROM LOGREG ######
        hid1 = self.hidden1(X)
        hid1 = nn.functional.relu(hid1)
        out = self.theta(hid1)
        #################################
        log_probs = self.log_softmax(out)
        return log_probs

    def train_model(self, X, Y, loss_fn, optimizer, num_iterations):
        """
        Training our feed-forward model
        Note: Here, we are not using mini-batches, we are going through the entire
        dataset in every iteration (not recommended beyond toy examples)
        """
        self.train() # tells nn.Module it's in training mode
                     # (important when we get to things like dropout)
        for t in range(num_iterations):
            # Forward pass
            pred = self.forward(X)
            loss = loss_fn(pred, Y)
            # Backprop
            optimizer.zero_grad() # clears the gradients from the previous iteration
                                  # this step is important because otherwise Pytorch will
                                  # *accumulate* gradients across iterations (all backwards passes)
            loss.backward() # calculate gradients from the forward step
            optimizer.step() # gradient descent update equation
            loss_value = loss.item() # .item() extracts a plain Python number from the tensor
            print(f"Iteration={t}, Loss={loss_value}")

    def predict(self, X):
        self.eval() # tells nn.Module it's NOT in training mode
                    # (important when we get to things like dropout)
        pred_log_probs = self.forward(X)
        log_pred_pos_class = pred_log_probs[:, 1].detach().numpy() # keep only the positive class
        pred_pos_class = np.exp(log_pred_pos_class) # exp to undo the log
        # decision threshold
        y_pred = np.zeros(X.shape[0])
        y_pred[pred_pos_class >= 0.5] = 1
        return y_pred, pred_pos_class
HIDDEN_DIM = 5
model = BasicFeedForwardModel(num_words, NUM_CLASSES, HIDDEN_DIM) # initialize
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE) #stochastic gradient descent
model.train_model(X_train, Y_train, loss_fn, optimizer, NUMBER_ITERATIONS)
Iteration=0, Loss=0.704086184501648
Iteration=1, Loss=0.7040359377861023
Iteration=2, Loss=0.703985869884491
Iteration=3, Loss=0.7039358019828796
Iteration=4, Loss=0.7038857340812683
Iteration=5, Loss=0.7038357257843018
Iteration=6, Loss=0.7037858963012695
Iteration=7, Loss=0.7037360072135925
Iteration=8, Loss=0.7036861777305603
Iteration=9, Loss=0.7036364674568176
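Both train_model docstrings warn that looping over the full dataset every iteration is only acceptable for toy examples. A minimal sketch of what mini-batch training could look like with torch.utils.data (the batch size and epoch count here are arbitrary choices for illustration):
from torch.utils.data import TensorDataset, DataLoader

# Wrap the toy tensors in a Dataset and draw shuffled mini-batches from it
dataset = TensorDataset(X_train, Y_train)
loader = DataLoader(dataset, batch_size=2, shuffle=True) # batch_size=2 is an arbitrary example value

model.train()
for epoch in range(NUMBER_ITERATIONS):
    for X_batch, Y_batch in loader:
        optimizer.zero_grad()
        loss = loss_fn(model(X_batch), Y_batch) # calling model(...) runs forward()
        loss.backward()
        optimizer.step()
    print(f"Epoch={epoch}, last batch loss={loss.item()}")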