Logistic Regression with SGD¶
Exploring how another one of the most basic machine learning models can be implemented using PyTorch.
Imports¶
In [1]:
Copied!
from typing import Sequence, Tuple
import torch
from torch.utils.data import Dataset, DataLoader
from typing import Sequence, Tuple
import torch
from torch.utils.data import Dataset, DataLoader
Configuration¶
In [2]:
Copied!
torch.manual_seed(1)
torch.manual_seed(1)
Out[2]:
<torch._C.Generator at 0x105d1e390>
Create Dataset¶
Start by creating a dataset and dataloader for the task.
In [3]:
Copied!
class LinearlySeperableData(Dataset):
def __init__(self, b: float, w: float, sigma: float = 0.1):
self.w = torch.tensor(w)
self.b = torch.tensor(b)
self.sigma = sigma
self.X = torch.arange(-2, 2, 0.01).view(-1, 1)
z = self.b + self.w * self.X
self.y = torch.where(z + self.sigma * torch.randn(self.X.size()) > 0, 1.0, 0.0)
self.len = self.y.shape[0]
def __getitem__(self, idx: float) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
return (self.X[idx], self.y[idx])
def __len__(self) -> int:
return self.len
data = LinearlySeperableData(b=0, w=1)
print(f"n_samples = {len(data)}")
print(f"data[0] = {data[0]}")
data_loader = DataLoader(dataset=data, batch_size=5)
data_batches = list(data_loader)
print(f"mini_batch[0] = {data_batches[0]}")
class LinearlySeperableData(Dataset):
def __init__(self, b: float, w: float, sigma: float = 0.1):
self.w = torch.tensor(w)
self.b = torch.tensor(b)
self.sigma = sigma
self.X = torch.arange(-2, 2, 0.01).view(-1, 1)
z = self.b + self.w * self.X
self.y = torch.where(z + self.sigma * torch.randn(self.X.size()) > 0, 1.0, 0.0)
self.len = self.y.shape[0]
def __getitem__(self, idx: float) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
return (self.X[idx], self.y[idx])
def __len__(self) -> int:
return self.len
data = LinearlySeperableData(b=0, w=1)
print(f"n_samples = {len(data)}")
print(f"data[0] = {data[0]}")
data_loader = DataLoader(dataset=data, batch_size=5)
data_batches = list(data_loader)
print(f"mini_batch[0] = {data_batches[0]}")
n_samples = 400 data[0] = (tensor([-2.]), tensor([0.])) mini_batch[0] = [tensor([[-2.0000], [-1.9900], [-1.9800], [-1.9700], [-1.9600]]), tensor([[0.], [0.], [0.], [0.], [0.]])]
Logistic Regression with the PyTorch Optimiser¶
Now define the model. We will use a Binary Cross Entropy (BCE) loss function, which is equivalent to the negative of the log-likelhood function for a set of Bernouli trials - see here for for information.
In [4]:
Copied!
class LogisticRegressionPyTorch(torch.nn.Module):
def __init__(self, input_size: int):
super().__init__()
# this is an alternative to torch.sigmoid(torch.nn.Linear())
self.model = torch.nn.Sequential(
torch.nn.Linear(input_size, 1), torch.nn.Sigmoid()
)
def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
"""Compute a prediction."""
return self.model(X)
class LogisticRegressionPyTorch(torch.nn.Module):
def __init__(self, input_size: int):
super().__init__()
# this is an alternative to torch.sigmoid(torch.nn.Linear())
self.model = torch.nn.Sequential(
torch.nn.Linear(input_size, 1), torch.nn.Sigmoid()
)
def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
"""Compute a prediction."""
return self.model(X)
Now define the training routine.
In [5]:
Copied!
def train(
model: torch.nn.Module,
criterion: torch.nn.Module,
data_loader: DataLoader,
n_epochs: int,
learning_rate: float,
) -> Sequence[float]:
"""Train the model over multiple epochs recording the loss for each."""
def process_batch(X: torch.FloatTensor, y: torch.FloatTensor) -> float:
y_hat = model.forward(X)
loss = criterion(y_hat, y)
optimiser.zero_grad()
loss.backward()
optimiser.step()
return loss.detach().numpy().tolist()
def process_epoch() -> float:
return [process_batch(X, y) for X, y in data_loader][-1]
optimiser = torch.optim.SGD(model.parameters(), lr=0.05)
training_run = [process_epoch() for epoch in range(n_epochs)]
return training_run
def train(
model: torch.nn.Module,
criterion: torch.nn.Module,
data_loader: DataLoader,
n_epochs: int,
learning_rate: float,
) -> Sequence[float]:
"""Train the model over multiple epochs recording the loss for each."""
def process_batch(X: torch.FloatTensor, y: torch.FloatTensor) -> float:
y_hat = model.forward(X)
loss = criterion(y_hat, y)
optimiser.zero_grad()
loss.backward()
optimiser.step()
return loss.detach().numpy().tolist()
def process_epoch() -> float:
return [process_batch(X, y) for X, y in data_loader][-1]
optimiser = torch.optim.SGD(model.parameters(), lr=0.05)
training_run = [process_epoch() for epoch in range(n_epochs)]
return training_run
We now training the model using optim
.
In [6]:
Copied!
logistic_clf = LogisticRegressionPyTorch(1)
loss = torch.nn.BCELoss()
train(logistic_clf, loss, data_loader, n_epochs=20, learning_rate=0.05)
logistic_clf = LogisticRegressionPyTorch(1)
loss = torch.nn.BCELoss()
train(logistic_clf, loss, data_loader, n_epochs=20, learning_rate=0.05)
Out[6]:
[0.1400395780801773, 0.03897496312856674, 0.016650056466460228, 0.008784324862062931, 0.005244280211627483, 0.003394478466361761, 0.002325858222320676, 0.001662470051087439, 0.0012277166824787855, 0.0009306239662691951, 0.0007206659065559506, 0.0005679914029315114, 0.0004545174597296864, 0.00036841287510469556, 0.000302050553727895, 0.00025009672390297055, 0.00020888847939204425, 0.00017586114699952304, 0.0001491658331360668, 0.00012727596913464367]
Take a look at estimated parameters. Note how unimportant (unconstrained) the slope parameter is for this task.
In [7]:
Copied!
for k, v in logistic_clf.state_dict().items():
print(f"{k}: {v.numpy().tolist()}")
for k, v in logistic_clf.state_dict().items():
print(f"{k}: {v.numpy().tolist()}")
model.0.weight: [[4.43060827255249]] model.0.bias: [0.2427944391965866]
Testing the model on unseen data.
In [8]:
Copied!
test_data = LinearlySeperableData(b=0, w=1)
y_hat = torch.where(logistic_clf.forward(test_data.X) > 0.5, 1.0, 0.0)
accuracy = torch.sum(torch.where(y_hat == test_data.y, 1.0, 0.0)) / len(test_data)
accuracy
test_data = LinearlySeperableData(b=0, w=1)
y_hat = torch.where(logistic_clf.forward(test_data.X) > 0.5, 1.0, 0.0)
accuracy = torch.sum(torch.where(y_hat == test_data.y, 1.0, 0.0)) / len(test_data)
accuracy
Out[8]:
tensor(0.9800)