Logistic Regression with SGD¶

Exploring how another one of the most basic machine learning models can be implemented using PyTorch.

Imports¶

In [1]:

Copied!

from typing import Sequence, Tuple

import torch
from torch.utils.data import Dataset, DataLoader
from typing import Sequence, Tuple

import torch
from torch.utils.data import Dataset, DataLoader

Configuration¶

In [2]:

Copied!

torch.manual_seed(1)
torch.manual_seed(1)

Out[2]:

<torch._C.Generator at 0x105d1e390>

Create Dataset¶

Start by creating a dataset and dataloader for the task.

In [3]:

Copied!





class LinearlySeperableData(Dataset):
    def __init__(self, b: float, w: float, sigma: float = 0.1):
        self.w = torch.tensor(w)
        self.b = torch.tensor(b)
        self.sigma = sigma
        self.X = torch.arange(-2, 2, 0.01).view(-1, 1)

        z = self.b + self.w * self.X
        self.y = torch.where(z + self.sigma * torch.randn(self.X.size()) > 0, 1.0, 0.0)
        self.len = self.y.shape[0]

    def __getitem__(self, idx: float) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        return (self.X[idx], self.y[idx])

    def __len__(self) -> int:
        return self.len


data = LinearlySeperableData(b=0, w=1)
print(f"n_samples = {len(data)}")
print(f"data[0] = {data[0]}")

data_loader = DataLoader(dataset=data, batch_size=5)
data_batches = list(data_loader)
print(f"mini_batch[0] = {data_batches[0]}")
class LinearlySeperableData(Dataset):
    def __init__(self, b: float, w: float, sigma: float = 0.1):
        self.w = torch.tensor(w)
        self.b = torch.tensor(b)
        self.sigma = sigma
        self.X = torch.arange(-2, 2, 0.01).view(-1, 1)

        z = self.b + self.w * self.X
        self.y = torch.where(z + self.sigma * torch.randn(self.X.size()) > 0, 1.0, 0.0)
        self.len = self.y.shape[0]

    def __getitem__(self, idx: float) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        return (self.X[idx], self.y[idx])

    def __len__(self) -> int:
        return self.len


data = LinearlySeperableData(b=0, w=1)
print(f"n_samples = {len(data)}")
print(f"data[0] = {data[0]}")

data_loader = DataLoader(dataset=data, batch_size=5)
data_batches = list(data_loader)
print(f"mini_batch[0] = {data_batches[0]}")

n_samples = 400
data[0] = (tensor([-2.]), tensor([0.]))
mini_batch[0] = [tensor([[-2.0000],
        [-1.9900],
        [-1.9800],
        [-1.9700],
        [-1.9600]]), tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.]])]

Logistic Regression with the PyTorch Optimiser¶

Now define the model. We will use a Binary Cross Entropy (BCE) loss function, which is equivalent to the negative of the log-likelhood function for a set of Bernouli trials - see here for for information.

In [4]:

Copied!





class LogisticRegressionPyTorch(torch.nn.Module):
    def __init__(self, input_size: int):
        super().__init__()
        # this is an alternative to torch.sigmoid(torch.nn.Linear())
        self.model = torch.nn.Sequential(
            torch.nn.Linear(input_size, 1), torch.nn.Sigmoid()
        )

    def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
        """Compute a prediction."""
        return self.model(X)
class LogisticRegressionPyTorch(torch.nn.Module):
    def __init__(self, input_size: int):
        super().__init__()
        # this is an alternative to torch.sigmoid(torch.nn.Linear())
        self.model = torch.nn.Sequential(
            torch.nn.Linear(input_size, 1), torch.nn.Sigmoid()
        )

    def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
        """Compute a prediction."""
        return self.model(X)

Now define the training routine.

In [5]:

Copied!





def train(
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    data_loader: DataLoader,
    n_epochs: int,
    learning_rate: float,
) -> Sequence[float]:
    """Train the model over multiple epochs recording the loss for each."""

    def process_batch(X: torch.FloatTensor, y: torch.FloatTensor) -> float:
        y_hat = model.forward(X)
        loss = criterion(y_hat, y)
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        return loss.detach().numpy().tolist()

    def process_epoch() -> float:
        return [process_batch(X, y) for X, y in data_loader][-1]

    optimiser = torch.optim.SGD(model.parameters(), lr=0.05)
    training_run = [process_epoch() for epoch in range(n_epochs)]
    return training_run
def train(
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    data_loader: DataLoader,
    n_epochs: int,
    learning_rate: float,
) -> Sequence[float]:
    """Train the model over multiple epochs recording the loss for each."""

    def process_batch(X: torch.FloatTensor, y: torch.FloatTensor) -> float:
        y_hat = model.forward(X)
        loss = criterion(y_hat, y)
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        return loss.detach().numpy().tolist()

    def process_epoch() -> float:
        return [process_batch(X, y) for X, y in data_loader][-1]

    optimiser = torch.optim.SGD(model.parameters(), lr=0.05)
    training_run = [process_epoch() for epoch in range(n_epochs)]
    return training_run

We now training the model using optim.

In [6]:

Copied!

logistic_clf = LogisticRegressionPyTorch(1)
loss = torch.nn.BCELoss()
train(logistic_clf, loss, data_loader, n_epochs=20, learning_rate=0.05)
logistic_clf = LogisticRegressionPyTorch(1)
loss = torch.nn.BCELoss()
train(logistic_clf, loss, data_loader, n_epochs=20, learning_rate=0.05)

Out[6]:

[0.1400395780801773,
 0.03897496312856674,
 0.016650056466460228,
 0.008784324862062931,
 0.005244280211627483,
 0.003394478466361761,
 0.002325858222320676,
 0.001662470051087439,
 0.0012277166824787855,
 0.0009306239662691951,
 0.0007206659065559506,
 0.0005679914029315114,
 0.0004545174597296864,
 0.00036841287510469556,
 0.000302050553727895,
 0.00025009672390297055,
 0.00020888847939204425,
 0.00017586114699952304,
 0.0001491658331360668,
 0.00012727596913464367]

Take a look at estimated parameters. Note how unimportant (unconstrained) the slope parameter is for this task.

In [7]:

Copied!

for k, v in logistic_clf.state_dict().items():
    print(f"{k}: {v.numpy().tolist()}")
for k, v in logistic_clf.state_dict().items():
    print(f"{k}: {v.numpy().tolist()}")

model.0.weight: [[4.43060827255249]]
model.0.bias: [0.2427944391965866]

Testing the model on unseen data.

In [8]:

Copied!

test_data = LinearlySeperableData(b=0, w=1)

y_hat = torch.where(logistic_clf.forward(test_data.X) > 0.5, 1.0, 0.0)
accuracy = torch.sum(torch.where(y_hat == test_data.y, 1.0, 0.0)) / len(test_data)
accuracy
test_data = LinearlySeperableData(b=0, w=1)

y_hat = torch.where(logistic_clf.forward(test_data.X) > 0.5, 1.0, 0.0)
accuracy = torch.sum(torch.where(y_hat == test_data.y, 1.0, 0.0)) / len(test_data)
accuracy

Out[8]:

tensor(0.9800)