← AI Foundations
Lab

Lab: Training a Perceptron

25 min Python

Goal

This lab builds a single perceptron from scratch in pure Python, trains it to learn the AND gate, then the OR gate, then observes it fail on XOR. This demonstrates why single-layer networks cannot learn non-linear decision boundaries.

Setup

Python 3.8 or later is required. No external packages are needed – this lab uses only the standard library random module.

Create a file called perceptron_lab.py.

Step 1: Implement the Perceptron

A perceptron computes a weighted sum of its inputs, adds a bias, and passes the result through a step activation function. If the result is above zero, output 1. Otherwise, output 0.

import random

class Perceptron:
    """A single perceptron with a step activation.

    The output is 1 when ``sum(w_i * x_i) + bias`` is strictly greater
    than zero and 0 otherwise. Training uses the perceptron learning rule.
    """

    def __init__(self, num_inputs, learning_rate=0.1, seed=42):
        """Create a perceptron with small random weights and bias.

        Args:
            num_inputs: Number of inputs, and therefore number of weights.
            learning_rate: Step size applied to every weight/bias update.
            seed: Seed for the weight initializer. The default of 42 keeps
                every run reproducible; pass None for a different starting
                point on each run.
        """
        # Use a private generator instead of random.seed(): it produces the
        # same values for the same seed, but does not reset the shared
        # module-level random state for the rest of the program.
        rng = random.Random(seed)
        self.weights = [rng.uniform(-0.5, 0.5) for _ in range(num_inputs)]
        self.bias = rng.uniform(-0.5, 0.5)
        self.lr = learning_rate

    def predict(self, inputs):
        """Return 1 if the weighted sum plus bias is above zero, else 0."""
        total = sum(w * x for w, x in zip(self.weights, inputs)) + self.bias
        return 1 if total > 0 else 0

    def train_step(self, inputs, target):
        """Apply one perceptron learning rule update for a single example.

        Args:
            inputs: Input values for the example.
            target: Desired output (0 or 1).

        Returns:
            The error ``target - prediction``: 0 when the example was
            already classified correctly, otherwise +1 or -1.
        """
        error = target - self.predict(inputs)
        # lr * error is shared by every weight update and the bias update.
        step = self.lr * error
        # w_i += lr * error * x_i
        for i, (w, x) in enumerate(zip(self.weights, inputs)):
            self.weights[i] = w + step * x
        # b += lr * error
        self.bias += step
        return error

    def __repr__(self):
        """Show weights and bias rounded to four decimal places."""
        w_str = ", ".join(f"{w:.4f}" for w in self.weights)
        return f"Perceptron(weights=[{w_str}], bias={self.bias:.4f})"

This is the entire perceptron: multiply, sum, threshold. The learning rule adjusts weights in the direction that reduces the error.

Step 2: Train on the AND Gate

The AND gate outputs 1 only when both inputs are 1. Train the perceptron on all four input combinations.

def train(perceptron, data, epochs=20):
    """Run the learning rule over ``data`` for at most ``epochs`` passes.

    Prints the starting state, then a progress line on every fifth epoch
    and on the epoch where no example is misclassified. Training stops
    early once a full pass produces zero errors.

    Returns the same perceptron object that was passed in.
    """
    print(f"  Initial: {perceptron}")
    for epoch_no in range(1, epochs + 1):
        # Each train_step returns -1, 0 or +1; count the non-zero ones.
        mistakes = sum(abs(perceptron.train_step(x, y)) for x, y in data)
        converged = mistakes == 0
        if converged or epoch_no % 5 == 0:
            print(f"  Epoch {epoch_no:3d}: {perceptron}  errors={mistakes}")
        if converged:
            print(f"  Converged at epoch {epoch_no}")
            break
    return perceptron


def test(perceptron, data, label):
    """Test the perceptron and print a truth table."""
    print(f"\n  {label} Truth Table:")
    print(f"  {'x1':>4} {'x2':>4} {'target':>6} {'output':>6} {'correct':>7}")
    all_correct = True
    for inputs, target in data:
        output = perceptron.predict(inputs)
        correct = output == target
        if not correct:
            all_correct = False
        print(f"  {inputs[0]:>4} {inputs[1]:>4} {target:>6} {output:>6} {'yes' if correct else 'NO':>7}")
    return all_correct


# AND gate truth table: the target is 1 only for the input (1, 1).
# Rows are generated in the order (0,0), (0,1), (1,0), (1,1).
and_data = [
    ([x1, x2], x1 & x2)
    for x1 in (0, 1)
    for x2 in (0, 1)
]

print("Training AND gate:")
p_and = Perceptron(2, learning_rate=0.1)
train(p_and, data=and_data)
test(p_and, data=and_data, label="AND")

Run the program:

python3 perceptron_lab.py
Training AND gate:
  Initial: Perceptron(weights=[-0.1247, 0.2324], bias=0.1136)
  Epoch   1: Perceptron(weights=[-0.1247, 0.1324], bias=-0.0864)  errors=2
  Epoch   5: Perceptron(weights=[0.0753, 0.1324], bias=-0.1864)  errors=0
  Converged at epoch 5

  AND Truth Table:
    x1   x2 target output correct
     0    0      0      0     yes
     0    1      0      0     yes
     1    0      0      0     yes
     1    1      1      1     yes

The perceptron learned AND in a handful of epochs. The weights and bias define a line that separates the (1,1) case from the rest.

Step 3: Visualize the Decision Boundary

Print an ASCII visualization of what the perceptron learned. The decision boundary is the line where w1*x1 + w2*x2 + bias = 0.

def print_decision_boundary(perceptron, label):
    """Print the learned boundary equation and an ASCII map of the outputs.

    Expects a perceptron with exactly two weights. The map samples x1 and
    x2 from 0.0 to 1.0 in steps of 0.1 (an 11x11 grid), drawing '#' where
    the perceptron outputs 1 and '.' elsewhere, with x2 = 1.0 on the top row.
    """
    w1, w2 = perceptron.weights
    bias = perceptron.bias

    print(f"\n  {label} Decision Boundary:")
    print(f"  {w1:.4f}*x1 + {w2:.4f}*x2 + {bias:.4f} = 0")

    # The x2 = ... form is only shown when w2 is not (nearly) zero.
    if abs(w2) > 0.0001:
        print(f"  Solved for x2: x2 = ({-w1:.4f}*x1 + {-bias:.4f}) / {w2:.4f}")

    # Sample points 0.0, 0.1, ..., 1.0 used on both axes.
    ticks = [step / 10.0 for step in range(11)]

    print("\n  x2")
    for row in reversed(range(11)):
        x2 = ticks[row]
        cells = "".join(
            "#" if perceptron.predict([x1, x2]) == 1 else "."
            for x1 in ticks
        )
        # Label only the rows for x2 = 1.0, 0.5 and 0.0.
        suffix = f"  {x2:.1f}" if row % 5 == 0 else ""
        print("  " + cells + suffix)
    print("  -----------")
    print("  x1        0.0       1.0")

# Show the boundary equation and ASCII map for the AND perceptron trained above.
print_decision_boundary(p_and, "AND")
  AND Decision Boundary:
  0.0753*x1 + 0.1324*x2 + -0.1864 = 0
  Solved for x2: x2 = (-0.0753*x1 + 0.1864) / 0.1324

  x2
  ........###  1.0
  .........##
  ...........
  ...........
  ...........
  ...........  0.5
  ...........
  ...........
  ...........
  ...........
  ...........  0.0
  -----------
  x1        0.0       1.0

The # region is where the perceptron outputs 1. The diagonal line separating . from # is the learned decision boundary.

Step 4: Train on the OR Gate

OR outputs 1 when at least one input is 1. A single perceptron can learn this too.

# OR gate truth table: the target is 1 unless both inputs are 0.
# Rows are generated in the order (0,0), (0,1), (1,0), (1,1).
or_data = [
    ([x1, x2], x1 | x2)
    for x1 in (0, 1)
    for x2 in (0, 1)
]

print("\n\nTraining OR gate:")
p_or = Perceptron(2, learning_rate=0.1)
train(p_or, data=or_data)
test(p_or, data=or_data, label="OR")
print_decision_boundary(p_or, label="OR")
Training OR gate:
  Initial: Perceptron(weights=[-0.1247, 0.2324], bias=0.1136)
  Epoch   2: Perceptron(weights=[0.0753, 0.2324], bias=-0.0864)  errors=0
  Converged at epoch 2

  OR Truth Table:
    x1   x2 target output correct
     0    0      0      0     yes
     0    1      1      1     yes
     1    0      1      1     yes
     1    1      1      1     yes

OR converges even faster. The decision boundary now separates (0,0) from the other three points.

Step 5: Attempt XOR and Observe Failure

XOR outputs 1 when exactly one input is 1. This is where the single perceptron breaks down.

# XOR gate truth table: the target is 1 when the two inputs differ.
# Rows are generated in the order (0,0), (0,1), (1,0), (1,1).
xor_data = [
    ([x1, x2], x1 ^ x2)
    for x1 in (0, 1)
    for x2 in (0, 1)
]

print("\n\nTraining XOR gate:")
p_xor = Perceptron(2, learning_rate=0.1)
train(p_xor, data=xor_data, epochs=100)
success = test(p_xor, data=xor_data, label="XOR")
print_decision_boundary(p_xor, label="XOR")

if not success:
    # Explain the failure; the empty entry produces the blank separator line.
    failure_report = (
        "\n  XOR FAILED: a single perceptron cannot learn XOR.",
        "  The four XOR points are not linearly separable.",
        "  (0,0)=0 and (1,1)=0 are on opposite corners from",
        "  (0,1)=1 and (1,0)=1. No single straight line can",
        "  separate the 0s from the 1s.",
        "",
        "  This is the Minsky-Papert limitation (1969) that",
        "  motivated multi-layer networks: stack perceptrons",
        "  in layers and non-linear problems become solvable.",
    )
    print("\n".join(failure_report))
Training XOR gate:
  Initial: Perceptron(weights=[-0.1247, 0.2324], bias=0.1136)
  Epoch   5: Perceptron(weights=[0.0753, 0.0324], bias=-0.0864)  errors=2
  Epoch  10: Perceptron(weights=[-0.0247, 0.1324], bias=-0.0864)  errors=2
  ...
  Epoch 100: Perceptron(weights=[0.0753, -0.0676], bias=-0.0864)  errors=2

  XOR Truth Table:
    x1   x2 target output correct
     0    0      0      0     yes
     0    1      1      0      NO
     1    0      1      1     yes
     1    1      0      0     yes

  XOR FAILED: a single perceptron cannot learn XOR.
  The four XOR points are not linearly separable.
  (0,0)=0 and (1,1)=0 are on opposite corners from
  (0,1)=1 and (1,0)=1. No single straight line can
  separate the 0s from the 1s.

  This is the Minsky-Papert limitation (1969) that
  motivated multi-layer networks: stack perceptrons
  in layers and non-linear problems become solvable.

The perceptron never converges on XOR. The weights keep oscillating, and in every epoch at least one of the four examples is misclassified. No matter how long training continues, a single straight line cannot separate the XOR outputs, so the error count never reaches zero.

Summary

This lab demonstrated the perceptron learning algorithm in pure Python: