There is an excellent blog post explaining neural networks: A Neural Network in 11 lines of Python (Part 1). I also recommend reading the second part.

Anyway, after reading it and memorizing the 11 lines as recommended (a very useful recommendation!), I wanted to implement something a bit more impressive than the simple example given in that post.

So I decided to try recognizing digits. As input I used the data file from the recommended Coursera Machine Learning course. This file is a subset of the MNIST handwritten digit dataset (http://yann.lecun.com/exdb/mnist) saved as a Matlab data file.
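
For a quick look at what is inside the file before running the full script, here is a small snippet (it assumes ex3data1.mat sits in the working directory, just like the script below does):

import numpy as np
import scipy.io

data = scipy.io.loadmat("ex3data1.mat")
print(data['X'].shape)       # (5000, 400): 5000 images of 20x20 pixels, unrolled column by column
print(data['y'].shape)       # (5000, 1): labels in 1..10
print(np.unique(data['y']))  # the label 10 encodes the digit 0, a Matlab convention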

In the 11-lines post, no bias units are added. In the script below I have added them, because without bias units the network does not really learn.
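
To see why, here is a minimal illustration, separate from the script: without a constant input, a sigmoid unit outputs exactly 0.5 for an all-zero sample no matter what its weights are. Appending a 1 to every sample gives each unit a learnable intercept:

import numpy as np

x = np.zeros(400)                       # an all-black image
w = np.random.random((400, 1))          # weights, but no bias weight
print(1 / (1 + np.exp(-np.dot(x, w))))  # [0.5], whatever w is

x_plus = np.append(x, 1.0)              # append the constant bias input
w_plus = np.random.random((401, 1))     # the last weight acts as an intercept
print(1 / (1 + np.exp(-np.dot(x_plus, w_plus))))  # can move away from 0.5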

#!/usr/bin/env python3

import numpy as np
import scipy.io
import scipy.sparse


def sigmoid(z):
    # Logistic activation: squashes any real number into (0, 1).
    return 1 / (1 + np.exp(-z))


def sigmoid_output_to_derivative(a):
    # Derivative of the sigmoid expressed through its output:
    # if a = sigmoid(z), then sigmoid'(z) = a * (1 - a).
    return np.multiply(a, (1 - a))


def int_vec_to_bin_mat(indices):
    # Turn a vector of labels in 1..10 into a one-hot (m, 10) matrix.
    # A CSR matrix with exactly one entry per row is a compact way to build it.
    indptr = range(len(indices) + 1)
    data = np.ones(len(indices))
    matrix = scipy.sparse.csr_matrix((data, indices, indptr)).toarray()
    # The data follows the Matlab convention that label 10 stands for the
    # digit 0, so move column 10 to column 0 and drop the extra column.
    matrix[:, 0] = matrix[:, 10]
    matrix = matrix[:, 0:10]
    return matrix


def predict(x, w0, w1):
    # Forward pass for one sample; x arrives as a 1-D (401,) vector, so
    # lift it to a (1, 401) row first, or the bias column below is appended
    # along the wrong axis and the second dot product fails.
    x = np.atleast_2d(x)
    l1 = sigmoid(np.dot(x, w0))
    l1_plus = add_column_of_ones(l1)
    l2 = sigmoid(np.dot(l1_plus, w1))
    return np.round(l2)


def vector_to_number(p):
    # Decode a rounded one-hot output vector into a digit. If the network
    # activates no output unit, or more than one, the prediction is
    # ambiguous; return -1 instead of guessing.
    if p.sum() != 1:
        return -1
    # Multiplying by diag(1..10) puts the value k + 1 at the active column k,
    # so summing and subtracting 1 recovers the digit.
    d = np.diag(np.array(range(1, 11)))
    return int(np.dot(p, d).sum() - 1)


def add_column_of_ones(mat):
    # Append a bias column of ones as the last column.
    vector_of_ones = np.ones((mat.shape[0], 1))
    return np.c_[mat, vector_of_ones]


def slice_data(X, y, train_percentage):
    # Split the (already shuffled) samples into a training and a test set.
    m = X.shape[0]
    m_train = round(m * train_percentage)
    X_train = X[0:m_train, :]
    y_train = y[0:m_train, :]
    X_test = X[m_train:, :]
    y_test = y[m_train:, :]
    return X_train, y_train, X_test, y_test


def show_number(x):
    # Render one 20x20 image as ASCII art. The pixels are stored
    # column-major (Matlab order), hence the x[j * 20 + i] indexing.
    print("----------------------------------------")
    for i in range(0, 20):
        for j in range(0, 20):
            v = x[j * 20 + i]
            if 0.2 < v < 0.5:
                print('.', end='')
            elif 0.5 < v:
                print('+', end='')
            else:
                print(' ', end='')
        print()
    print()


np.random.seed(1)  # fixed seed so runs are reproducible

data = scipy.io.loadmat("ex3data1.mat")

X = data['X']              # (5000, 400)
X = add_column_of_ones(X)  # (5000, 401)

original_y = data['y']     # (5000, 1), labels in 1..10
y = int_vec_to_bin_mat(original_y[:, 0])     # (5000, 10)

# Shuffle samples and labels together before splitting into train and test.
randomize = np.arange(len(X))
np.random.shuffle(randomize)
X = X[randomize]
y = y[randomize]

(X_train, y_train, X_test, y_test) = slice_data(X, y, 0.8)
m_train = X_train.shape[0]
m_test = X_test.shape[0]

epsilon = 0.4   # weights are initialized uniformly in (-epsilon, epsilon)
alpha = 0.0015  # learning rate

w0 = 2 * epsilon * np.random.random((401, 45)) - epsilon  # input (400 pixels + bias) -> 45 hidden units
w1 = 2 * epsilon * np.random.random((46, 10)) - epsilon   # hidden (45 units + bias) -> 10 outputs

training_error = 0
for i in range(3000):
    l0 = X_train                                                   # (4000, 401)
    l1 = sigmoid(np.dot(l0, w0))                                   # (4000, 45)
    l1_plus = add_column_of_ones(l1)                               # (4000, 46)
    l2 = sigmoid(np.dot(l1_plus, w1))                              # (4000, 10)
    l2_error = y_train - l2                                        # (4000, 10)
    l2_delta = np.multiply(l2_error, sigmoid_output_to_derivative(l2))  # (4000, 10)
    l1_error = np.dot(l2_delta, w1.T)                              # (4000, 46)
    l1_error = l1_error[:, :-1]                                    # (4000, 45), drop the bias column's error
    l1_delta = np.multiply(l1_error, sigmoid_output_to_derivative(l1))  # (4000, 45)
    w0 = w0 + alpha * np.dot(l0.T, l1_delta)
    w1 = w1 + alpha * np.dot(l1_plus.T, l2_delta)
    training_error = np.mean(np.abs(l2_error))
    if i % 100 == 0:
        print("Error of epoch {}: {}".format(i, training_error))

correct_predictions = 0
incorrect_predictions = 0
for i in range(m_test):
    predicted_number = vector_to_number(predict(X_test[i, :], w0, w1))
    real_number = vector_to_number(y_test[i, :])
    show_number(X_test[i, :])
    if predicted_number == real_number:
        correct_predictions += 1
        print("Predicted correctly {}".format(predicted_number))
    else:
        incorrect_predictions += 1
        print("Predicted incorrectly {} as {}".format(real_number, predicted_number))

accuracy = correct_predictions / (correct_predictions + incorrect_predictions)
print("Accuracy over {} samples is {}%".format(m_test, accuracy * 100))
print("Training error: {}".format(training_error))