Simple NumPy Neural Network for Digits Recognition

There is an excellent blog post explaining neural networks: A Neural Network in 11 lines of Python (Part 1). I also recommend reading the second part.
Anyway, after reading it and memorizing the 11 lines as recommended (a very useful recommendation!), I wanted to implement something a bit more impressive than the simple example given in that post.
So I decided to try recognizing digits. As input I used the data file from the recommended Coursera Machine Learning course; it is a subset of the MNIST handwritten digit dataset (http://yann.lecun.com/exdb/mnist) saved as a MATLAB data file.
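If you want to poke at the file before running the full script, a quick sanity check (assuming ex3data1.mat sits in the working directory) could look like this:

import scipy.io

data = scipy.io.loadmat("ex3data1.mat")
print(data['X'].shape)  # (5000, 400): 5000 images of 20x20 pixels, unrolled into rows
print(data['y'].shape)  # (5000, 1): labels 1..10, where 10 stands for the digit 0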
In the 11-lines post, bias units are not added. In the script below I have added them, because without them the algorithm does not really learn.
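Concretely, "adding a bias unit" here just means appending a column of ones to a layer's activations, so one extra row of the next weight matrix acts as a bias vector. A minimal sketch of the idea (the names and shapes are only for illustration, not tied to the script below):

import numpy as np

a = np.random.random((4, 3))        # activations of some layer: 4 samples, 3 units
a_plus = np.c_[a, np.ones((4, 1))]  # same activations with a constant 1 appended
w = np.random.random((4, 2))        # 3 inputs + 1 bias row -> 2 outputs
z = np.dot(a_plus, w)               # the last row of w is effectively a bias term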
#!/usr/bin/env python3
import numpy as np
import scipy
from scipy import io
from scipy import sparse
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def sigmoid_output_to_derivative(a):
    # Derivative of the sigmoid expressed in terms of its output a = sigmoid(z).
    return np.multiply(a, (1 - a))
def int_vec_to_bin_mat(indices):
    # One-hot encode the labels: each row gets a single 1 at column indices[i].
    indptr = range(len(indices) + 1)
    data = np.ones(len(indices))
    matrix = scipy.sparse.csr_matrix((data, indices, indptr)).todense()
    # In ex3data1.mat the digit 0 is labelled 10, so move that column to index 0
    # and keep only columns 0..9 (column index == digit).
    matrix[:, 0] = matrix[:, 10]
    matrix = matrix[:, 0:10]
    return matrix
def predict(x, w0, w1):
    x = np.atleast_2d(x)  # accept a single sample passed as a 1-D array
    l1 = sigmoid(np.dot(x, w0))
    l1_plus = add_column_of_ones(l1)
    l2 = sigmoid(np.dot(l1_plus, w1))
    return np.round(l2)
def vector_to_number(p):
    # Convert a one-hot (or rounded prediction) row vector back to a digit.
    if p.sum() == 0:
        return 0
    elif p.sum() > 1:
        # More than one output unit is active: no clear prediction.
        return -1
    else:
        d = np.diag(np.array(range(1, 11)))
        return np.dot(p, d).sum() - 1
def add_column_of_ones(mat):
    # Append a bias column of ones so the next weight matrix includes a bias row.
    vector_of_ones = np.ones((mat.shape[0], 1))
    return np.c_[mat, vector_of_ones]
def slice_data(X, y, train_percentage):
    m = X.shape[0]
    m_train = round(m * train_percentage)
    X_train = X[0:m_train, :]
    y_train = y[0:m_train, :]
    X_test = X[m_train:, :]
    y_test = y[m_train:, :]
    return X_train, y_train, X_test, y_test
def show_number(x):
    # Render a 20x20 image as ASCII art (pixels are stored column by column,
    # as in the MATLAB file, hence the x[j * 20 + i] indexing).
    print("----------------------------------------")
    for i in range(0, 20):
        for j in range(0, 20):
            v = x[j * 20 + i]
            if 0.2 < v < 0.5:
                print('.', end='')
            elif 0.5 < v:
                print('+', end='')
            else:
                print(' ', end='')
        print()
    print()
np.random.seed(1)

# Load the data: 5000 images of 20x20 = 400 pixels each.
data = scipy.io.loadmat("ex3data1.mat")
X = data['X']  # (5000, 400)
X = add_column_of_ones(X)  # (5000, 401)
original_y = data['y']  # (5000, 1), labels 1..10 (10 stands for the digit 0)
y = int_vec_to_bin_mat(original_y[:, 0].T)  # (5000, 10)

# Shuffle the samples and split them 80/20 into training and test sets.
randomize = np.arange(len(X))
np.random.shuffle(randomize)
X = X[randomize]
y = y[randomize]
(X_train, y_train, X_test, y_test) = slice_data(X, y, 0.8)
m_train = X_train.shape[0]
m_test = X_test.shape[0]

# Weights start in [-epsilon, epsilon); alpha is the learning rate.
epsilon = 0.4
alpha = 0.0015

# Architecture: 400 inputs + bias -> 45 hidden units + bias -> 10 outputs.
w0 = 2 * epsilon * np.random.random((401, 45)) - epsilon
w1 = 2 * epsilon * np.random.random((46, 10)) - epsilon
training_error = 0
for i in range(3000):
    # Forward pass.
    l0 = X_train  # (4000, 401)
    l1 = sigmoid(np.dot(l0, w0))  # (4000, 45)
    l1_plus = add_column_of_ones(l1)  # (4000, 46)
    l2 = sigmoid(np.dot(l1_plus, w1))  # (4000, 10)

    # Backpropagation: push the output error back through the network.
    l2_error = y_train - l2  # (4000, 10)
    l2_delta = np.multiply(l2_error, sigmoid_output_to_derivative(l2))  # (4000, 10)
    l1_error = np.dot(l2_delta, w1.T)  # (4000, 46)
    l1_error = l1_error[:, :-1]  # (4000, 45), drop the error on the bias column
    l1_delta = np.multiply(l1_error, sigmoid_output_to_derivative(l1))  # (4000, 45)

    # Weight updates.
    w0 = w0 + alpha * np.dot(l0.T, l1_delta)
    w1 = w1 + alpha * np.dot(l1_plus.T, l2_delta)

    training_error = np.mean(np.abs(l2_error))
    if i % 100 == 0:
        print("Error of epoch {}: {}".format(i, training_error))
correct_predictions = 0
incorrect_predictions = 0
for i in range(m_test):
    predicted_number = vector_to_number(predict(X_test[i, :], w0, w1))
    real_number = vector_to_number(y_test[i, :])
    show_number(X_test[i, :])
    if predicted_number == real_number:
        correct_predictions += 1
        print("Predicted correctly {}".format(predicted_number))
    else:
        incorrect_predictions += 1
        print("Predicted incorrectly {} as {}".format(real_number, predicted_number))

accuracy = correct_predictions / (correct_predictions + incorrect_predictions)
print("Accuracy over {} samples is {}%".format(m_test, accuracy * 100))
print("Training error: {}".format(training_error))