"""
MNIST handwritten digits identified by a basic ELM (Extreme Learning Machine).

The whole neural network is in class ELM, with methods train(), which is
actually just a linear equation solution, and predict().

Input data is in n samples each holding m data points, stored as x(n,m).
Since the data points of one sample should be kept together in memory,
NumPy's default row-major (C) storage order favors x(n,m).
The output data is n samples each in i categories (digits 0..9), stored as
y(n,i).

For memory efficiency, I won't transpose these arrays, so the network,
written in the component form, is

    y(n,i) = activation( x(n,m) @ w1(m,h) + b1(1,h) ) @ w2(h,i)

where h is the number of hidden layer nodes. This is what method predict()
computes using matrix products.

Use random but fixed elements in w1(m,h) and in b1(1,h). Mark

    M(n,h) := activation( x(n,m) @ w1(m,h) + b1(1,h) )

and pseudoinverse it to Mpinv(h,n), and solve for w2 for the training set
(x,y),

    w2(h,i) = Mpinv(h,n) @ y(n,i)

The method predict() computes

    y(n,i) = activation( x(n,m) @ w1(m,h) + b1(1,h) ) @ w2(h,i)

for the testing input X_test; the main script compares the result with the
known y_test.

Run with
    python ELM_Mnist.py
to use a smaller train/test set, print out debugging information about array
sizes, and plot a few of the digits.

Run with
    python -O ELM_Mnist.py
to train the network to the full MNIST data set. Main computes the result for
several numbers of hidden layer nodes to see how results improve - and take
longer.

Vesa Apaja, October 2022
"""
import csv
from itertools import islice
from time import time as T

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; the network itself only needs NumPy
    plt = None


def _read_csv_rows(path, limit=None):
    """Read at most `limit` rows (all rows if None) of a numeric CSV as floats."""
    with open(path, 'r', newline='') as f:
        print('reading ' + f.name)
        rows = list(islice(csv.reader(f), limit))
    if not rows:
        raise ValueError(f'{path} contains no data')
    return np.array(rows).astype(float)


def read_MNIST_data(max_train=None, max_test=None,
                    train_path='mnist_train.csv', test_path='mnist_test.csv'):
    """Load the MNIST training and test sets from CSV files.

    Each CSV row is one image: the label followed by the pixel values.

    Parameters
    ----------
    max_train, max_test : int or None
        Maximum number of rows to read from each file; None reads everything.
    train_path, test_path : path-like
        Locations of the two CSV files.

    Returns
    -------
    (train, test) : tuple of float ndarrays, one row per image.

    Raises
    ------
    SystemExit
        With exit status 1, after printing a hint, if a file is missing,
        empty, or cannot be parsed as numbers.
    """
    try:
        train = _read_csv_rows(train_path, max_train)
        test = _read_csv_rows(test_path, max_test)
    except (OSError, ValueError, csv.Error) as err:
        print('Reading data failed:', err)
        print('Have you downloaded MNIST data in csv format from https://pjreddie.com/projects/mnist-in-csv/ ?')
        raise SystemExit(1) from err
    return train, test


def _one_hot(labels, num_classes=10):
    """Convert integer labels to one-hot rows, e.g. 1 -> [0,1,0,0,0,0,0,0,0,0]."""
    labels = np.asarray(labels).reshape(-1)
    encoded = np.zeros((labels.size, num_classes), dtype=int)
    encoded[np.arange(labels.size), labels] = 1
    return encoded


def convert_data(train, test):
    """Split raw MNIST rows into scaled images and one-hot labels.

    MNIST data is given as 28x28 pictures, that's 784 values per picture;
    the first value of each row is the identification, from 0..9.

    The inputs are left untouched: the scaled images are new arrays.

    Parameters
    ----------
    train, test : ndarray of shape (n, 785)

    Returns
    -------
    X_train, y_train, X_test, y_test, y_train_orig
        X_* are the pixel values scaled to [0, 1], y_* the one-hot categories,
        and y_train_orig the numeric training labels with shape (n, 1).
    """
    if __debug__:
        print('train :', train.shape)
        print('test :', test.shape)

    # use :1 and not 0 to keep a (n, 1) column, a more usable array shape
    y_train_orig = train[:, :1].astype(int)
    test_labels = test[:, :1].astype(int)

    # Scale images to range [0, 1]; true division builds new arrays, so the
    # caller's data is not modified and integer input works as well.
    X_train = train[:, 1:] / 255
    X_test = test[:, 1:] / 255

    y_train = _one_hot(y_train_orig)
    y_test = _one_hot(test_labels)

    if __debug__:
        print('X_train :', X_train.shape)
        print('y_train :', y_train.shape)
        print('X_test :', X_test.shape)
        print('y_test :', y_test.shape)
        print('y_test = ', y_test)
    return X_train, y_train, X_test, y_test, y_train_orig


# plotting
# --------
def plot_images(title, images, labels, findings=None):
    """Show up to 25 images in a 5x5 grid.

    title: title of the whole 5x5 plot
    images: set of images to plot, first 25 picked
    labels: set of digits the images are showing
    findings (optional): set of digits the NN thinks the images are showing

    Raises ImportError if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError('matplotlib is required for plotting')
    fig, axs = plt.subplots(5, 5, figsize=(8, 8))
    fig.suptitle(title)
    for ax in axs.flat:
        ax.axis('off')
    # zip stops at the shortest input, so fewer than 25 images simply leaves
    # the remaining cells empty.
    for k, (ax, image, label) in enumerate(zip(axs.flat, images, labels)):
        ax.imshow(image, cmap='Greys')
        if findings is not None and k < len(findings):
            ax.set_title(f'{label} not {findings[k]}')
        else:
            ax.set_title(f'{label}')
    # draw without blocking, so the computation can go on
    plt.ion()
    plt.draw()
    plt.pause(1e-3)
    plt.ioff()


# activation functions
def relu(z):
    """Rectified linear unit, max(z, 0).

    Works IN PLACE: z must be an ndarray, it is overwritten and returned.
    This avoids a second copy of the (large) hidden layer matrix.
    """
    return np.maximum(z, 0, out=z)


def sigmoid(z):
    """Logistic function 1/(1+exp(-z)), evaluated without overflow.

    exp(-log(1+exp(-z))) is the same function, but logaddexp keeps the
    intermediate finite for large negative z.
    """
    return np.exp(-np.logaddexp(0, -z))


class ELM():
    """Extreme Learning Machine with a single hidden layer.

    The input weights w1(m,h) and biases b1(1,h) are random and never trained;
    only the output weights w2(h,i) are solved for, in train().

    Parameters
    ----------
    hidden_size : int
        Number of hidden layer nodes, h.
    activation : callable
        Elementwise activation applied to the hidden layer (may work in place).
    seed : int or None
        Seed of the random generator used for w1 and b1; give a value to get
        reproducible weights.
    """

    def __init__(self, hidden_size=1000, activation=relu, seed=None):
        self.hidden_size = hidden_size
        self.activation = activation
        self._rng = np.random.default_rng(seed)
        # set by train()
        self.w1 = self.b1 = self.w2 = None
        print('ELM: activation',
              getattr(self.activation, '__name__', repr(self.activation)))
        print('ELM: number of nodes in hidden layer ', self.hidden_size)

    def _set_weight_bias(self, n, m, std=1.0):
        """Draw normally distributed w1(n,m) and b1(1,m) with deviation std."""
        self.w1 = self._rng.normal(scale=std, size=(n, m))
        self.b1 = self._rng.normal(scale=std, size=(1, m))

    def _hidden(self, x):
        """Hidden layer output M(n,h) = activation( x(n,m) @ w1(m,h) + b1(1,h) )."""
        return self.activation(x @ self.w1 + self.b1)

    def train(self, x, y):
        """Draw new random w1, b1 and solve w2 = pinv(M) @ y for the set (x, y).

        x : ndarray (n, m), input samples
        y : ndarray (n, i), known outputs (one-hot categories)
        """
        self._set_weight_bias(x.shape[1], self.hidden_size)
        if __debug__:
            print('w1:', self.w1.shape)
            print('x:', x.shape)
            print('b1:', self.b1.shape)
        M = self._hidden(x)
        if __debug__:
            print('M:', M.shape)
            print('y:', y.shape)
        Mpinv = np.linalg.pinv(M)
        if __debug__:
            print('Mpinv:', Mpinv.shape)
        self.w2 = Mpinv @ y
        if __debug__:
            print('w2: ', self.w2.shape)

    def predict(self, x):
        """Return y(n,i) = activation( x @ w1 + b1 ) @ w2 for input x(n,m).

        Raises RuntimeError if train() has not been called yet.
        """
        if self.w2 is None:
            raise RuntimeError('ELM.predict() called before ELM.train()')
        return self._hidden(x) @ self.w2


def main():
    """Train and test ELMs with 100..1000 hidden nodes on the MNIST data."""
    if plt is None:
        raise SystemExit('matplotlib is required to run this script')

    # python -O turns __debug__ off: full data set, no sample plots
    plot_figures = __debug__
    if __debug__:
        maxtrain, maxtest = 400, 15
    else:
        maxtrain = maxtest = None  # all

    train, test = read_MNIST_data(maxtrain, maxtest)
    X_train, y_train, X_test, y_test, y_train_orig = convert_data(train, test)
    del train, test  # the converted arrays are copies; release the raw data

    if plot_figures:
        plot_images('Sample of training digits',
                    X_train.reshape(-1, 28, 28),
                    y_train_orig[:, 0])
        plt.draw()
        plt.pause(1e-3)

    # undo the one-hot encoding of the known answers
    true_values = np.argmax(y_test, axis=1)
    n = len(true_values)

    res = []
    for hidden in range(100, 1100, 100):
        tic = T()
        model = ELM(hidden_size=hidden)
        model.train(X_train, y_train)
        y_pred = model.predict(X_test)
        # the category with the largest output is the identified digit
        pred_values = np.argmax(y_pred, axis=1)
        bad_filter, = np.where(pred_values != true_values)
        nbad = len(bad_filter)
        acc = (n - nbad) * 100.0 / n
        print(f'accuracy {acc:.5f} % {nbad} bad identifications out of {n} test images')
        toc = T()
        res.append([hidden, acc, toc - tic])
        if plot_figures:
            plot_images('Some misinterpreted digits',
                        X_test.reshape(-1, 28, 28)[bad_filter],
                        true_values[bad_filter],
                        pred_values[bad_filter])
            # in debug mode one network size is enough
            break
    else:
        # the full sweep was done: show accuracy and timing vs. hidden nodes
        hids, accs, times = zip(*res)
        plt.figure(1)
        plt.plot(hids, accs, 'go-')
        plt.xlabel('hidden nodes')
        plt.ylabel('accuracy (%)')
        plt.draw()
        plt.pause(1e-3)
        plt.figure(2)
        plt.plot(hids, times, 'go-')
        plt.xlabel('hidden nodes')
        plt.ylabel('timing (s)')
        plt.draw()
        plt.pause(1e-3)
    plt.show()


if __name__ == '__main__':
    main()