I’m using NumPy to implement a convolutional neural network from scratch, but the accuracy does not improve with training. Can anyone help me find the problem in the code?
- Dataset: MNIST
- Structure:
- Input 28*28
- Conv 20@5*5
- MaxPooling 2*2
- FullConnected 100
- Softmax 10
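For reference, here is the shape walkthrough I expect from this stack (an illustrative sanity check only, not part of the network code):

h = w = 28                # input image
h = (h - 5) // 1 + 1      # Conv 5*5, stride 1, no padding -> 24
w = (w - 5) // 1 + 1
h, w = h // 2, w // 2     # MaxPooling 2*2 -> 12
flat = 20 * h * w         # 20 feature maps flattened -> 2880
print(flat)               # input dimension of the FullConnected 100 layer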
import numpy as np
import matplotlib.pyplot as plt
from mnist import imSet4train, lbSet4train
from mnist import imSet4test, lbSet4test
from nn import sigmoid, sigmoid1st
log = open("log.log", "w")
np.set_printoptions(linewidth=np.inf)
np.set_printoptions(threshold=999999)
xavier = lambda fanI, fanO: np.sqrt(2.0 / (fanI + fanO))
softmax = lambda z : np.exp(z) / np.sum(np.exp(z))
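# NOTE: this softmax is not numerically stable; np.exp overflows for large z.
# A common fix is to subtract np.max(z) from z before exponentiating.

# mat_iterator yields every (h_of_sub, w_of_sub) window of mat, stepping by
# stride, together with the window's output coordinates (i, j).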
def mat_iterator(mat:np.ndarray, h_of_mat:int, w_of_mat:int, h_of_sub:int, w_of_sub:int, stride:int=1):
for i in range((h_of_mat - h_of_sub) // stride + 1):
for j in range((w_of_mat - w_of_sub) // stride + 1):
yield mat[i * stride : i * stride + h_of_sub,
j * stride : j * stride + w_of_sub], i, j
pass
pass
pass
class ConvNeuNet(object):
class meta(object):
def __init__(self, name):
self.name = name
self.w :np.ndarray = None
self.b :np.ndarray = None
self.w_delta :np.ndarray = None
self.b_delta :np.ndarray = None
self.iput :np.ndarray = None
self.oput_ina:np.ndarray = None
self.oput_act:np.ndarray = None
pass
def __call__(self, prev_act, prev_ina):
pass
def BP(self, err):
pass
pass
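    # IPut: pass-through input layer; it stores the 28*28 image and forwards it unchanged.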
class IPut(meta):
def __init__(self, name, h, w):
super().__init__(name)
self.oh = h
self.ow = w
self.oput_act = np.zeros((h, w))
pass
def __call__(self, prev:np.ndarray) -> np.ndarray:
self.iput = prev
self.oput_act = self.iput
return self.oput_act
def BP(self, err:np.ndarray) -> np.ndarray:
return err
pass
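    # Conv: valid convolution with nk kernels of size kh*kw followed by a sigmoid.
    # Note: the padding argument is stored but never applied inside __call__.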
class Conv(meta):
def __init__(self, name, size_i, size_k, stride=1, padding=0):
super().__init__(name)
self.nk = size_k[0]
self.kh = size_k[1]
self.kw = size_k[2]
self.xavier = xavier(self.kh * self.kw, self.kh * self.kw * self.nk)
self.stride = stride
self.padding = padding
self.w = np.random.randn(self.nk, self.kh, self.kw) * self.xavier
self.b = np.random.randn(self.nk)
self.oput_ina = np.zeros((self.nk,
(size_i[0] - self.kh + 2 * self.padding) // self.stride + 1,
(size_i[1] - self.kw + 2 * self.padding) // self.stride + 1))
self.oput_act = self.oput_ina.copy()
pass
def __call__(self, prev:np.ndarray) -> np.ndarray:
self.iput = prev
for k in range(self.nk):
for block, i, j in mat_iterator(self.iput, self.iput.shape[0], self.iput.shape[1], self.kh, self.kw, self.stride):
self.oput_ina[k, i, j] = np.sum(block * self.w[k], axis=(0,1)) + self.b[k]
self.oput_act[k, i, j] = sigmoid(self.oput_ina[k, i, j])
pass
pass
return self.oput_act
def BP(self, err:np.ndarray) -> np.ndarray:
err = err.reshape(self.oput_act.shape)
err *= sigmoid1st(self.oput_ina)
self.w_delta = np.zeros(self.w.shape)
self.b_delta = np.zeros(self.b.shape)
for k in range(self.nk):
for block, i, j in mat_iterator(self.iput, self.iput.shape[0], self.iput.shape[1], self.kh, self.kw, self.stride):
self.w_delta[k] += err[k, i, j] * block
pass
self.b_delta[k] = np.sum(err[k], axis=(0,1))
pass
return err
pass
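    # PoolingMAX: non-overlapping max pooling; oput_pos records each window's argmax position for use in BP.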
class PoolingMAX(meta):
def __init__(self, name, size_i, size_p, stride=2):
super().__init__(name)
self.ph = size_p[0]
self.pw = size_p[1]
self.stride = stride
self.oput_act = np.zeros((size_i[0],
size_i[1] // self.ph,
size_i[2] // self.pw))
self.oput_pos = [[[None for j in range(self.oput_act.shape[2])]
for i in range(self.oput_act.shape[1])]
for k in range(self.oput_act.shape[0])]
pass
def __call__(self, prev:np.ndarray) -> np.ndarray:
self.iput = prev
for k in range(self.oput_act.shape[0]):
for block, i, j in mat_iterator(self.iput[k], self.iput.shape[1], self.iput.shape[2], self.ph, self.pw, self.stride):
self.oput_act[k, i, j] = np.max(block)
self.oput_pos[k][i][j] = np.unravel_index(np.argmax(block.flatten()), block.shape)
pass
pass
return self.oput_act
def BP(self, err_i:np.ndarray) -> np.ndarray:
err_i = err_i.reshape(self.oput_act.shape)
err_o = np.zeros((err_i.shape[0], err_i.shape[1] * self.ph, err_i.shape[2] * self.pw))
for k in range(err_i.shape[0]):
for block, i, j in mat_iterator(err_o[k], err_o.shape[1], err_o.shape[2], self.ph, self.pw, self.stride):
block[self.oput_pos[k][i][j][0], self.oput_pos[k][i][j][1]] = self.oput_act[k, i, j]
# err_o[k, i * self.ph : (i + 1) * self.ph, j * self.pw : (j + 1) * self.pw] = block
pass
pass
return err_o
pass
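    # FullConn: dense layer with sigmoid activation; the input is flattened into a column vector.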
class FullConn(meta):
def __init__(self, name, dim_i, dim_o):
super().__init__(name)
self.dim = dim_o
self.xavier = xavier(dim_i, dim_o)
self.w = np.random.randn(dim_i, dim_o).T * self.xavier
self.b = np.random.randn(dim_o, 1)
self.oput_ina = np.zeros((dim_o, 1))
self.oput_act = np.zeros((dim_o, 1))
pass
def __call__(self, prev:np.ndarray) -> np.ndarray:
self.iput = prev.reshape(-1, 1)
self.oput_ina = np.dot(self.w, self.iput) + self.b
self.oput_act = sigmoid(self.oput_ina)
return self.oput_act
def BP(self, err:np.ndarray) -> np.ndarray:
err *= sigmoid1st(self.oput_ina)
self.w_delta = np.dot(err, self.iput.T)
self.b_delta = err
return np.dot(self.w.T, err)
pass
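    # Softmax: dense output layer; its BP uses err directly as the delta, i.e. it assumes
    # err = activation - target, the exact delta for softmax with cross-entropy loss.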
class Softmax(meta):
def __init__(self, name, dim_i, dim_o):
super().__init__(name)
self.dim = dim_o
self.xavier = xavier(dim_i, dim_o)
self.w = np.random.randn(dim_i, dim_o).T * self.xavier
self.b = np.random.randn(dim_o, 1)
self.oput_ina = np.zeros((dim_o, 1))
self.oput_act = np.zeros((dim_o, 1))
pass
def __call__(self, prev:np.ndarray) -> np.ndarray:
self.iput = prev.reshape(-1, 1)
self.oput_ina = np.dot(self.w, self.iput) + self.b
self.oput_act = softmax(self.oput_ina)
return self.oput_act
def BP(self, err:np.ndarray) -> np.ndarray:
self.w_delta = np.dot(err, self.iput.T)
self.b_delta = err
return np.dot(self.w.T, err)
pass
def __init__(self):
self.st_of_cnn = []
pass
def feedforward(self, im2D):
def ____(l):
if l == 0:
return self.st_of_cnn[l](im2D)
else :
return self.st_of_cnn[l](____(l - 1))
pass
return ____(len(self.st_of_cnn) - 1)
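    # backpropagation: feeds x forward, then runs BP from the last layer back to the
    # first, collecting each layer's weight and bias gradients in order.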
def backpropagation(self, x:np.ndarray, y:np.ndarray):
def ____(l):
if l == len(self.st_of_cnn) - 1:
err = self.st_of_cnn[l].BP(error)
pass
else:
err = self.st_of_cnn[l].BP(____(l + 1))
pass
w_delta_lst.insert(0, self.st_of_cnn[l].w_delta)
b_delta_lst.insert(0, self.st_of_cnn[l].b_delta)
return err
w_delta_lst, b_delta_lst = [], []
error = self.feedforward(x) - y
____(0)
return w_delta_lst, b_delta_lst
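    # SGD_MiniBatch: accumulates the per-sample gradients over one mini-batch.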
def SGD_MiniBatch(self, imSet:np.ndarray, lbSet:np.ndarray):
w_nabla_lst = [None if layer.w is None else np.zeros((layer.w.shape)) for layer in self.st_of_cnn]
b_nabla_lst = [None if layer.b is None else np.zeros((layer.b.shape)) for layer in self.st_of_cnn]
for image, label in zip(imSet, lbSet):
w_delta_lst, b_delta_lst = self.backpropagation(image, label.reshape((-1, 1)))
w_nabla_lst = [None if w_delta is None else (w_nabla + w_delta) for w_nabla, w_delta in zip(w_nabla_lst, w_delta_lst)]
b_nabla_lst = [None if b_delta is None else (b_nabla + b_delta) for b_nabla, b_delta in zip(b_nabla_lst, b_delta_lst)]
pass
return w_nabla_lst, b_nabla_lst
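    # SGD: shuffles the training set each epoch and applies w -= (eta / batsz) * nabla after every mini-batch.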
def SGD(self, imSet4train:np.ndarray, lbSet4train:np.ndarray,
imSet4test :np.ndarray, lbSet4test :np.ndarray, epochs, batsz, eta):
print('Epochs', '-', 'Untrained.', 'Accuracy is {0:.2f}%'.format(self.evaluate(imSet4test, lbSet4test)))
for e in range(epochs):
axis = np.random.permutation(imSet4train.shape[0])
imSet4train = imSet4train[axis]
lbSet4train = lbSet4train[axis]
for pos in range(0, len(imSet4train), batsz):
print(f"*Training Epoch[{e}]: {(pos + batsz) / len(imSet4train) * 100.0:.2f}%", end='r')
                w_nabla_lst, b_nabla_lst = self.SGD_MiniBatch(imSet4train[pos : pos + batsz],   # slicing clamps at the end of the array,
                                                              lbSet4train[pos : pos + batsz])   # so no explicit bounds check is needed
for layer, (w_nabla, b_nabla) in zip(self.st_of_cnn, zip(w_nabla_lst, b_nabla_lst)):
if layer.w is None and layer.b is None:
continue
else :
layer.w = layer.w - (eta / batsz) * w_nabla
layer.b = layer.b - (eta / batsz) * b_nabla
pass
pass
pass
print('Epochs', e, 'Completed.', 'Accuracy is {0:.2f}%'.format(self.evaluate(imSet4test, lbSet4test)))
pass
pass
def evaluate(self, imSet, lbSet):
correct = 0
for image, label in zip(imSet, lbSet):
correct += 1 if np.argmax(label) == self.predict(image) else 0
pass
return correct / len(imSet) * 100.0
def predict(self, im2D, show=False):
if (show):
plt.imshow(im2D, cmap='gray')
plt.axis('off')
plt.show()
pass
else:
pass
return np.argmax(self.feedforward(im2D))
def add_layer_iput(self, h, w):
self.st_of_cnn.append(
ConvNeuNet.IPut("IPut", h, w)
)
return self
def add_layer_conv(self, h, w, n, stride=1, padding=0):
self.st_of_cnn.append(
ConvNeuNet.Conv("Conv", self.st_of_cnn[-1].oput_act.shape, (n, h, w), stride, padding)
)
return self
def add_layer_poolingMAX(self, h, w, stride=2):
self.st_of_cnn.append(
ConvNeuNet.PoolingMAX("PoolingMAX", self.st_of_cnn[-1].oput_act.shape, (h, w), stride)
)
return self
def add_layer_FullConn(self, dim):
self.st_of_cnn.append(
ConvNeuNet.FullConn("FullConn", *self.st_of_cnn[-1].oput_act.flatten().shape, dim)
)
return self
def add_layer_softmax(self, dim):
self.st_of_cnn.append(
ConvNeuNet.Softmax("Softmax", *self.st_of_cnn[-1].oput_act.flatten().shape, dim)
)
return self
if __name__ == "__main__":
cnn = ConvNeuNet()
cnn.add_layer_iput ( 28, 28)
cnn.add_layer_conv ( 5, 5, 20)
cnn.add_layer_poolingMAX( 2, 2)
cnn.add_layer_FullConn (100)
cnn.add_layer_softmax ( 10)
# cnn.SGD(imSet4train[0:1], lbSet4train[0:1], imSet4test[0:1], lbSet4test[0:1], 1, 1, 1.5)
cnn.SGD(imSet4train[0:100], lbSet4train[0:100], imSet4train[0:100], lbSet4train[0:100], 5, 10, 1.5)
# cnn.SGD(imSet4train[0:100], lbSet4train[0:100], imSet4test[0:100], lbSet4test[0:100], 5, 10, 1.5)
# cnn.predict(np.ones((28, 28)))
raise
pass
else:
from sys import exit
exit()
nn = ConvNeuNet()
nn.add_layer_iput (28, 28)
nn.add_layer_FullConn (28 * 28)
nn.add_layer_FullConn (30)
nn.add_layer_softmax (10)
nn.SGD(imSet4train, lbSet4train, imSet4train, lbSet4train, 5, 10, 1.5)
Since the following fully connected network, built from the same code with the convolution and pooling layers removed, reaches more than 90% accuracy, I suspect the problem is in the convolution layer.
nn = ConvNeuNet()
nn.add_layer_iput (28, 28)
nn.add_layer_FullConn (28 * 28)
nn.add_layer_FullConn (30)
nn.add_layer_softmax (10)
nn.SGD(imSet4train, lbSet4train, imSet4train, lbSet4train, 5, 10, 1.5)
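To test this hypothesis in isolation, one option is a finite-difference gradient check on a single Conv layer. This is only a sketch (grad_check_conv is a throwaway helper name; it assumes the ConvNeuNet.Conv class defined above is in scope):

import numpy as np

def grad_check_conv(eps=1e-5):
    np.random.seed(0)
    conv = ConvNeuNet.Conv("Conv", (8, 8), (2, 3, 3))  # tiny layer: 2 kernels of 3*3 on an 8*8 input
    x = np.random.randn(8, 8)
    out = conv(x)
    conv.BP(np.ones_like(out))                         # analytic gradient of L(x) = sum(conv(x))
    analytic = conv.w_delta[0, 1, 1]
    conv.w[0, 1, 1] += eps                             # central-difference gradient of the same weight entry
    loss_p = np.sum(conv(x))
    conv.w[0, 1, 1] -= 2 * eps
    loss_m = np.sum(conv(x))
    conv.w[0, 1, 1] += eps
    numeric = (loss_p - loss_m) / (2 * eps)
    print(analytic, numeric)                           # should agree to ~1e-6 if Conv.BP is correct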
The rest of the code is here:
https://github.com/xxZhLF/PracticeOnMNIST/tree/main/.py_impl
How can I improve the accuracy of the convolutional neural network?