I wasn't in great shape today. I wrote a basic neural network from scratch; there may be mistakes and poorly written parts.
After training it on XOR, only tanh succeeded; sigmoid and relu both failed, and tanh also failed to predict once the network had more than three layers.
Perhaps that is because tanh converges relatively quickly.
Only fully connected layers are considered here, with no training optimizations at all. A convolutional network should differ only in its connection pattern (it is sparser); I don't want to write one yet and couldn't anyway, and verifying its correctness would be a hassle. I may write one later once I understand things more deeply.
import numpy as np
import pandas as pd
import copy

# Note: these derivatives are all written in terms of the pre-activation
# input x (this turns out to matter later).
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1.0 - np.tanh(x) * np.tanh(x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def relu(x):
    t = copy.copy(x)
    for i in range(len(t)):
        if t[i] < 0:
            t[i] = 0
    return t

def relu_derivative(x):
    t = copy.copy(x)
    for i in range(len(t)):
        if t[i] < 0:
            t[i] = 0
        else:
            t[i] = 1
    return t
class ActivationFunc:
    def __init__(self):
        # forward activations
        self.tdict = dict()
        self.tdict['tanh'] = np.tanh
        self.tdict['sigmoid'] = lambda x: 1 / (1 + np.exp(-x))
        self.tdict['relu'] = relu
        # 'softmax' stores only np.exp; predict() normalizes by the sum later
        self.tdict['softmax'] = np.exp
        # derivatives
        self.ddict = dict()
        self.ddict['tanh'] = tanh_derivative
        self.ddict['sigmoid'] = sigmoid_derivative
        self.ddict['relu'] = relu_derivative
        self.ddict['softmax'] = np.exp
    def getActivation(self, activation):
        if activation in self.tdict:
            return self.tdict[activation]
        else:
            return self.tdict['relu']
    def getDActivation(self, activation):
        if activation in self.ddict:
            return self.ddict[activation]
        else:
            return self.ddict['relu']
#print(ActivationFunc().getActivation('logistic')(1.0))
#print(logistic_derivative(1.0))
class NNetwork:
    def __init__(self, inputsize, lr = 0.01, withbias = True):
        self.para = []        # weight matrix of each layer
        self.layerout = []    # per-layer outputs from the last forward pass
        self.grad = []        # accumulated gradients, same shapes as para
        self.backout = []     # per-layer error terms from the last backward pass
        self.activationclass = ActivationFunc()
        self.inputsize = inputsize
        self.lastsize = inputsize
        self.lr = lr
        self.layerlen = 0
        self.activation = []
        self.deactivation = []
        self.wbias = withbias
        self.outputfunc = 'softmax'
        self.maxnum = 0       # largest gradient entry, used to scale the step
        #self.activation = ActivationFunc().getActivation(mactivation)
    def add(self, densesize, actstr):
        # append a fully connected layer of densesize units
        tsize = self.lastsize
        if self.wbias:
            tsize += 1        # one extra column for the bias
        self.para.append(np.random.rand(densesize, tsize) - 0.5)
        self.grad.append(np.zeros((densesize, tsize)))
        self.lastsize = densesize
        self.activation.append(self.activationclass.tdict[actstr])
        self.deactivation.append(self.activationclass.ddict[actstr])
        self.layerlen += 1
        self.outputfunc = actstr
    def forward(self, input):
        self.layerout = []
        if self.wbias:
            self.layerout.append(np.append(np.array(input), 1))
        else:
            self.layerout.append(np.array(input))
        for i in range(self.layerlen):
            #print(self.layerout[-1].shape, self.para[i].shape)
            if self.wbias and i != self.layerlen - 1:
                # append the constant 1 that multiplies the bias column
                self.layerout.append(np.append(self.activation[i](np.dot(self.para[i], self.layerout[-1].T)), 1))
            else:
                self.layerout.append(self.activation[i](np.dot(self.para[i], self.layerout[-1].T)))
        return self.layerout[-1]
    def backward(self, y, y_label):
        self.maxnum = 0.001
        # the output layer is unnormalized exp, so y - sum(y)*onehot is the
        # softmax/cross-entropy gradient up to a positive scale factor
        tsumy = sum(y)
        y[y_label] -= tsumy
        self.maxnum = max(self.maxnum, max(y))
        self.backout = []
        self.backout.append(np.matrix(y).T)
        for i in range(self.layerlen, 0, -1):
            #print(self.backout[-1].shape, np.matrix(self.layerout[i - 1]).shape)
            self.grad[i - 1] += np.dot(self.backout[-1], np.matrix(self.layerout[i - 1]))
            self.maxnum = max(self.grad[i - 1].max().max(), self.maxnum)
            if i > 1:
                # note: deactivation is applied to the layer's *output* here,
                # while the derivatives above are written in terms of the *input*
                if self.wbias:
                    self.backout.append(np.multiply(self.deactivation[i - 2](self.layerout[i - 1]), np.dot(self.backout[-1].T, self.para[i - 1])).T[:-1,:])
                else:
                    self.backout.append(np.multiply(self.deactivation[i - 2](self.layerout[i - 1]), np.dot(self.backout[-1].T, self.para[i - 1])).T)
    def zero_grad(self):
        for obj in self.grad:
            obj.fill(0)
    def step(self):
        # scale the step by the largest gradient entry seen in backward()
        for obj1, obj2 in zip(self.para, self.grad):
            obj1 -= self.lr / self.maxnum * obj2
    def predict(self, input):
        y = self.forward(input)
        y /= np.sum(y)        # normalize the exp outputs into probabilities
        return y
#2*x + y - 3
model = NNetwork(2, withbias = True)
#model.add(16, 'relu')
model.add(16, 'tanh')
model.add(4, 'tanh')
model.add(2, 'softmax')
# XOR training data
X = [[0,0],[0,1],[1,0],[1,1]]
y = [0, 1, 1, 0]
for i in range(1000000):
    tid = i % 4
    model.zero_grad()
    output = model.forward(X[tid])
    model.backward(output, y[tid])
    model.step()
print(model.predict([1,1]))
print(model.predict([0,1]))
print(model.predict([0,0]))
print(model.predict([1,0]))
I found the mistake: the activation derivatives were written with respect to the input, but I applied them to the output. Then I suddenly realized that the derivatives of sigmoid, relu, and tanh can all be written as functions of the output, and more simply so. For example, with y = tanh(x), dy = (1 - y * y)dx, which is more concise than dy = (1 - tanh(x) * tanh(x))dx.
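A quick numerical check of these identities (a standalone sketch, separate from the network code; the sample points are arbitrary):

import numpy as np
x = np.linspace(-3, 3, 13)
y = np.tanh(x)
# tanh: derivative in terms of the input vs. in terms of the output
assert np.allclose(1.0 - np.tanh(x) * np.tanh(x), 1.0 - y * y)
s = 1 / (1 + np.exp(-x))
# sigmoid: s'(x) = e^-x / (1 + e^-x)^2 = s * (1 - s)
assert np.allclose(np.exp(-x) / (1 + np.exp(-x)) ** 2, s * (1 - s))
print('both identities hold')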
import numpy as np
import pandas as pd
import copy

# The derivatives are now written in terms of the activation *output*,
# which is what backward() actually passes in.
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1.0 - x * x        # x here is y = tanh(input)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)        # x here is y = sigmoid(input)

def relu(x):
    return np.maximum(x, 0)
    #t = copy.copy(x)
    #for i in range(len(t)):
    #    if t[i] < 0:
    #        t[i] = 0
    #return t

def relu_derivative(x):
    t = copy.copy(x)
    for i in range(len(t)):
        if t[i] <= (1e-12):
            t[i] = 0
        else:
            t[i] = 1
    return t
class ActivationFunc:
    def __init__(self):
        self.tdict = dict()
        self.tdict['tanh'] = np.tanh
        self.tdict['sigmoid'] = lambda x: 1 / (1 + np.exp(-x))
        self.tdict['relu'] = relu
        self.tdict['softmax'] = np.exp
        self.ddict = dict()
        self.ddict['tanh'] = tanh_derivative
        self.ddict['sigmoid'] = sigmoid_derivative
        self.ddict['relu'] = relu_derivative
        self.ddict['softmax'] = np.exp
    def getActivation(self, activation):
        if activation in self.tdict:
            return self.tdict[activation]
        else:
            return self.tdict['relu']
    def getDActivation(self, activation):
        if activation in self.ddict:
            return self.ddict[activation]
        else:
            return self.ddict['relu']
#print(ActivationFunc().getActivation('logistic')(1.0))
#print(logistic_derivative(1.0))
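# Note: the 'softmax' entry stored above is just np.exp; predict() divides by
# the sum afterwards, and exp-then-normalize is exactly softmax. A quick check
# (hypothetical values, in the same commented-print style as above):
#z = np.array([1.0, 2.0, 3.0])
#print(np.exp(z) / np.exp(z).sum())   # the actual softmax probabilities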
class NNetwork:
    def __init__(self, inputsize, lr = 0.01, withbias = True):
        self.para = []
        self.layerout = []
        self.grad = []
        self.backout = []
        self.activationclass = ActivationFunc()
        self.inputsize = inputsize
        self.lastsize = inputsize
        self.lr = lr
        self.layerlen = 0
        self.activation = []
        self.deactivation = []
        self.wbias = withbias
        self.outputfunc = 'softmax'
        self.maxnum = 0.001
        #self.activation = ActivationFunc().getActivation(mactivation)
    def add(self, densesize, actstr):
        tsize = self.lastsize
        if self.wbias:
            tsize += 1
        self.para.append(np.random.rand(densesize, tsize) - 0.5)
        self.grad.append(np.zeros((densesize, tsize)))
        self.lastsize = densesize
        self.activation.append(self.activationclass.tdict[actstr])
        self.deactivation.append(self.activationclass.ddict[actstr])
        self.layerlen += 1
        self.outputfunc = actstr
    def forward(self, input):
        self.layerout = []
        if self.wbias:
            self.layerout.append(np.append(np.array(input), 1))
        else:
            self.layerout.append(np.array(input))
        for i in range(self.layerlen):
            #print(self.layerout[-1].shape, self.para[i].shape)
            if self.wbias and i != self.layerlen - 1:
                self.layerout.append(np.append(self.activation[i](np.dot(self.para[i], self.layerout[-1].T)), 1))
            else:
                self.layerout.append(self.activation[i](np.dot(self.para[i], self.layerout[-1].T)))
        return self.layerout[-1]
    def backward(self, y, y_label):
        self.maxnum = 0.001
        tsumy = sum(y)
        y[y_label] -= tsumy
        #self.maxnum = max(self.maxnum, max(y))
        self.backout = []
        self.backout.append(np.matrix(y).T)
        for i in range(self.layerlen, 0, -1):
            #print(self.backout[-1].shape, np.matrix(self.layerout[i - 1]).shape)
            self.grad[i - 1] += np.dot(self.backout[-1], np.matrix(self.layerout[i - 1]))
            # track the largest gradient magnitude (now using abs)
            self.maxnum = max(np.abs(self.grad[i - 1]).max().max(), self.maxnum)
            if i > 1:
                # deactivation now correctly takes the layer *output*
                if self.wbias:
                    self.backout.append(np.multiply(self.deactivation[i - 2](self.layerout[i - 1]), np.dot(self.backout[-1].T, self.para[i - 1])).T[:-1,:])
                else:
                    self.backout.append(np.multiply(self.deactivation[i - 2](self.layerout[i - 1]), np.dot(self.backout[-1].T, self.para[i - 1])).T)
    def zero_grad(self):
        for obj in self.grad:
            obj.fill(0)
        self.maxnum = 0.001
    def step(self):
        for obj1, obj2 in zip(self.para, self.grad):
            obj1 -= self.lr * obj2 / self.maxnum
        self.zero_grad()      # gradients are now cleared after every step
    def predict(self, input):
        y = self.forward(input)
        y /= np.sum(y)
        return y
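# Note on step(): the accumulated gradient G is rescaled by its largest
# absolute entry before the update, i.e. W -= lr * G / max(np.abs(G).max(), 0.001),
# so every step has a bounded, scale-free magnitude -- a crude form of
# gradient normalization used here in place of a proper optimizer.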
model = NNetwork(2, withbias = True)
#model.add(16, 'relu')
model.add(16, 'relu')
model.add(8, 'relu')
model.add(4, 'relu')
model.add(2, 'softmax')
X = [[0,0],[0,1],[1,0],[1,1]]
y = [0, 1, 1, 0]
for i in range(100000):
    tid = i % 4
    #model.zero_grad()
    output = model.forward(X[tid])
    model.backward(output, y[tid])
    if tid == 3:
        model.step()          # full-batch update: step once per pass over X
print(model.predict([1,1]))
print(model.predict([0,1]))
print(model.predict([0,0]))
print(model.predict([1,0]))
Tested it myself: relu can now predict successfully too. sigmoid only works with a single layer and fails with more, probably due to vanishing gradients; relu and tanh are fine with any number of layers.
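A minimal illustration of why stacked sigmoids are prone to vanishing gradients (a standalone sketch; the grid of sample points is arbitrary): the sigmoid derivative s*(1-s) never exceeds 0.25, so the backpropagated signal shrinks by at least a factor of 4 per sigmoid layer, whereas tanh's derivative reaches 1 and relu's is exactly 1 on its active side.

import numpy as np
x = np.linspace(-10, 10, 10001)
s = 1 / (1 + np.exp(-x))
print((s * (1 - s)).max())   # ~0.25: the peak of the sigmoid derivative
print(0.25 ** 4)             # best-case damping through 4 sigmoid layers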