分类: Python/Ruby
2021-04-06 17:18:49
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
# Shared default seed: getdata() and gettest() must shuffle the sheet the same
# way, otherwise their row ranges come from different permutations and overlap.
_SPLIT_SEED = 0


def _load_rows(fpath, start, stop, random_state):
    """Read the sheet, shuffle it, and split rows [start, stop) into arrays.

    Columns 0-3 of each selected row become the feature vector and column 4
    becomes the label, wrapped in a one-element list so the label array has
    one row per sample.
    """
    frame = shuffle(pd.read_excel(fpath), random_state=random_state)
    rows = frame.iloc[start:stop, 0:5].values.tolist()
    features = np.array([row[0:4] for row in rows])
    labels = np.array([[row[4]] for row in rows])
    return features, labels


def getdata(fpath, random_state=_SPLIT_SEED):
    """Load the training split from an Excel sheet.

    The sheet is shuffled and the first 120 shuffled rows are used.

    Args:
        fpath: Path of the Excel file, passed to ``pandas.read_excel``.
        random_state: Seed forwarded to ``sklearn.utils.shuffle``. Use the
            same value as for ``gettest`` so that both functions slice the
            same permutation and the two splits do not share rows.

    Returns:
        A tuple ``(dataset, labelset)``: the features (columns 0-3) and the
        labels (column 4, one single-element row per sample).
    """
    return _load_rows(fpath, 0, 120, random_state)


def gettest(fpath, random_state=_SPLIT_SEED):
    """Load the test split from an Excel sheet.

    The sheet is shuffled and shuffled rows 120-149 are used. The loaded
    arrays are also printed.

    Args:
        fpath: Path of the Excel file, passed to ``pandas.read_excel``.
        random_state: Seed forwarded to ``sklearn.utils.shuffle``. Use the
            same value as for ``getdata`` so that both functions slice the
            same permutation and the two splits do not share rows.

    Returns:
        A tuple ``(datatrain, labeltrain)``: the features (columns 0-3) and
        the labels (column 4, one single-element row per sample).
    """
    datatrain, labeltrain = _load_rows(fpath, 120, 150, random_state)
    print(datatrain, labeltrain)
    return datatrain, labeltrain
def prime_time(x, y, z):
    """Create random initial parameters for a three-layer network.

    Args:
        x: Number of input-layer neurons.
        y: Number of hidden-layer neurons.
        z: Number of output-layer neurons.

    Returns:
        ``(weight1, weight2, value1, value2)`` as float64 arrays of shapes
        ``(x, y)``, ``(y, z)``, ``(1, y)`` and ``(1, z)``. Every entry is an
        integer value drawn from ``numpy.random.randint(-5, 5)``, i.e. -5..4.
    """
    def _draw(rows, cols):
        return np.random.randint(-5, 5, (rows, cols)).astype(np.float64)

    # The draw order (offsets first, then weights) is kept as-is so that a
    # seeded global NumPy generator yields the same parameters.
    hidden_offset = _draw(1, y)
    output_offset = _draw(1, z)
    input_to_hidden = _draw(x, y)
    hidden_to_output = _draw(y, z)
    return input_to_hidden, hidden_to_output, hidden_offset, output_offset
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are handled elementwise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def trainning_process(dataset, labelset, weight1, weight2, value1, value2,
                      learning_rate=0.01):
    """Run one pass of per-sample gradient updates over the training set.

    For every sample the network output is computed as
    ``sigmoid(sigmoid(x @ weight1 - value1) @ weight2 - value2)`` and the
    weights and offsets are corrected using the chain-rule derivatives of the
    squared error with respect to each parameter.

    Args:
        dataset: Sequence of feature rows.
        labelset: Sequence of label rows; ``labelset[i]`` belongs to
            ``dataset[i]``.
        weight1: Input-to-hidden weights.
        weight2: Hidden-to-output weights.
        value1: Offsets subtracted from the hidden-layer input.
        value2: Offsets subtracted from the output-layer input.
        learning_rate: Step size applied to every update.

    Returns:
        ``(weight1, weight2, value1, value2)``. The arrays passed in are
        updated in place and the same objects are returned.
    """
    for i, sample in enumerate(dataset):
        # Plain 2-D row vectors replace np.mat, which NumPy 2.0 removed.
        inputset = np.atleast_2d(np.asarray(sample, dtype=np.float64))
        outputset = np.atleast_2d(np.asarray(labelset[i], dtype=np.float64))

        # Forward pass: hidden-layer output, then output-layer output.
        hidden_out = sigmoid(np.dot(inputset, weight1) - value1)
        final_out = sigmoid(np.dot(hidden_out, weight2) - value2)

        # Backward pass. Both gradient terms are computed before any parameter
        # is modified, so the hidden term uses the pre-update weight2.
        output_term = np.multiply(
            np.multiply(final_out, 1 - final_out), outputset - final_out
        )
        hidden_term = np.multiply(
            np.dot(output_term, np.transpose(weight2)),
            np.multiply(hidden_out, 1 - hidden_out),
        )

        # Offsets enter the activations with a minus sign, so they move in the
        # opposite direction to the weights.
        value1 -= learning_rate * hidden_term
        value2 -= learning_rate * output_term
        weight1 += learning_rate * np.dot(np.transpose(inputset), hidden_term)
        weight2 += learning_rate * np.dot(np.transpose(hidden_out), output_term)
    return weight1, weight2, value1, value2
def testing(dataset, labelset, weight1, weight2, value1, value2,
            tolerance=0.04):
    """Evaluate the trained network and report its accuracy.

    Each sample is pushed through the network, the raw output is printed, and
    the prediction counts as correct when the first output unit differs from
    the sample's label by less than ``tolerance``.

    Args:
        dataset: Sequence of feature rows.
        labelset: Sequence of label rows; ``labelset[i][0]`` is the expected
            value for ``dataset[i]``.
        weight1: Input-to-hidden weights.
        weight2: Hidden-to-output weights.
        value1: Offsets subtracted from the hidden-layer input.
        value2: Offsets subtracted from the output-layer input.
        tolerance: Largest absolute error still counted as a correct
            prediction.

    Returns:
        The fraction of correct predictions, ``rightcount / len(labelset)``,
        or ``0.0`` when ``labelset`` is empty. The same value is printed.
    """
    rightcount = 0
    for i, sample in enumerate(dataset):
        # Plain 2-D row vector replaces np.mat, which NumPy 2.0 removed.
        inputset = np.atleast_2d(np.asarray(sample, dtype=np.float64))
        hidden_out = sigmoid(np.dot(inputset, weight1) - value1)
        output3 = sigmoid(np.dot(hidden_out, weight2) - value2)
        print(output3)
        # Only the first output unit is compared with the label.
        if abs(output3[0, 0] - labelset[i][0]) < tolerance:
            rightcount += 1
    # Guard the division so an empty evaluation set does not raise.
    total = len(labelset)
    result = rightcount / total if total else 0.0
    print("Acurracy ={:.4f} ".format(result))
    return result
if __name__ == '__main__':
    # Script entry point: load both splits, initialise the network, train it
    # for a fixed number of passes, then evaluate on the held-out rows.
    excel_path = r'd:iri1.xls'
    passes = 20000

    dataset, labelset = getdata(excel_path)
    # The hidden layer gets as many neurons as there are input features;
    # the output layer has a single neuron.
    feature_count = len(dataset[0])
    weight1, weight2, value1, value2 = prime_time(feature_count, feature_count, 1)
    test_features, test_labels = gettest(excel_path)

    for _ in range(passes):
        weight1, weight2, value1, value2 = trainning_process(
            dataset, labelset, weight1, weight2, value1, value2
        )
    rate = testing(test_features, test_labels, weight1, weight2, value1, value2)
![在这里插入图片描述](https://img-blog.csdnimg.cn/20210405133809860.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl81MjMyODY3OA==,size_16,color_FFFFFF,t_70#pic_center)
最后,iris数据集有需要的童鞋可以私信我哦,sklearn库里面也可以调用。
我这里主要用的是pandas来导入Excel表的数据,训练结果还算不错,准确率在93.1%-97.0%之间。