Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2634914
  • 博文数量: 100
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 925
  • 用 户 组: 普通用户
  • 注册时间: 2019-10-28 13:40
文章分类

全部博文(100)

文章存档

2021年(28)

2020年(50)

2019年(22)

我的朋友

分类: Python/Ruby

2021-04-06 17:18:49

import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.utils import shuffle

def getdata(fpath, random_state=None):
    """Load the training portion of the data set from an Excel file.

    The sheet at ``fpath`` is read with pandas, its rows are shuffled, and
    the first 120 shuffled rows are kept.  Columns 0-3 of those rows become
    the feature matrix and column 4 becomes the label column.

    Args:
        fpath: Path of the Excel file handed to ``pandas.read_excel``.
        random_state: Optional seed forwarded to ``sklearn.utils.shuffle``.
            ``None`` (the default) keeps the original behaviour of an
            unseeded shuffle.

    Returns:
        A ``(dataset, labelset)`` tuple of NumPy arrays: one row of four
        feature values per sample, and one single-element row holding the
        label per sample.

    Note:
        The rows are shuffled anew on every call.  Slices taken from two
        separate unseeded shuffles of the same file are therefore not
        guaranteed to be disjoint.
    """
    frame = shuffle(pd.read_excel(fpath), random_state=random_state)
    rows = frame.iloc[0:120, 0:5].values

    # Go through nested Python lists, as the original code did, so the
    # resulting array dtypes are inferred separately for features and labels.
    dataset = np.array(rows[:, 0:4].tolist())
    # Indexing with [4] (rather than 4:5) still raises IndexError when the
    # sheet has fewer than five columns.
    labelset = np.array(rows[:, [4]].tolist())
    return dataset, labelset

def gettest(fpath, random_state=None):
    """Load the held-out portion of the data set from an Excel file.

    The sheet at ``fpath`` is read with pandas, its rows are shuffled, and
    shuffled rows 120-149 are kept.  Columns 0-3 of those rows become the
    feature matrix and column 4 becomes the label column.  Both arrays are
    printed before being returned.

    Args:
        fpath: Path of the Excel file handed to ``pandas.read_excel``.
        random_state: Optional seed forwarded to ``sklearn.utils.shuffle``.
            ``None`` (the default) keeps the original behaviour of an
            unseeded shuffle.

    Returns:
        A ``(datatrain, labeltrain)`` tuple of NumPy arrays: one row of four
        feature values per sample, and one single-element row holding the
        label per sample.

    Note:
        This function performs its own shuffle; it does not reuse an
        ordering produced by any earlier call.  Rows 120-149 of an unseeded
        shuffle are therefore not guaranteed to be distinct from rows taken
        out of a different shuffle of the same file.
    """
    frame = shuffle(pd.read_excel(fpath), random_state=random_state)
    rows = frame.iloc[120:150, 0:5].values

    # Go through nested Python lists, as the original code did, so the
    # resulting array dtypes are inferred separately for features and labels.
    datatrain = np.array(rows[:, 0:4].tolist())
    # Indexing with [4] (rather than 4:5) still raises IndexError when the
    # sheet has fewer than five columns.
    labeltrain = np.array(rows[:, [4]].tolist())

    print(datatrain,labeltrain)
    return datatrain, labeltrain

def prime_time(x, y, z):
    """Create the random initial parameters of the network.

    ``x``, ``y`` and ``z`` are the neuron counts of the input, hidden and
    output layers.  Every parameter is filled with random integers drawn
    from [-5, 5) and stored as float64.

    Returns:
        ``(weight1, weight2, value1, value2)`` with shapes ``(x, y)``,
        ``(y, z)``, ``(1, y)`` and ``(1, z)`` respectively.
    """
    def _random_block(rows, cols):
        # Integer draw in [-5, 5), converted to float64.
        return np.random.randint(-5, 5, (rows, cols)).astype(np.float64)

    # The two (1, n) arrays are drawn before the two weight matrices so a
    # seeded global RNG yields the same values as it always did.
    hidden_values = _random_block(1, y)
    output_values = _random_block(1, z)
    input_to_hidden = _random_block(x, y)
    hidden_to_output = _random_block(y, z)

    return input_to_hidden, hidden_to_output, hidden_values, output_values

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)) of ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are handled element-wise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def trainning_process(dataset, labelset, weight1, weight2, value1, value2):
    """Run one pass of per-sample back-propagation over ``dataset``.

    For every sample the network is evaluated forward (each layer computes
    ``sigmoid(input . weight - value)``), the error terms are obtained with
    the chain rule, and all four parameter arrays are corrected immediately
    using a fixed learning rate of 0.01.

    Args:
        dataset: Sequence of feature rows, one per sample.
        labelset: Sequence of label rows, indexed in step with ``dataset``.
        weight1: Input-to-hidden weights.
        weight2: Hidden-to-output weights.
        value1: Hidden-layer values subtracted before the sigmoid.
        value2: Output-layer values subtracted before the sigmoid.

    Returns:
        ``(weight1, weight2, value1, value2)``.  The arrays are updated in
        place, so these are the same objects that were passed in.
    """
    learning_rate = 0.01

    for i, sample in enumerate(dataset):
        # np.atleast_2d yields the same (1, n) row that np.mat produced;
        # np.mat itself no longer exists in NumPy 2.0.
        inputset = np.atleast_2d(sample).astype(np.float64)
        outputset = np.atleast_2d(labelset[i]).astype(np.float64)

        # Forward pass: hidden layer, then output layer.
        output2 = sigmoid(np.dot(inputset, weight1) - value1)
        output3 = sigmoid(np.dot(output2, weight2) - value2)

        # Output-layer error term: sigmoid derivative times the residual.
        output_grad = np.multiply(
            np.multiply(output3, 1 - output3), outputset - output3
        )
        # Hidden-layer error term.  It must be computed from weight2 as it
        # was during the forward pass, i.e. before weight2 is corrected.
        hidden_grad = np.multiply(
            np.dot(output_grad, np.transpose(weight2)),
            np.multiply(output2, 1 - output2),
        )

        # The values enter the activation with a minus sign, so their
        # correction has the opposite sign to the weight correction.
        value1 -= learning_rate * hidden_grad
        value2 -= learning_rate * output_grad
        weight1 += learning_rate * np.dot(np.transpose(inputset), hidden_grad)
        weight2 += learning_rate * np.dot(np.transpose(output2), output_grad)

    return weight1, weight2, value1, value2

def testing(dataset, labelset, weight1, weight2, value1, value2):
    """Evaluate the network on ``dataset`` and report its accuracy.

    Each sample is passed forward through the network and the raw output is
    printed.  A sample counts as correct when the absolute difference
    between the first output value and the first entry of its label row is
    below 0.04.  The accuracy is printed and returned.

    Args:
        dataset: Sequence of feature rows, one per sample.
        labelset: Sequence of label rows, indexed in step with ``dataset``.
        weight1: Input-to-hidden weights.
        weight2: Hidden-to-output weights.
        value1: Hidden-layer values subtracted before the sigmoid.
        value2: Output-layer values subtracted before the sigmoid.

    Returns:
        The fraction of correct predictions, ``rightcount / len(labelset)``.

    Raises:
        ZeroDivisionError: If ``labelset`` is empty.
    """
    rightcount = 0

    # Forward pass only; no parameter is modified here.
    for i, sample in enumerate(dataset):
        # np.atleast_2d yields the same (1, n) row that np.mat produced;
        # np.mat itself no longer exists in NumPy 2.0.
        inputset = np.atleast_2d(sample).astype(np.float64)

        output2 = sigmoid(np.dot(inputset, weight1) - value1)
        output3 = sigmoid(np.dot(output2, weight2) - value2)
        print(output3)

        # Only the first output value is compared against the label.
        if abs(output3[0, 0] - labelset[i][0]) < 0.04:
            rightcount += 1

    result = rightcount / len(labelset)
    print("Acurracy ={:.4f} ".format(result))
    return result

if __name__ == '__main__':
    data_path = r'd:iri1.xls'

    # getdata() and gettest() each re-read and re-shuffle the file.  With two
    # unrelated shuffles, rows 120-149 of the second ordering would mostly be
    # rows the network was already trained on.  sklearn's shuffle draws from
    # NumPy's global RNG when it is given no random_state, so seeding that
    # RNG with the same value before each load makes both calls see the same
    # ordering and the two slices become disjoint.  The seed itself is drawn
    # at random so separate runs still differ.
    split_seed = np.random.randint(0, 2 ** 31 - 1)

    np.random.seed(split_seed)
    dataset, labelset = getdata(data_path)

    # The hidden layer is given as many neurons as there are input features;
    # the network has a single output neuron.
    weight1, weight2, value1, value2 = prime_time(len(dataset[0]), len(dataset[0]), 1)

    np.random.seed(split_seed)
    datatrain, labeltrain = gettest(data_path)

    # 20000 passes over the training rows.
    for _ in range(20000):
        weight1, weight2, value1, value2 = trainning_process(dataset, labelset, weight1, weight2, value1, value2)

    # testing() prints each prediction and the final accuracy.
    testing(datatrain, labeltrain, weight1, weight2, value1, value2)

```python

在这里插入代码片

```

![在这里插入图片描述](https://img-blog.csdnimg.cn/20210405133809860.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl81MjMyODY3OA==,size_16,color_FFFFFF,t_70#pic_center)

最后,iris数据集有需要的童鞋可以私信我哦,sklearn库里面也可以调用。

我这里主要用的是pandas来导入Excel表的数据,训练结果还算不错,准确率在93.1%-97.0%之间

阅读(26) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~