一个简单的强化学习实现案列-基于学习自动机的链路预测模型

malakashi 发布于2019-07-31 10:13 / 1578人阅读

摘要：强化学习强化学习英语，简称是机器学习中的一个领域，强调如何基于环境而行动，以取得最大化的预期利益。在强化学习的领域中，学习自动机的特征是马可夫决策过程。

强化学习

强化学习（英语：Reinforcement learning，简称RL）是机器学习中的一个领域，强调如何基于环境而行动，以取得最大化的预期利益。其灵感来源于心理学中的行为主义理论，即有机体如何在环境给予的奖励或惩罚的刺激下，逐步形成对刺激的预期，产生能获得最大利益的习惯性行为。这个方法具有普适性，因此在其他许多领域都有研究，例如博弈论、控制论、运筹学、信息论、仿真优化、多主体系统学习、群体智能、统计学以及遗传算法。在运筹学和控制理论研究的语境下，强化学习被称作“近似动态规划”（approximate dynamic programming，ADP）。在最优控制理论中也有研究这个问题，虽然大部分的研究是关于最优解的存在和特性，并非是学习或者近似方面。在经济学和博弈论中，强化学习被用来解释在有限理性的条件下如何出现平衡。

在机器学习问题中，环境通常被规范为马可夫决策过程（MDP），所以许多强化学习算法在这种情况下使用动态规划技巧。传统的技术和强化学习算法的主要区别是，后者不需要关于MDP的知识，而且针对无法找到确切方法的大规模MDP。

强化学习和标准的监督式学习之间的区别在于，它并不需要出现正确的输入/输出对，也不需要精确校正次优化的行为。强化学习更加专注于在线规划，需要在探索（在未知的领域）和遵从（现有知识）之间找到平衡。强化学习中的“探索-遵从”的交换，在多臂老虎机问题和有限MDP中研究得最多。

学习自动机

学习自动机是在一随机环境下的适应性决策产生单元，可以根据和环境重复的互动来学习最佳的动作。动作是依照特定的机率分布来决定，而系统会依采取特定行动后的环境反应来更新机率分布。

在强化学习的领域中，学习自动机的特征是马可夫决策过程。政策迭代者会直接处理π，这点其他强化学习的算法不同。另一个政策迭代者的例子是演化算法。

链路预测

网络中的链路预测(Link Prediction)是指如何通过已知的网络节点以及网络结构等信息预测网络中尚未产生连边的两个节点之间产生链接的可能性。这种预测既包含了对未知链接（exist yet unknown links）的预测也包含了对未来链接（future links）的预测。该问题的研究在理论和应用两个方面都具有重要的意义和价值。

基于学习自动机的链路预测模型实现

import numpy as np
import time
from random import choice
import pandas as pd
import os

定义计算共同邻居指标的方法 define some functions to calculate some baseline index

"""
def Cn(MatrixAdjacency):

Matrix_similarity = np.dot(MatrixAdjacency,MatrixAdjacency)
return Matrix_similarity

"""

计算Jaccard相似性指标

def Jaccavrd(MatrixAdjacency_Train):

Matrix_similarity = np.dot(MatrixAdjacency_Train,MatrixAdjacency_Train)

deg_row = sum(MatrixAdjacency_Train)
deg_row.shape = (deg_row.shape[0],1)
deg_row_T = deg_row.T
tempdeg = deg_row + deg_row_T
temp = tempdeg - Matrix_similarity

Matrix_similarity = Matrix_similarity / temp
return Matrix_similarity

定义计算Salton指标的方法

def Salton_Cal(MatrixAdjacency_Train):

similarity = np.dot(MatrixAdjacency_Train,MatrixAdjacency_Train)

deg_row = sum(MatrixAdjacency_Train)
deg_row.shape = (deg_row.shape[0],1)
deg_row_T = deg_row.T
tempdeg = np.dot(deg_row,deg_row_T)
temp = np.sqrt(tempdeg)

np.seterr(divide="ignore", invalid="ignore")
Matrix_similarity = np.nan_to_num(similarity / temp)

print np.isnan(Matrix_similarity) Matrix_similarity = np.nan_to_num(Matrix_similarity) print np.isnan(Matrix_similarity)

return Matrix_similarity

定义计算Katz1指标的方法

def Katz_Cal(MatrixAdjacency):

#α取值
Parameter = 0.01
Matrix_EYE = np.eye(MatrixAdjacency.shape[0])
Temp = Matrix_EYE - MatrixAdjacency * Parameter

Matrix_similarity = np.linalg.inv(Temp)

Matrix_similarity = Matrix_similarity - Matrix_EYE
return Matrix_similarity

定义计算局部路径LP相似性指标的方法

"""
def LP_Cal(MatrixAdjacency):

Matrix_similarity = np.dot(MatrixAdjacency,MatrixAdjacency)

Parameter = 0.05
Matrix_LP = np.dot(np.dot(MatrixAdjacency,MatrixAdjacency),MatrixAdjacency) * Parameter

Matrix_similarity = np.dot(Matrix_similarity,Matrix_LP)
return Matrix_similarity

"""

计算资源分配（Resource Allocation）相似性指标

def RA(MatrixAdjacency_Train):

RA_Train = sum(MatrixAdjacency_Train)
RA_Train.shape = (RA_Train.shape[0],1)
MatrixAdjacency_Train_Log = MatrixAdjacency_Train / RA_Train
MatrixAdjacency_Train_Log = np.nan_to_num(MatrixAdjacency_Train_Log)

Matrix_similarity = np.dot(MatrixAdjacency_Train,MatrixAdjacency_Train_Log)
return Matrix_similarity

随机环境一：针对活跃性的节点对

def RandomEnviromentForActive(MatrixAdjacency,i,j):

Index = np.random.randint(1, 5)
print(Index)
global IndexName
if Index == 1:
    IndexName = "相似性指标是：Jaccard Index"
    print(IndexName)
    similarity_matrix = Jaccavrd(MatrixAdjacency)
    similarity = similarity_matrix[i,j]
elif Index == 2:
    IndexName = "相似性指标是：Salton Index"
    print(IndexName)
    similarity_matrix = Salton_Cal(MatrixAdjacency)
    similarity = similarity_matrix[i,j]
elif Index == 3:
    IndexName = "相似性指标是：Katz Index"
    print(IndexName)
    similarity_matrix = Katz_Cal(MatrixAdjacency)
    similarity = similarity_matrix[i,j]
else index == 4:
    IndexName = "相似性指标是：RA Index"
    print(IndexName)
    similarity_matrix = RA(MatrixAdjacency)
    similarity = similarity_matrix[i,j]
return similarity

随机环境二：主要针对非活跃性的节点对

def RandomEnviromentForNonActive():

Action = np.random.randint(1, 4)
if Action == 1:
    ActionName = "ID3"
    similarity_matrix = ID3_Cal(MatrixAdjacency)
    #similarity = similarity_matrix[i,j]
elif Action == 2:
    ActionName = "CART"
    similarity_matrix = Cart_Cal(MatrixAdjacency)
    #similarity = similarity_matrix[i,j]
elif Action == 3:
    ActionName = "C4.5"
    similarity_matrix = C4_Cal(MatrixAdjacency)
    #similarity = similarity_matrix[i,j]
return similarity

构建学习自动机(To Construct the agent)

def ContructionAgent(filepath,n1,n2):

f = open(filepath)
lines = f.readlines()
A = np.zeros((50, 50), dtype=float)
A_row = 0
for line in lines:
    list = line.strip("
").split(" ")
    A[A_row:] = list[0:50]
    A_row += 1

# 初始化p1和p2
a = 0.05
b = 0.01
p1 =0.5
p2 =0.5
Action = 1
# 在这里使用数字1代表选择动作‘Yes’,用2代表动作‘No’
for i in range(1):

    #         global Action
    # 相似性阈值（the threashhold_value of similarity）
    if (p1 >= p2):
        Action = 1
    else:
        Action = 2
    print("选择的动作是：" + str(Action))
    threshhold_value = 0.3
    similarity = RandomEnviromentForActive(A, n1, n2)
    # p1表示动作1"Yes"被选择的概率，p2表示动作2"No"被选择的概率
    # 前一次选择的动作是‘Yes’，并且该动作得到了奖励
    if (similarity > threshhold_value) and (Action == 1):
        p1 = p1 + a * (1 - p1)
        p2 = 1-p1
       # p2 = (1 - a) * p2
    # 前一次选择的动作是"No",并且该动作得到了奖励
    elif (similarity < threshhold_value) and (Action == 2):
        p2 = (1-a)*p2
        p1 = 1-p2
       # p1 = (1 - a) * p1
    # 前一次选择的动作是‘Yes’，但该动作得到了惩罚
    elif (similarity < threshhold_value) and (Action == 1):
        p2 = 1-b*p2
        p1 = 1-p2
        #p2 = 1 - b * p2

    # 前一次选择的动作是‘No’，但该动作得到了惩罚
    elif (similarity > threshhold_value) and (Action == 2):
        p1 = b + (1 - b) * (1 - p1)
        p2 = 1-p1
       # p1 = 1 - b * p1

if (p1 >= p2):
    print("下一时刻选择的动作是:Yes")
else:
    print("下一时刻选择的动作是:No")
return p1, p2



import os

import pandas as pd
import numpy as np
path=r"../Data/itcmatrixs/36000/"
result = np.zeros((50, 50))
for i in os.walk(path):

#print(i)
#print(type(i))
for m in range(50):
    for n in range(50):
        r = None
        for j in range(26):
            datapath = path+i[2][j]
            p1,p2 = ContructionAgent(datapath,m,n)
            r = int(p1>=p2)
        result[m,n] = r;

r.save("result.npy")
pass

附注

有需要源码和数据集的请发送信息到280815640@qq.com，感谢您的关注。

GPU云服务器云服务器基于机器学习的简单的深度学习基于深度学习的语音增强基于深度学习的图像识别

文章版权归作者所有，未经允许请勿转载,若此文章存在违规行为，您可以联系管理员删除。

转载请注明本文地址：https://www.ucloud.cn/yun/43450.html

贪心学院-图神经网络高级训练营

摘要：百度网盘提取码最近一直关注贪心学院的机器学习训练营，发现这门课讲的很有深度，不仅适合职场也适合科研人员，加入行业拿到高薪仅仅是职业生涯的开始。百度网盘提取码：u6C4最近一直关注贪心学院的机器学习训练营，发现这门课讲的很有深度，不仅适合职场也适合科研人员，加入AI行业拿到高薪仅仅是职业生涯的开始。现阶段AI人才结...

番茄西红柿 2021-11-25 09:43 评论0 收藏2637
从Pix2Code到CycleGAN：2017年深度学习重大研究进展全解读

摘要：文本谷歌神经机器翻译去年，谷歌宣布上线的新模型，并详细介绍了所使用的网络架构循环神经网络。目前唇读的准确度已经超过了人类。在该技术的发展过程中，谷歌还给出了新的，它包含了大量的复杂案例。谷歌收集该数据集的目的是教神经网络画画。 1. 文本1.1 谷歌神经机器翻译去年，谷歌宣布上线 Google Translate 的新模型，并详细介绍了所使用的网络架构——循环神经网络（RNN）。关键结果：与...

kuangcaibao 2019-04-25 18:22 评论0 收藏0