作为一名python小白,导师让我用python实现论文中的算法,对于其中所要求的技术点以及如何实现算法显得一头雾水。目前python过完廖老师的python教程,正在看networkx文档。
望各位帮我解决以下问题:
1.实现算法所要求技术点
2.如何应对此类论文
3.数据挖掘方向学习建议
论文地址 : http://cjc.ict.ac.cn/online/o...
经过一周,现已初步完成,其中多出代码不够美观以及效率不高,还请指点
# _*_ coding:utf-8 _*_
# ==================================================================================
#
# Description: Influence Maximization on Multiple Social Networks
#
# ==================================================================================
import matplotlib.pyplot as plt
import networkx as nx
import heapq
#总图
G = nx.DiGraph()
def load_graph(file):
'''
加载文件为列表格式,并得到G,画出图结构
'''
#将总列表设成全局格式
global gllist
#迭代文件中每个元素
with open(file) as f:
lines = f.readlines()
mylist = [line.strip().split() for line in lines]
gllist = []
#将字符串型转换为整型
for i in mylist:
gllist.append(i[:-2]+map(lambda x: float(x), i[-2:]))
print '初始全局列表:'
print gllist
drawlist=[]
#提取二维列表mylist每行前三个元素,赋给新的列表drawlist
for i in range(len(mylist)):
drawlist.append([])
for j in range(3):
drawlist[i].append(mylist[i][j])
#将列表drawlist加载为有向加权图
G.add_weighted_edges_from(drawlist)
nx.draw(G, with_labels=True, width=1, node_color='y', edge_color='b')
plt.show()
print 'G图中所有节点:',G.nodes()
print 'G图中所有边:',G.edges()
print '\n'
def get_self_node(gllist, target=None):
'''
获取目标节点的自传播节点,返回selflist并包含目标节点
'''
#初始化自传播节点列表
selflist = [target]
#存放已传播节点列表
haslist = []
flag = 0
while (flag != 0):
flag = 0
for target in selflist:
if target not in haslist:
for i in range(len(gllist)):
#判断二维列表中,每行第三个元素是否为1,若为1,则为自传播节点
if ((gllist[i][0] == target)or(gllist[i][1]==target))and(gllist[i][3]==1.0):
if gllist[i][0] == target:
if gllist[i][1] not in haslist:
selflist.append(gllist[i][1])
haslist.append(gllist[i][1])
flag += 1
else:
if gllist[i][0] not in haslist:
selflist.append(gllist[i][0])
haslist.append(gllist[i][0])
flag += 1
#去除重复元素
haslist = set(haslist)
selflist = set(selflist)
#去除重复元素
selflist = set(selflist)
return selflist
def longest_path(gllist,source=None,target=None):
'''
获取起始点到实体的最大路径集合,返回为longestpath列表
'''
longestpath = []
newlist = []
for i in range(len(gllist)):
newlist.append([])
for j in range(3):
newlist[i].append(gllist[i][j])
#构建图结构
G1 = nx.DiGraph()
#添加带权有向边
G1.add_weighted_edges_from(newlist)
#获取目标节点的所有自传播街边,并存入selflist中
selflist = get_self_node(gllist, target)
max_path = 0
val_path = 1
#获取初始节点到目标节点及目标节点的自传播节点的最大路径
for v in selflist:
if v != source:
#遍历两点之间所有路径,并进行比对
for path in nx.all_simple_paths(G1,source=source,target=v):
#判断路径后两个元素是否为相同实体(如:b1->b2)
if is_self_transmit_node(path[-2], v) == 0:
for i in range(0, len(path)-1):
val_path *= G1.get_edge_data(path[i], path[i+1])['weight']
if max_path < val_path:
max_path = val_path
val_path = 1
#若目标节点为起始节点则直接跳出
else: continue
test.txt
a1 b1 0.2 0
a1 c1 0.8 0
a2 b2 0.4 0
a2 d2 1 0
b1 c1 0.7 0
c2 a2 0.8 0
d2 b2 0.6 0
a1 a2 1 1
a2 a1 0.1 1
....
a1 l1 0.5 0
a1 m1 0.5 0
a1 q1 0.5 0
a1 v1 0.5 0
a1 z1 0.5 0
a1 s1 0.5 0
a1 w1 0.5 0
a1 u1 0.5 0
其中前两列为传播实体,第三列为实体间传播概率,最后一列为0代表同一网络传播,为1代表网络间自传播。
下来要进行优化:
1.采用独立级联模型,设置阈值
2.将最大路径改为最短路径,利用log