python实现的NMF的多图聚类算法
python实现的基于NMF的多图聚类算法的封装
代码均由论文复现
前言
怕忘记干了这个事,后面再来补充说明
参考论文
Identification of multi-layer networks community by fusing nonnegative matrix factorization and topological structural information
Graph Learning for Multiview Clustering
一、NMF
代码
# -*- coding: utf-8 -*-
import os
import sys
import numpy as np
import networkx as nx
import sklearn.preprocessing as nor
def graphsToMatrix(graphs):
    """Return the adjacency-matrix representation of a graph.

    Accepts either a single networkx graph or a list of graphs; returns a
    float32 dense ndarray (or a list of them, matching the input form).
    """
    def _to_dense(graph):
        return np.asarray(
            nx.adjacency_matrix(graph, weight='weight').todense(),
            dtype='float32',
        )

    if isinstance(graphs, list):
        return [_to_dense(graph) for graph in graphs]
    return _to_dense(graphs)
def getRandomData(xNum, yNum):
    """Return an (xNum, yNum) matrix of uniform random samples from [0, 1)."""
    shape = (xNum, yNum)
    return np.random.random_sample(shape)
class NMF(object):
    """Multi-graph clustering via joint non-negative matrix factorization.

    Every layer's adjacency matrix is factorized as W_i ~= B @ F_i with a
    basis matrix B shared across layers; the cluster label of a node is the
    argmax over the corresponding row of B.
    """

    # smallest allowed denominator, guards the multiplicative updates
    # against division by zero
    minTrueNum = 10 ** -8
    # maximum number of update iterations
    maxIterNum = 1000
    # convergence threshold on the total reconstruction error
    alpha = 10 ** -2
    # default number of clusters (overwritten in __init__)
    k = 4

    def __init__(self, graphs, k):
        """
        :param graphs: list of networkx graphs (one per layer) sharing the
            same node set — assumed, not checked; TODO confirm with callers
        :param k: number of clusters / factorization rank
        """
        # NOTE: the original kept these as mutable class-level attributes,
        # which would be shared across instances; they are instance state.
        self.graphs = graphs
        # adjacency matrix of every layer
        self.wItems = graphsToMatrix(graphs)
        # column-wise cosine-similarity matrix of every layer
        self.sItems = [self.simplify(matrix) for matrix in self.wItems]
        self.k = k
        self.nodesNum = graphs[0].number_of_nodes()
        self.layerNum = len(graphs)
        # history of per-iteration errors, seeded with a large sentinel
        self.j = [100 ** 4]
        self.__initialize()

    def __initialize(self):
        """Randomly initialize the shared basis B and per-layer factors F_i."""
        self.B = getRandomData(self.nodesNum, self.k)
        self.fItems = [getRandomData(self.k, self.nodesNum)
                       for _ in range(self.layerNum)]

    def calCulateErr(self):
        """Compute, record and return sum_i ||W_i - B @ F_i||_F^2."""
        itemErr = sum(np.sum((w - self.B @ f) ** 2)
                      for w, f in zip(self.wItems, self.fItems))
        self.j.append(itemErr)
        return itemErr

    def updateB(self):
        """Multiplicative update of the shared basis matrix B."""
        molecule = sum(w @ f.T for w, f in zip(self.wItems, self.fItems))
        denominator = self.B @ sum(f @ f.T for f in self.fItems)
        self.B = self.B * (molecule / np.maximum(denominator, self.minTrueNum))

    def updateK(self):
        """Multiplicative update of every per-layer coefficient matrix F_i."""
        molecules = [self.B.T @ w for w in self.wItems]
        denominators = [self.B.T @ self.B @ f for f in self.fItems]
        self.fItems = [f * (num / np.maximum(den, self.minTrueNum))
                       for num, den, f in zip(molecules, denominators,
                                              self.fItems)]

    def simplify(self, w):
        """Return the column-wise cosine-similarity matrix of w, with the
        diagonal forced to 1."""
        cs = nor.normalize(w, axis=0, norm="l2")
        re = cs.T @ cs
        np.fill_diagonal(re, 1)
        return re

    def fit(self):
        """Alternate the B and F updates until the error drops below alpha
        or maxIterNum iterations are reached."""
        for i in range(self.maxIterNum):
            self.updateB()
            self.updateK()
            errRate = self.calCulateErr()
            if errRate < self.alpha:
                print(f"end iter in {i} 轮 -> errRate::{errRate}")
                # BUG FIX: the original kept iterating after convergence
                break

    def getClusterLabel(self):
        """Return the cluster label of every node (argmax over rows of B)."""
        labels = np.argmax(self.B, 1)
        return labels.tolist()
# Example driver, disabled because it needs the project-local ``tools`` module:
# from tools import getGraphData
# def runTest():
#     graphs = getGraphData("CELE")
#     m = NMF(graphs, 5)
#     m.fit()
#     print(m.getClusterLabel())

if __name__ == '__main__':
    # runTest()
    pass
二、Mjnmf
代码
# -*- coding: utf-8 -*-
from logging import error
from numpy.linalg import inv
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from sklearn import metrics
import scipy.sparse as sp
import sklearn.preprocessing as nor
from copy import deepcopy
from scipy.io import loadmat
from munkres import Munkres
from sklearn.cluster import KMeans
import networkx as nx
# import loadData
def getRandomData(xNum, yNum):
    """Draw an ``xNum`` x ``yNum`` matrix of uniform samples from [0, 1)."""
    return np.random.random_sample((xNum, yNum))
def read_adj(filename):
    """Parse a multi-layer edge-list text file.

    Every line of the file has the form ``layerID source target weight``
    (space separated, all integers).

    :param filename: path of the edge-list file
    :returns: tuple ``(graphs, nodes, layers)`` where
        ``graphs`` -- float32 adjacency matrix of every layer,
        ``nodes``  -- sorted node list of every layer,
        ``layers`` -- the distinct layer ids, in ascending order.
    """
    df = pd.read_csv(filename, header=None, sep=' ',
                     names=['layerID', 'source', 'target', 'weight'],
                     dtype='int32')
    # BUG FIX: list(set(...)) yields a non-deterministic layer order;
    # sort so runs are reproducible and layers align with their ids
    layers = sorted(set(df['layerID'].tolist()))
    graphs = []
    nodes = []
    for layer_id in layers:
        g = nx.from_pandas_edgelist(df[df['layerID'] == layer_id],
                                    source='source', target='target',
                                    edge_attr='weight')
        nodelist = sorted(g.nodes())
        adj = np.asarray(
            nx.adjacency_matrix(g, nodelist=nodelist, weight='weight').todense(),
            dtype='float32')
        graphs.append(adj)
        nodes.append(nodelist)
    return graphs, nodes, layers
class Mjnmf(object):
    """Mjnmf: multi-layer network community detection by joint NMF.

    Jointly factorizes the kernelized, high-order proximity matrix of every
    layer as W_i ~= B @ F_i^T, couples the per-layer factors F_i to a
    consensus matrix H through mappings U_i, and regularizes H against the
    layer similarity matrices S_i.  The cluster of a node is the argmax over
    the corresponding row of H.

    NOTE(review): the blog scrape dropped every '+' operator from this code;
    they are restored below in caldistance, high_order, f_update, h_update
    and error, following the standard multiplicative-update derivation.
    """

    # smallest allowed denominator, guards against division by zero
    lower_control = 10 ** -10
    # default node / layer counts (overwritten in __init__)
    nodeNum = 0
    layerNum = 3
    # order of the high-order proximity expansion
    l = 4
    # decay factor of the high-order proximity terms
    theta = 0.1
    # weight of the ||F_i - U_i H|| coupling term
    beta = 0.1
    # weight of the ||S_i - H H^T|| similarity term
    alpha = 1
    # weight of the orthogonality regularizer on H
    lamda = 10
    # maximum number of iterations
    iterNum = 700
    # relative-error-change stopping threshold
    errorradio = 10 ** -6
    # default number of clusters (overwritten in __init__)
    k = 4

    def __init__(self, dataSet, k=4, layerNum=None, nodeNum=None):
        """
        :param dataSet: array of per-layer adjacency matrices,
            shape (layers, nodes, nodes)
        :param k: number of clusters / factorization rank
        :param layerNum: number of layers; inferred from dataSet when None
        :param nodeNum: number of nodes; inferred from dataSet when None
        """
        self.dataSet = dataSet
        if layerNum is None:
            self.layerNum = dataSet.shape[0]
        else:
            self.layerNum = layerNum
        if nodeNum is None:
            self.nodeNum = dataSet[0].shape[1]
        else:
            self.nodeNum = nodeNum
        print(f"初始化 layerNum{self.layerNum} :: nodeNum{self.nodeNum}")
        self.k = k
        # error history, seeded with a large sentinel so the first relative
        # change test in runIter is well defined
        self.e = [10 ** 7]
        self.errItems = []
        self.WItems = [self.constructkernal(item) for item in dataSet]
        self.__runInitial()

    def __runInitial(self):
        """Randomly initialize the factors and precompute the similarity,
        mapping and high-order weight matrices."""
        # BUG FIX: the original used range(self.nodeNum); fItems must hold
        # one factor matrix per LAYER (it is zipped with wGroups / uItems)
        self.fItems = [getRandomData(self.nodeNum, self.k)
                       for _ in range(self.layerNum)]
        self.h = getRandomData(self.nodeNum, self.k)
        self.b = getRandomData(self.nodeNum, self.k)
        self.wGroups = [self.high_order(WItem) for WItem in self.WItems]
        self.sItems = [self.similarity(adj) for adj in self.dataSet]
        for sItem in self.sItems:
            np.fill_diagonal(sItem, 1)
        self.uItems = [self.cal_sim(sItem) for sItem in self.sItems]
        self.wGroups = [self.cal_sim(wItem) for wItem in self.wGroups]

    def caldistance(self, adj):
        """Pairwise Euclidean distances between the COLUMNS of adj.

        Uses ||a-b||^2 = ||a||^2 + ||b||^2 - 2 a.b; floating-point error can
        make that expression slightly negative, so it is clipped at 0 before
        the square root to avoid NaNs.
        """
        pingfanghe = np.sum(adj ** 2, 0)
        jiaocha = adj.T @ adj
        squared = (pingfanghe.reshape(self.nodeNum, 1)
                   + pingfanghe.reshape(1, self.nodeNum)
                   - 2 * jiaocha)
        return np.sqrt(np.maximum(squared, 0))

    def constructkernal(self, adjcency):
        """Gaussian (RBF) kernel matrix over the column distances of the
        adjacency matrix; bandwidth = median distance, diagonal zeroed."""
        dis = self.caldistance(adjcency)
        sig = np.median(dis)
        fenzi = dis ** 2
        fenmu = max(2 * (sig ** 2), self.lower_control)
        wkernal = np.exp(-fenzi / fenmu)
        np.fill_diagonal(wkernal, 0)
        return wkernal

    def high_order(self, W):
        """Truncated exponential-style high-order proximity:
        P = sum_{i=0}^{l-1} theta^i * W^(i+1) / (i+1)!."""
        P = np.zeros((self.nodeNum, self.nodeNum))
        temp = 1
        for i in range(self.l):
            if i == 0:
                A = deepcopy(W)
            else:
                A = A @ W
            temp *= (i + 1)  # running factorial (i+1)!
            # BUG FIX: accumulation operator was lost ('P =' overwrote P,
            # keeping only the last term)
            P += (A * (self.theta ** i)) / temp
        return P

    def similarity(self, W):
        """Column-wise cosine-similarity matrix of W."""
        cs = nor.normalize(W, axis=0, norm="l2")
        return cs.T @ cs

    def cal_sim(self, s):
        """Row-normalize s so every row sums to 1 (L1)."""
        return nor.normalize(s, axis=1, norm="l1")

    def b_update(self, B):
        """Multiplicative update of the shared basis matrix B."""
        fenzi = sum([w @ f for w, f in zip(self.wGroups, self.fItems)])
        fenmu = B @ sum([f.T @ f for f in self.fItems])
        return B * (fenzi / np.maximum(fenmu, self.lower_control))

    def f_update(self, w, u, f):
        """Multiplicative update of one layer's factor matrix F."""
        fenzi = w.T @ self.b + self.beta * (u @ self.h)
        fenmu = f @ (self.b.T @ self.b) + self.beta * f
        return f * (fenzi / np.maximum(fenmu, self.lower_control))

    def h_update(self, H):
        """Multiplicative update of the consensus matrix H (positive root of
        the quadratic stationarity condition)."""
        uuh = 2 * self.beta * sum([u.T @ (u @ H) for u in self.uItems])
        sh = 4 * self.alpha * sum([s.T @ H for s in self.sItems])
        uf = 2 * self.beta * sum([u.T @ f
                                  for u, f in zip(self.uItems, self.fItems)])
        hhh = 8 * (self.layerNum * self.alpha + self.lamda) * (H @ (H.T @ H))
        fenzi = -uuh + np.sqrt((uuh * uuh)
                               + (2 * hhh) * (sh + uf + 4 * self.lamda * H))
        return H * np.sqrt(fenzi / np.maximum(hhh, self.lower_control))

    def u_update(self, u, f):
        """Multiplicative update of one layer's mapping matrix U."""
        fenzi = f @ (self.h.T)
        fenmu = u @ self.h @ (self.h.T)
        return u * (fenzi / np.maximum(fenmu, self.lower_control))

    def error(self, w, f, s, u):
        """Objective value of one layer: reconstruction + similarity fit +
        coupling (the orthogonality term is not monitored, as original)."""
        e1 = np.sum((w - self.b @ f.T) ** 2)
        e2 = self.alpha * np.sum((s - self.h @ self.h.T) ** 2)
        e3 = self.beta * np.sum((f - u @ self.h) ** 2)
        return e1 + e2 + e3

    def runIter(self):
        """Alternate the B, F, U, H updates until the relative error change
        drops below errorradio or iterNum iterations are reached."""
        for i in range(self.iterNum):
            self.b = self.b_update(self.b)
            self.fItems = [self.f_update(w, u, f)
                           for w, u, f in zip(self.wGroups, self.uItems,
                                              self.fItems)]
            self.uItems = [self.u_update(u, f)
                           for u, f in zip(self.uItems, self.fItems)]
            self.h = self.h_update(self.h)
            self.errItems = [self.error(w, f, s, u)
                             for w, f, s, u in zip(self.wGroups, self.fItems,
                                                   self.sItems, self.uItems)]
            self.e.append(sum(self.errItems))
            if abs(self.e[-1] - self.e[-2]) / self.e[-1] <= self.errorradio:
                print(f"结束于 {i}")
                break

    def getRes(self):
        """Return the cluster label of every node (argmax over rows of H)."""
        return np.argmax(self.h, 1)
def dataTrans(fileName):
    """Load the multi-layer edge list ``./DataSet/<fileName>.txt``.

    :param fileName: base name of the data file (without directory/extension)
    :returns: tuple ``(graphs_adj, nodesF)`` — adjacency matrices and node
        lists from :func:`read_adj`.
    """
    # BUG FIX: the '+' concatenation operators were missing from the path
    graphs_adj, nodesF, _ = read_adj('./DataSet/' + fileName + '.txt')
    return graphs_adj, nodesF
def main(fileName: str, clusterNum: int = 4):
    """Run Mjnmf clustering on one data set.

    :param fileName: base name of the edge-list file under ``./DataSet/``
    :param clusterNum: number of clusters (factorization rank); defaults to 4
        so the module's ``__main__`` call, which passes only a file name,
        works.  (The original docstring mislabeled this as a file name.)
    :returns: tuple ``(labels, nodes)`` — cluster label of every node and the
        node list of the first layer
    """
    data, nodes = dataTrans(fileName)
    d = np.array(data)
    m = Mjnmf(d, k=clusterNum)
    m.runIter()
    return m.getRes().tolist(), nodes[0]
if __name__ == '__main__':
    # BUG FIX: main() was called without the required clusterNum argument,
    # which raises TypeError; pass an explicit cluster count.
    print(main("CELE", 4))
数据
网盘
提取码:es4o
总结
有时间来补充非负矩阵分解的知识
这篇好文章是转载于:学新通技术网
- 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
- 本站站名: 学新通技术网
- 本文地址: /boutique/detail/tanhghfjgf
系列文章
更多
-
photoshop保存的图片太大微信发不了怎么办
PHP中文网 06-15 -
《学习通》视频自动暂停处理方法
HelloWorld317 07-05 -
word里面弄一个表格后上面的标题会跑到下面怎么办
PHP中文网 06-20 -
Android 11 保存文件到外部存储,并分享文件
Luke 10-12 -
photoshop扩展功能面板显示灰色怎么办
PHP中文网 06-14 -
微信公众号没有声音提示怎么办
PHP中文网 03-31 -
excel下划线不显示怎么办
PHP中文网 06-23 -
excel打印预览压线压字怎么办
PHP中文网 06-22 -
TikTok加速器哪个好免费的TK加速器推荐
TK小达人 10-01 -
怎样阻止微信小程序自动打开
PHP中文网 06-13