Skip to content

Commit

Permalink
Change functions to class
Browse files Browse the repository at this point in the history
Change functions to class.
  • Loading branch information
xhuang31 committed Apr 17, 2018
1 parent 0908bf6 commit eb2a9e3
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 115 deletions.
182 changes: 93 additions & 89 deletions AANE_fun.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
def AANE_fun(Net, Attri, d, *varargs):
import numpy as np
from scipy import sparse
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import svds
from math import ceil

class AANE:
"""Jointly embed Net and Attri into embedding representation H
H = AANE_fun(Net,Attri,d)
H = AANE_fun(Net,Attri,d,lambd,rho)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att')
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att',splitnum)
H = AANE_fun(Net,Attri,d)
H = AANE_fun(Net,Attri,d,lambd,rho)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att')
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att',splitnum)
:param Net: the weighted adjacency matrix
:param Attri: the attribute information matrix, where each row corresponds to a node
:param d: the dimension of the embedding representation
Expand All @@ -17,96 +23,94 @@ def AANE_fun(Net, Attri, d, *varargs):
Copyright 2017 & 2018, Xiao Huang and Jundong Li.
$Revision: 1.0.2 $ $Date: 2018/02/19 00:00:00 $
"""
import numpy as np
from scipy import sparse
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import svds
from math import ceil
'''################# Parameters #################'''
global affi, sa, H, Z
maxiter = 2 # Max num of iteration
[n, m] = Attri.shape # n = Total num of nodes, m = attribute category num
Net = sparse.lil_matrix(Net)
Net.setdiag(np.zeros(n))
Net = csc_matrix(Net)
Attri = csc_matrix(Attri)
lambd = 0.05 # Initial regularization parameter
rho = 5 # Initial penalty parameter
splitnum = 1 # number of pieces we split the SA for limited cache
if len(varargs) >= 4 and varargs[3] == 'Att':
sumcol = np.arange(m)
np.random.shuffle(sumcol)
H = svds(Attri[:, sumcol[0:min(10 * d, m)]], d)[0]
else:
sumcol = Net.sum(0)
H = svds(Net[:, sorted(range(n), key=lambda k: sumcol[0, k], reverse=True)[0:min(10 * d, n)]], d)[0]
def __init__(self, Net, Attri, d, *varargs):
self.maxiter = 2 # Max num of iteration
[self.n, m] = Attri.shape # n = Total num of nodes, m = attribute category num
Net = sparse.lil_matrix(Net)
Net.setdiag(np.zeros(self.n))
Net = csc_matrix(Net)
Attri = csc_matrix(Attri)
self.lambd = 0.05 # Initial regularization parameter
self.rho = 5 # Initial penalty parameter
splitnum = 1 # number of pieces we split the SA for limited cache
if len(varargs) >= 4 and varargs[3] == 'Att':
sumcol = np.arange(m)
np.random.shuffle(sumcol)
self.H = svds(Attri[:, sumcol[0:min(10 * d, m)]], d)[0]
else:
sumcol = Net.sum(0)
self.H = svds(Net[:, sorted(range(self.n), key=lambda k: sumcol[0, k], reverse=True)[0:min(10 * d, self.n)]], d)[0]

if len(varargs) > 0:
self.lambd = varargs[0]
self.rho = varargs[1]
if len(varargs) >= 3:
self.maxiter = varargs[2]
if len(varargs) >= 5:
splitnum = varargs[4]
self.block = min(int(ceil(float(self.n) / splitnum)), 7575) # Treat at least each 7575 nodes as a block
self.splitnum = int(ceil(float(self.n) / self.block))
with np.errstate(divide='ignore'): # inf will be ignored
self.Attri = Attri.transpose() * sparse.diags(np.ravel(np.power(Attri.power(2).sum(1), -0.5)))
self.Z = self.H.copy()
self.affi = -1 # Index for affinity matrix sa
self.U = np.zeros((self.n, d))
self.nexidx = np.split(Net.indices, Net.indptr[1:-1])
self.Net = np.split(Net.data, Net.indptr[1:-1])
self.d = d


if len(varargs) > 0:
lambd = varargs[0]
rho = varargs[1]
if len(varargs) >= 3:
maxiter = varargs[2]
if len(varargs) >=5:
splitnum = varargs[4]
block = min(int(ceil(float(n) / splitnum)), 7575) # Treat at least each 7575 nodes as a block
splitnum = int(ceil(float(n) / block))
with np.errstate(divide='ignore'): # inf will be ignored
Attri = Attri.transpose() * sparse.diags(np.ravel(np.power(Attri.power(2).sum(1), -0.5)))
Z = H.copy()
affi = -1 # Index for affinity matrix sa
U = np.zeros((n, d))
nexidx = np.split(Net.indices, Net.indptr[1:-1])
Net = np.split(Net.data, Net.indptr[1:-1])
'''################# Update functions #################'''
def updateH():
global affi, sa, H
xtx = np.dot(Z.transpose(), Z) * 2 + rho * np.eye(d)
for blocki in range(splitnum): # Split nodes into different Blocks
indexblock = block * blocki # Index for splitting blocks
if affi != blocki:
sa = Attri[:, range(indexblock, indexblock + min(n - indexblock, block))].transpose() * Attri
affi = blocki
sums = sa.dot(Z) * 2
for i in range(indexblock, indexblock + min(n - indexblock, block)):
neighbor = Z[nexidx[i], :] # the set of adjacent nodes of node i
def updateH(self):
xtx = np.dot(self.Z.transpose(), self.Z) * 2 + self.rho * np.eye(self.d)
for blocki in range(self.splitnum): # Split nodes into different Blocks
indexblock = self.block * blocki # Index for splitting blocks
if self.affi != blocki:
self.sa = self.Attri[:, range(indexblock, indexblock + min(self.n - indexblock, self.block))].transpose() * self.Attri
self.affi = blocki
sums = self.sa.dot(self.Z) * 2
for i in range(indexblock, indexblock + min(self.n - indexblock, self.block)):
neighbor = self.Z[self.nexidx[i], :] # the set of adjacent nodes of node i
for j in range(1):
normi_j = np.linalg.norm(neighbor - H[i, :], axis=1) # norm of h_i^k-z_j^k
normi_j = np.linalg.norm(neighbor - self.H[i, :], axis=1) # norm of h_i^k-z_j^k
nzidx = normi_j != 0 # Non-equal Index
if np.any(nzidx):
normi_j = (lambd * Net[i][nzidx]) / normi_j[nzidx]
H[i, :] = np.linalg.solve(xtx + normi_j.sum() * np.eye(d), sums[i - indexblock, :] + (
neighbor[nzidx, :] * normi_j.reshape((-1, 1))).sum(0) + rho * (
Z[i, :] - U[i, :]))
normi_j = (self.lambd * self.Net[i][nzidx]) / normi_j[nzidx]
self.H[i, :] = np.linalg.solve(xtx + normi_j.sum() * np.eye(self.d), sums[i - indexblock, :] + (
neighbor[nzidx, :] * normi_j.reshape((-1, 1))).sum(0) + self.rho * (
self.Z[i, :] - self.U[i, :]))
else:
H[i, :] = np.linalg.solve(xtx, sums[i - indexblock, :] + rho * (
Z[i, :] - U[i, :]))
def updateZ():
global affi, sa, Z
xtx = np.dot(H.transpose(), H) * 2 + rho * np.eye(d)
for blocki in range(splitnum): # Split nodes into different Blocks
indexblock = block * blocki # Index for splitting blocks
if affi != blocki:
sa = Attri[:, range(indexblock, indexblock + min(n - indexblock, block))].transpose() * Attri
affi = blocki
sums = sa.dot(H) * 2
for i in range(indexblock, indexblock + min(n - indexblock, block)):
neighbor = H[nexidx[i], :] # the set of adjacent nodes of node i
self.H[i, :] = np.linalg.solve(xtx, sums[i - indexblock, :] + self.rho * (
self.Z[i, :] - self.U[i, :]))
def updateZ(self):
xtx = np.dot(self.H.transpose(), self.H) * 2 + self.rho * np.eye(self.d)
for blocki in range(self.splitnum): # Split nodes into different Blocks
indexblock = self.block * blocki # Index for splitting blocks
if self.affi != blocki:
self.sa = self.Attri[:, range(indexblock, indexblock + min(self.n - indexblock, self.block))].transpose() * self.Attri
self.affi = blocki
sums = self.sa.dot(self.H) * 2
for i in range(indexblock, indexblock + min(self.n - indexblock, self.block)):
neighbor = self.H[self.nexidx[i], :] # the set of adjacent nodes of node i
for j in range(1):
normi_j = np.linalg.norm(neighbor - Z[i, :], axis=1) # norm of h_i^k-z_j^k
normi_j = np.linalg.norm(neighbor - self.Z[i, :], axis=1) # norm of h_i^k-z_j^k
nzidx = normi_j != 0 # Non-equal Index
if np.any(nzidx):
normi_j = (lambd * Net[i][nzidx]) / normi_j[nzidx]
Z[i, :] = np.linalg.solve(xtx + normi_j.sum() * np.eye(d), sums[i - indexblock, :] + (
neighbor[nzidx, :] * normi_j.reshape((-1, 1))).sum(0) + rho * (
H[i, :] + U[i, :]))
normi_j = (self.lambd * self.Net[i][nzidx]) / normi_j[nzidx]
self.Z[i, :] = np.linalg.solve(xtx + normi_j.sum() * np.eye(self.d), sums[i - indexblock, :] + (
neighbor[nzidx, :] * normi_j.reshape((-1, 1))).sum(0) + self.rho * (
self.H[i, :] + self.U[i, :]))
else:
Z[i, :] = np.linalg.solve(xtx, sums[i - indexblock, :] + rho * (
H[i, :] + U[i, :]))
'''################# First update H #################'''
updateH()
'''################# Iterations #################'''
for iternum in range(maxiter - 1):
updateZ()
U = U + H - Z
updateH()
return H
self.Z[i, :] = np.linalg.solve(xtx, sums[i - indexblock, :] + self.rho * (
self.H[i, :] + self.U[i, :]))

def function(self):
self.updateH()
'''################# Iterations #################'''
for __ in range(self.maxiter - 1):
self.updateZ()
self.U = self.U + self.H - self.Z
self.updateH()
return self.H


43 changes: 20 additions & 23 deletions AANE_fun_distri.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,27 @@


class AANE:

"""Jointly embed Net and Attri into embedding representation H
H = AANE_fun(Net,Attri,d)
H = AANE_fun(Net,Attri,d,lambd,rho)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att')
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att', worknum)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att', worknum, splitnum)
:param Net: the weighted adjacency matrix
:param Attri: the attribute information matrix with row denotes nodes
:param d: the dimension of the embedding representation
:param lambd: the regularization parameter
:param rho: the penalty parameter
:param maxiter: the maximum number of iteration
:param 'Att': refers to conduct Initialization from the SVD of Attri
:param worknum: the number of worker
:param splitnum: number of pieces we split the SA for limited cache
:return: the embedding representation H
Copyright 2017 & 2018, Xiao Huang and Jundong Li.
$Revision: 1.0.3 $ $Date: 2018/04/05 00:00:00 $
"""
def __init__(self, Net, Attri, d, *varargs):
"""Jointly embed Net and Attri into embedding representation H
H = AANE_fun(Net,Attri,d)
H = AANE_fun(Net,Attri,d,lambd,rho)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att')
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att', worknum)
H = AANE_fun(Net,Attri,d,lambd,rho,maxiter,'Att', worknum, splitnum)
:param Net: the weighted adjacency matrix
:param Attri: the attribute information matrix with row denotes nodes
:param d: the dimension of the embedding representation
:param lambd: the regularization parameter
:param rho: the penalty parameter
:param maxiter: the maximum number of iteration
:param 'Att': refers to conduct Initialization from the SVD of Attri
:param worknum: the number of worker
:param splitnum: number of pieces we split the SA for limited cache
:return: the embedding representation H
Copyright 2017 & 2018, Xiao Huang and Jundong Li.
$Revision: 1.0.3 $ $Date: 2018/04/05 00:00:00 $
"""

# shared memory
#self.output = mp.Manager().dict()

Expand All @@ -52,7 +50,6 @@ def __init__(self, Net, Attri, d, *varargs):
#self.worknum = None
#self.splitnum = None


self.maxiter = 2 # Max num of iteration
[self.n, m] = Attri.shape # n = Total num of nodes, m = attribute category num
Net = sparse.lil_matrix(Net)
Expand Down
6 changes: 3 additions & 3 deletions Runme.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
import scipy.io as sio
from AANE_fun import AANE_fun
from AANE_fun import AANE
import time


Expand Down Expand Up @@ -34,12 +34,12 @@
'''################# Accelerated Attributed Network Embedding #################'''
print("Accelerated Attributed Network Embedding (AANE), 5-fold with 100% of training is used:")
start_time = time.time()
H_AANE = AANE_fun(CombG, CombA, d, lambd, rho)
H_AANE = AANE(CombG, CombA, d, lambd, rho).function()
print("time elapsed: {:.2f}s".format(time.time() - start_time))

'''################# AANE for a Pure Network #################'''
print("AANE for a pure network:")
start_time = time.time()
H_Net = AANE_fun(CombG, CombG, d, lambd, rho)
H_Net = AANE(CombG, CombG, d, lambd, rho).function()
print("time elapsed: {:.2f}s".format(time.time() - start_time))
sio.savemat('Embedding.mat', {"H_AANE": H_AANE, "H_Net": H_Net})

0 comments on commit eb2a9e3

Please sign in to comment.