import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class GCN3(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN3, self).__init__()
        self.linear_1 = torch.nn.Linear(input_dim, hidden_dim)
        self.linear_2 = torch.nn.Linear(hidden_dim, hidden_dim)
        self.linear_3 = torch.nn.Linear(hidden_dim, output_dim)
        self.relu = torch.nn.ReLU()

    def forward(self, data, device):
        graph = data["graph"].to(device)[0]  # [N, N]: first adjacency matrix in the batch
        adj = GCN3.get_adj(graph)
        flow_x = data["flow_x"].to(device)  # [B, N, H, D]
        B, N = flow_x.size(0), flow_x.size(1)
        flow_x = flow_x.view(B, N, -1)  # [B, N, H*D]: flatten the history window into one feature vector
        # Each layer computes relu(\hat{A} X W): a learnable linear map W applied to
        # the node features, followed by aggregation with the normalized adjacency \hat{A}.
        output_1 = self.linear_1(flow_x)  # [B, N, hid_C], i.e. XW
        output_1 = self.relu(torch.matmul(adj, output_1))  # [B, N, hid_C], i.e. \hat{A}XW
        output_2 = self.linear_2(output_1)
        output_2 = self.relu(torch.matmul(adj, output_2))
        output_3 = self.linear_3(output_2)
        output_3 = self.relu(torch.matmul(adj, output_3))
        return output_3.unsqueeze(2)  # [B, N, 1, out_C]: one predicted time step

    @staticmethod
    def get_adj(graph):
        N = graph.size(0)
        matrix = torch.eye(N, dtype=torch.float, device=graph.device)
        graph = graph + matrix  # A + I, out of place so the caller's tensor is not mutated
        degree = torch.sum(graph, dim=1, keepdim=False)  # [N]
        degree = degree.pow(-1)  # D^{-1}; a zero degree produces inf
        degree[degree == float("inf")] = 0
        degree = torch.diag(degree)
        return torch.mm(degree, graph)  # \hat{A} = D^{-1}(A + I), row-normalized adjacency
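
# A quick sanity check for get_adj (a minimal sketch; the toy 3-node path graph
# below is made up for illustration): every row of D^{-1}(A + I) sums to 1.
# >>> A = torch.tensor([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]])
# >>> GCN3.get_adj(A).sum(dim=1)
# tensor([1., 1., 1.])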

class GCN2(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN2, self).__init__()
        self.linear_1 = torch.nn.Linear(input_dim, hidden_dim)
        self.linear_2 = torch.nn.Linear(hidden_dim, output_dim)
        self.relu = torch.nn.ReLU()

    def forward(self, data, device):
        graph = data["graph"].to(device)[0]  # [N, N]
        adj = GCN2.get_adj(graph)
        flow_x = data["flow_x"].to(device)  # [B, N, H, D]
        B, N = flow_x.size(0), flow_x.size(1)
        flow_x = flow_x.view(B, N, -1)  # [B, N, H*D]
        output_1 = self.linear_1(flow_x)  # [B, N, hid_C], i.e. XW
        output_1 = self.relu(torch.matmul(adj, output_1))  # [B, N, hid_C], i.e. \hat{A}XW
        output_2 = self.linear_2(output_1)
        output_2 = self.relu(torch.matmul(adj, output_2))
        return output_2.unsqueeze(2)  # [B, N, 1, out_C]

    @staticmethod
    def get_adj(graph):
        N = graph.size(0)
        matrix = torch.eye(N, dtype=torch.float, device=graph.device)
        graph = graph + matrix  # A + I
        degree = torch.sum(graph, dim=1, keepdim=False)  # [N]
        degree = degree.pow(-1)  # D^{-1}; a zero degree produces inf
        degree[degree == float("inf")] = 0
        degree = torch.diag(degree)
        return torch.mm(degree, graph)  # D^{-1}(A + I)

class bGCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(bGCN, self).__init__()
        self.linear_1 = torch.nn.Linear(input_dim, hidden_dim)
        self.linear_2 = torch.nn.Linear(hidden_dim, output_dim)  # defined but unused in this one-layer baseline
        self.relu = torch.nn.ReLU()

    def forward(self, data, device):
        graph = data["graph"].to(device)[0]  # [N, N]
        adj = bGCN.get_adj(graph)
        flow_x = data["flow_x"].to(device)  # [B, N, H, D]
        B, N = flow_x.size(0), flow_x.size(1)
        flow_x = flow_x.view(B, N, -1)  # [B, N, H*D]
        output_1 = self.linear_1(flow_x)  # [B, N, hid_C]
        output_1 = self.relu(torch.matmul(adj, output_1))  # [B, N, hid_C]
        return output_1.unsqueeze(2)  # [B, N, 1, hid_C]

    @staticmethod
    def get_adj(graph):
        N = graph.size(0)
        matrix = torch.eye(N, dtype=torch.float, device=graph.device)
        graph = graph + matrix  # A + I
        degree = torch.sum(graph, dim=1, keepdim=False)  # [N]
        degree = degree.pow(-0.5)  # D^{-1/2}; a zero degree produces inf
        degree[torch.isinf(degree)] = 0.0
        degree = torch.diag(degree)
        # Symmetric (renormalization-trick) adjacency \hat{A} = D^{-1/2}(A + I)D^{-1/2}
        return degree.matmul(graph).matmul(degree)
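
# Unlike the row-normalized D^{-1}(A + I) used by GCN2/GCN3, the symmetric
# normalization returns a symmetric matrix (toy path graph for illustration):
# >>> A = torch.tensor([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]])
# >>> S = bGCN.get_adj(A)
# >>> torch.allclose(S, S.t())
# True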

# Chebyshev spectral convolution:
#   x * g_\theta = U g_\theta U^T x \approx \sum_{k=0}^{K} \alpha_k T_k(L) x
class ChebConv(torch.nn.Module):
    """
    The ChebNet convolution operation.
    input_dim: int
    output_dim: int
    K: int, order of the Chebyshev expansion
    """
    def __init__(self, input_dim, output_dim, K, bias=True, normalize=True):
        super(ChebConv, self).__init__()
        self.normalize = normalize
        # Chebyshev coefficients \alpha_k, shape [K+1, 1, in, out]
        self.weight = torch.nn.Parameter(torch.Tensor(K + 1, 1, input_dim, output_dim))
        torch.nn.init.xavier_normal_(self.weight)
        if bias:
            self.bias = torch.nn.Parameter(torch.Tensor(1, 1, output_dim))
            torch.nn.init.zeros_(self.bias)
        else:
            self.register_parameter("bias", None)
        self.K = K + 1

    def forward(self, inputs, A):
        """
        :param inputs: [B, N, C]
        :param A: adjacency matrix, [N, N]
        :return: [B, N, D]
        """
        L = ChebConv.get_laplacian(A, self.normalize)
        mul_L = self.cheb_polynomial(L).unsqueeze(1)  # [K, 1, N, N]
        result = torch.matmul(mul_L, inputs)  # [K, B, N, C]
        result = torch.matmul(result, self.weight)  # [K, B, N, D]
        result = torch.sum(result, dim=0)  # [B, N, D]
        if self.bias is not None:
            result = result + self.bias
        return result

    def cheb_polynomial(self, laplacian):
        """
        Compute T_0(L) .. T_{K-1}(L) with the Chebyshev recurrence
        T_k(L) = 2 L T_{k-1}(L) - T_{k-2}(L).
        :param laplacian: [N, N]
        :return: [K, N, N]
        """
        N = laplacian.size(0)
        multi_order_laplacian = torch.zeros([self.K, N, N], device=laplacian.device, dtype=torch.float)  # [K, N, N]
        multi_order_laplacian[0] = torch.eye(N, device=laplacian.device, dtype=torch.float)
        if self.K >= 2:
            multi_order_laplacian[1] = laplacian
            for k in range(2, self.K):
                multi_order_laplacian[k] = 2 * torch.mm(laplacian, multi_order_laplacian[k - 1]) \
                    - multi_order_laplacian[k - 2]
        return multi_order_laplacian

    @staticmethod
    def get_laplacian(A, normalize):
        """
        Return the Laplacian of the graph.
        :param A: the graph adjacency without self loops, [N, N].
        :param normalize: whether to use the normalized Laplacian.
        :return: graph Laplacian.
        """
        if normalize:
            D = torch.diag(torch.sum(A, dim=-1) ** (-1 / 2))  # D^{-1/2} from the degree vector
            # L = I - D^{-1/2} A D^{-1/2}, the symmetrically normalized Laplacian
            L = torch.eye(A.size(0), device=A.device, dtype=A.dtype) - torch.mm(torch.mm(D, A), D)
        else:
            D = torch.diag(torch.sum(A, dim=-1))
            L = D - A
        return L
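
# The recurrence above is the matrix form of T_0(x) = 1, T_1(x) = x,
# T_k(x) = 2x T_{k-1}(x) - T_{k-2}(x). A minimal check (toy sizes made up):
# >>> conv = ChebConv(input_dim=3, output_dim=2, K=2)
# >>> conv.cheb_polynomial(torch.eye(4))[2]  # T_2(I) = 2*I@I - I = I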

class ChebNet(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, K):
        """
        :param input_dim: int, number of input channels.
        :param hidden_dim: int, number of hidden channels.
        :param output_dim: int, number of output channels.
        :param K: int, order of the Chebyshev expansion.
        """
        super(ChebNet, self).__init__()
        self.conv1 = ChebConv(input_dim=input_dim, output_dim=hidden_dim, K=K)  # first graph convolution
        self.conv2 = ChebConv(input_dim=hidden_dim, output_dim=output_dim, K=K)  # second graph convolution
        self.relu = torch.nn.ReLU()

    def forward(self, data, device):
        graph_data = data["graph"].to(device)[0]  # [N, N]
        flow_x = data["flow_x"].to(device)  # [B, N, H, D]: batch, nodes, history length, feature dim
        B, N = flow_x.size(0), flow_x.size(1)
        flow_x = flow_x.view(B, N, -1)  # [B, N, H*D]: flatten the history window into one feature vector
        output_1 = self.relu(self.conv1(flow_x, graph_data))
        output_2 = self.relu(self.conv2(output_1, graph_data))
        return output_2.unsqueeze(2)  # [B, N, 1, out_C]

class GraphAttentionLayer(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GraphAttentionLayer, self).__init__()
        self.in_c = input_dim
        self.out_c = output_dim
        self.F = F.softmax
        self.W = nn.Linear(input_dim, output_dim, bias=False)  # y = W * x
        self.b = nn.Parameter(torch.Tensor(output_dim))
        nn.init.normal_(self.W.weight)
        nn.init.normal_(self.b)

    def forward(self, inputs, graph):
        """
        :param inputs: input features, [B, N, D].
        :param graph: graph structure, [N, N].
        :return: output features, [B, N, D].
        """
        h = self.W(inputs)  # [B, N, D]: the W*h projection
        # The dot product between the features of nodes i and j measures how
        # strongly they are related; multiplying by the 0/1 adjacency matrix
        # zeroes out scores for node pairs that share no edge.
        outputs = torch.bmm(h, h.transpose(1, 2)) * graph.unsqueeze(0)  # [B, N, D] * [B, D, N] -> [B, N, N]
        # Zeros mean "no edge", so replace them with a very large negative
        # value: softmax maps it to (near) zero attention.
        outputs = outputs.masked_fill(outputs == 0, -1e16)
        attention = self.F(outputs, dim=2)  # [B, N, N]: normalize over each node's neighbors
        # Aggregate neighbor features, weighted by the attention coefficients.
        result = torch.bmm(attention, h) + self.b  # [B, N, N] * [B, N, D] -> [B, N, D]
        return result
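
# Shape check (a minimal sketch; the toy sizes and adjacency are made up):
# >>> layer = GraphAttentionLayer(input_dim=4, output_dim=4)
# >>> x = torch.randn(1, 3, 4)
# >>> g = torch.tensor([[1., 1., 0.], [1., 1., 1.], [0., 1., 1.]])
# >>> layer(x, g).shape
# torch.Size([1, 3, 4])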

class GATSubNet(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_heads):
        super(GATSubNet, self).__init__()
        # One attention head per module; each maps input_dim -> hidden_dim.
        self.attention_module = nn.ModuleList(
            [GraphAttentionLayer(input_dim, hidden_dim) for _ in range(n_heads)])
        # The heads produce different views of the graph; a final attention
        # layer fuses their concatenated outputs.
        self.out_att = GraphAttentionLayer(hidden_dim * n_heads, output_dim)
        self.act = nn.LeakyReLU()

    def forward(self, inputs, graph):
        """
        :param inputs: [B, N, C]
        :param graph: [N, N]
        :return: [B, N, output_dim]
        """
        # Run every head and concatenate the results along the feature dim.
        outputs = torch.cat([attn(inputs, graph) for attn in self.attention_module], dim=-1)  # [B, N, hid_c * n_heads]
        outputs = self.act(outputs)
        outputs = self.out_att(outputs, graph)
        return self.act(outputs)

class GATNet(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_heads):
        super(GATNet, self).__init__()
        self.subnet = GATSubNet(input_dim, hidden_dim, output_dim, n_heads)

    def forward(self, data, device):
        graph = data["graph"][0].to(device)  # [N, N]
        flow = data["flow_x"].to(device)  # [B, N, T, C]
        B, N = flow.size(0), flow.size(1)
        flow = flow.view(B, N, -1)  # [B, N, T * C]
        """
        Flattening the whole time window into one feature vector ignores the
        temporal ordering. It works, but it is coarse. An alternative: slice
        flow[:, :, 0] ... flow[:, :, T-1] into T tensors of shape [B, N, C],
        give each its own SubNet (self.subnet = [GATSubNet(...) for _ in range(T)],
        wrapped in nn.ModuleList), and fuse the per-step outputs, analogous to
        multi-head attention. GATNet2 below implements exactly this.
        """
        prediction = self.subnet(flow, graph).unsqueeze(2)  # [B, N, 1, C]: the 1 marks a single predicted time step
        return prediction

class GATNet2(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_heads, T):
        super(GATNet2, self).__init__()
        # One GATSubNet per history time step, plus a fusion attention layer.
        self.gatnet = nn.ModuleList(
            [GATSubNet(input_dim, hidden_dim, output_dim, n_heads) for _ in range(T)])
        self.out = GraphAttentionLayer(output_dim * T, output_dim)

    def forward(self, data, device):
        flow = data["flow_x"].to(device)  # [B, N, T, C]
        graph = data["graph"][0].to(device)  # [N, N]
        T = flow.size(2)
        output = []
        for i in range(T):
            f = flow[:, :, i, :]  # [B, N, C]: features of time step i
            output.append(self.gatnet[i](f, graph))
        hidden_out = torch.cat(output, dim=2)  # [B, N, output_dim * T]
        out = self.out(hidden_out, graph)
        out = out.unsqueeze(2)  # [B, N, 1, output_dim]
        return out
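
# End-to-end shape sketch (toy sizes matching the smoke test below):
# >>> net = GATNet2(input_dim=4, hidden_dim=2, output_dim=4, n_heads=2, T=4)
# >>> batch = {"flow_x": torch.randn(2, 6, 4, 4), "graph": torch.randn(1, 6, 6)}
# >>> net(batch, torch.device("cpu")).shape
# torch.Size([2, 6, 1, 4])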

if __name__ == '__main__':  # quick smoke test: check that the models run
    x = torch.randn(2, 6, 4, 4)  # [B, N, T, C]
    graph = torch.randn(2, 6, 6)  # [B, N, N]
    end_index = 3
    data_y = x[:, :, end_index].unsqueeze(2)  # [B, N, 1, C]: last time step of the window as the target
    print(data_y[0])
    data = {"flow_x": x, "graph": graph, "flow_y": data_y}
    print(data_y.size())
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print("running on the GPU")
    else:
        device = torch.device("cpu")
        print("running on the CPU")
    # input_dim must equal T * C of the flattened window; output_dim matches C.
    net = GCN2(input_dim=4 * 4, hidden_dim=2, output_dim=4)
    # net = GATNet2(input_dim=4, hidden_dim=2, output_dim=4, n_heads=2, T=4)
    print(net)
    net = net.to(device)
    # loss and optimizer
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(params=net.parameters())
    # train
    Epoch = 10
    loss_train_plt = []
    net.train()
    for epoch in range(Epoch):
        start_time = time.time()
        net.zero_grad()
        predict = net(data, device).to(torch.device("cpu"))
        loss = criterion(predict, data["flow_y"])
        epoch_loss = loss.item()
        loss.backward()
        optimizer.step()  # update the parameters
        end_time = time.time()
        loss_train_plt.append(epoch_loss)
        print("Epoch: {:04d}, Loss: {:02.4f}, Time: {:02.2f} mins".format(
            epoch, epoch_loss, (end_time - start_time) / 60))
# train_data = LoadData(data_path=["PeMS_04/PeMS04.csv", "PeMS_04/PeMS04.npz"], num_nodes=307, divide_days=[45, 14],
# time_interval=5, history_length=6,
# train_mode="train")
#
# train_loader = DataLoader(train_data, batch_size=64, shuffle=False, num_workers=8)
# if torch.cuda.is_available():
# device = torch.device("cuda:0")
# print("running on the GPU")
# else:
# device = torch.device("cpu")
# print("running on the CPU")
#
# net = GATNet(input_dim=6 * 2, hidden_dim=6, output_dim=2, n_heads=2)
# net = net.to(device)
#
# for data in train_loader:
# print(data.keys())
# print(data['graph'].size())
# print(data['flow_x'].size())
# print(data['flow_y'].size())
# y = net(data, device)
# print(y.size())
# import numpy as np
# file = 'PeMS_04/PeMS04.npz'
# p = np.load(file, allow_pickle = True)
# print(p.files)
# print(len(p['data'][0]))
# if __name__ == '__main__':  # test whether the model fits the data
# x = torch.randn(32, 278, 6, 2) # [B, N, T, C]
# graph = torch.randn(32, 278, 278) # [B, N, N]
# data = {"flow_x": x, "graph": graph}
#
# device = torch.device("cpu")
# # tensor = torch.to(device)
#
# net = GATNet(input_dim=6 * 2, hidden_dim=6, output_dim=2, n_heads=2)
#
# y = net(data, device)
# print(y.size())