-
Notifications
You must be signed in to change notification settings - Fork 0
/
Attention_based_CLV_train_test.py
77 lines (58 loc) · 3.29 KB
/
Attention_based_CLV_train_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def main():
configure_gpu()
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
file_paths = {
"test": '/home/tale2@ad.umbc.edu/Dataset/wadi/test.csv',
"train": '/home/tale2@ad.umbc.edu/Dataset/wadi/train.csv',
"label": '/home/tale2@ad.umbc.edu/Dataset/wadi/label.csv'
}
train = load_data(file_paths['train'])
clustered_dataframes, cluster_labels = cluster_data(train, N_CLUSTERS)
x = [create_sequences(clustered_dataframes[i].values, T) for i in range(N_CLUSTERS)]
# Verify the shape of each sequence
for i, seq in enumerate(x):
print(f"Cluster {i}: Sequence shape {seq.shape}")
assert seq.ndim == 3, f"Sequence for cluster {i} does not have 3 dimensions"
auto = vae_model(x)
total_features = sum([df.shape[2] for df in x])
output_data = np.concatenate([df.reshape(df.shape[0], df.shape[1], df.shape[2]) for df in x], axis=-1)
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=10, verbose=1, mode='auto', restore_best_weights=True)
history = auto.fit(x, output_data, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[monitor], shuffle=False).history
train_pred = auto.predict(x)
keys = list(clustered_dataframes.keys())
actual_train = clustered_dataframes[keys[0]]
for key in keys[1:]:
actual_train = actual_train.join(clustered_dataframes[key], how='outer')
# Training Data Processing and MSE Calculation
#train_pred = sequences_to_original(train_pred, T)
#actual_train = clustered_dataframes[0].join(clustered_dataframes[1:])
#actual_train2 = actual_train.values
#train_mse = mean_squared_error(actual_train2, train_pred)
#print(f"Mean Squared Error (Train): {train_mse}")
# Test Data Processing and MSE Calculation
test_data = load_data(file_paths['test']) # Load the test data
X_test = process_test_data(test_data, clustered_dataframes, T)
#X_test, test_data = process_data(file_paths['test'], cluster_labels, T)
test_pred = auto.predict(X_test)
test_pred = sequences_to_original(test_pred, T)
actual_test = test_data[actual_train.columns].values
#actual_test = test_data.values
# Ensure the test_pred and actual_test have the same shape before calculating MSE
if test_pred.shape != actual_test.shape:
min_shape = min(test_pred.shape[0], actual_test.shape[0])
test_pred = test_pred[:min_shape, :]
actual_test = actual_test[:min_shape, :]
test_mse = mean_squared_error(actual_test, test_pred)
print(f"Mean Squared Error (Test): {test_mse}")
print(f"test_pred: {test_pred.shape}")
print(f"actual_test: {actual_test.shape}")
# Anomaly Detection
scored = np.mean(np.abs(test_pred - actual_test), axis=1)
print(f"scored: {scored.shape}")
predicted_labels = dynamic_thresholding(scored)
print(f"predicted_labels: {predicted_labels.shape}")
true_labels = load_data(file_paths['label']).values
evaluate_model(true_labels, predicted_labels)
if __name__ == "__main__":
main()