-
Notifications
You must be signed in to change notification settings - Fork 0
/
Perturb-RBF.py
113 lines (76 loc) · 3.41 KB
/
Perturb-RBF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
def perturb_with_radial_kernel(x, kernel_width, scale=1):
    """Perturb the given example with Gaussian noise damped by a radial kernel.

    Zero-mean normal noise is drawn for every entry of ``x`` and multiplied by
    ``exp(-d**2 / (2 * kernel_width**2))`` before being added to ``x``. For a
    1-D input, ``d`` for entry ``j`` is the Euclidean norm of the differences
    between ``x[j]`` and every entry of ``x``.

    Args:
        x (numpy.ndarray or pandas.DataFrame or pandas.Series): The example to
            perturb. NOTE(review): the pairwise-difference broadcast below
            appears to support only a 1-D vector, a single-row ``(1, n)``
            array or a single-column ``(n, 1)`` array -- confirm against
            callers before passing multi-row data.
        kernel_width (float): The width of the radial kernel. It is used as a
            divisor, so it should be non-zero.
        scale (float): Standard deviation of the normal noise drawn before the
            kernel weighting is applied. Defaults to 1.

    Returns:
        numpy.ndarray: The perturbed example, with the same shape as ``x``.
    """
    # Work on a plain NumPy array regardless of the input container.
    if isinstance(x, (pd.DataFrame, pd.Series)):
        x = x.to_numpy()
    else:
        x = np.asarray(x)

    # Uses the global NumPy RNG so callers can seed it with np.random.seed.
    noise = np.random.normal(0, scale, size=x.shape)

    # Column j of the difference matrix holds x[j] - x[i] for every i; the
    # norm over axis 0 collapses it to one distance per entry.
    distances = np.linalg.norm(x - x.reshape(-1, 1), axis=0)
    kernel = np.exp(-((distances ** 2) / (2 * (kernel_width ** 2))))

    # Entries with a large distance get a small kernel weight, so they
    # receive less noise.
    return x + (noise * kernel)
# def perturb_with_radial_kernel(x, kernel_width, scale=1):
# """Perturb the given example using a radial kernel function.
# Args:
# x (numpy.ndarray or pandas.DataFrame or pandas.Series): The example to perturb.
# kernel_width (float): The width of the radial kernel.
# Returns:
# numpy.ndarray: The perturbed example.
# """
# # Convert to NumPy array if necessary
# if isinstance(x, pd.DataFrame) or isinstance(x, pd.Series):
# x = x.to_numpy()
# # Generate random noise
# noise = np.random.normal(0, scale, size=x.shape)
# print(x)
# # # Compute radial kernel
# distances = np.linalg.norm(x - x.reshape(-1, 1), axis=0)
# print(distances)
# kernel = np.exp(-((distances ** 2) / (2 * (kernel_width ** 2))))
# print(kernel)
# # Compute perturbed example
# perturbed_x = x + (noise * kernel)
# return perturbed_x
def generate_perturbations(example, num_perturbations, kernel_width, scale=1):
    """Generate multiple perturbed copies of an example using a radial kernel.

    The example is one-hot encoded with ``pandas.get_dummies`` before it is
    perturbed. The returned frame keeps that one-hot layout; the encoded
    columns are not decoded back into the original categorical columns.

    Args:
        example (pandas.DataFrame): The example to perturb.
        num_perturbations (int): The number of perturbed examples to generate.
        kernel_width (float): The width of the radial kernel.
        scale (float): Noise scale forwarded to
            ``perturb_with_radial_kernel``. Defaults to 1.

    Returns:
        pandas.DataFrame: The perturbed examples concatenated row-wise, using
        the one-hot encoded column names. The frame has zero rows when
        ``num_perturbations`` is not positive.
    """
    # Force float dummies: the default dummy dtype (bool in recent pandas,
    # uint8 in older releases) either fails or wraps around under the
    # subtraction performed by the kernel computation.
    encoded = pd.get_dummies(example, dtype=float)
    columns = encoded.columns

    if num_perturbations <= 0:
        # pd.concat raises on an empty list, so return an empty frame instead.
        return pd.DataFrame(columns=columns, dtype=float)

    # Loop-invariant: convert to a float array once, not on every iteration.
    values = encoded.to_numpy(dtype=float)

    perturbed_examples = [
        pd.DataFrame(
            perturb_with_radial_kernel(values, kernel_width, scale),
            columns=columns,
        )
        for _ in range(num_perturbations)
    ]
    return pd.concat(perturbed_examples)