Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kenansville DFT attack #916

Merged
merged 5 commits into from
Nov 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions armory/art_experimental/attacks/kenansville_dft.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import numpy as np


class KenansvilleDFT:
    """Black-box audio attack that removes the lowest-power DFT components."""

    def __init__(
        self,
        estimator,
        sample_rate=16000,
        snr_db=100,
        partial_attack=False,
        attack_len=500,
        attack_prob=0.5,
        targeted=False,
    ):
        """
        This DFT attack is a variant of the one described in https://arxiv.org/abs/1910.05262.
        In the paper, the attack is assumed to have knowledge of word or phoneme locations
        in the input. The attack implemented here assumes complete blackbox knowledge,
        so the only options are: 1) modify the whole input or 2) modify subsequences
        of the input with some probability.

        param estimator: not used but necessary for interoperability with Armory/ART
        param sample_rate: sample rate in Hz of inputs (stored, not used by the attack itself)
        param snr_db: the minimum SNR (in dB) to maintain
        type snr_db: 'float'
        param partial_attack: boolean to indicate if subsequences of the input are to be modified
        param attack_len: length (in samples) of subsequences to attack. Valid when partial_attack = True
        param attack_prob: probability each subsequence will be attacked. Valid when partial_attack = True
        param targeted: not used but necessary for interoperability with Armory

        raises Warning: if targeted is truthy (the attack has no targeted mode)
        raises ValueError: if snr_db is negative, or if partial_attack is set
            and attack_len is not at least 1
        """
        self.sample_rate = sample_rate
        self.snr_db = snr_db
        self.targeted = targeted
        self.partial_attack = partial_attack
        self.attack_len = attack_len
        self.attack_prob = attack_prob

        if targeted:
            raise Warning("'targeted' argument is not used in Kenansville attack")

        if snr_db < 0:
            raise ValueError("Negative SNR is not allowed")

        # A zero segment length cannot be iterated and a negative one would
        # silently skip every segment, so reject both up front.
        if partial_attack and attack_len < 1:
            raise ValueError("attack_len must be a positive number of samples")

    def _attack(self, x):
        """
        Zero out the weakest DFT bins of a single 1-D signal while keeping the
        ratio of total power to removed power strictly above self.snr_db.

        param x: 1-D sequence of real-valued samples
        return: x itself (unchanged, original dtype) when fewer than two bins
            can be removed or x is empty; otherwise the modified signal as a
            float32 array of the same length
        """
        x_len = len(x)
        if x_len == 0:
            # np.fft.fft rejects zero-length input and there is nothing to modify
            return x

        x_fft = np.fft.fft(x)
        x_psd = np.abs(x_fft) ** 2
        # bin indices sorted by increasing power
        order = np.argsort(x_psd)

        # removed_power[k - 1] is the power discarded when the k weakest bins
        # are dropped; the last entry is the total signal power. One cumulative
        # sum replaces the repeated partial sums of a coarse/fine search.
        removed_power = np.cumsum(x_psd[order])
        with np.errstate(divide="ignore", invalid="ignore"):
            # log10(0) -> -inf is expected for zero-power bins / silent input
            margin_db = 10 * np.log10(removed_power[-1]) - 10 * np.log10(
                removed_power
            )
        meets_snr = margin_db > self.snr_db

        # number of weakest bins that can go before the SNR bound is first violated
        n_drop = x_len if meets_snr.all() else int(np.argmin(meets_snr))
        # bins of a real signal come in conjugate pairs, so work with an even count
        n_drop -= n_drop % 2
        if n_drop < 2:
            return x

        drop = np.zeros(x_len, dtype=bool)
        drop[order[:n_drop]] = True
        # Only remove bin k if its conjugate partner (-k mod N) is removed too;
        # DC and, for even N, the N/2 bin are their own partners. This keeps the
        # spectrum Hermitian so the inverse transform is real, and it can only
        # shrink the removed set, so the SNR bound still holds.
        drop &= drop[(-np.arange(x_len)) % x_len]

        # zero out low power frequencies
        x_fft[drop] = 0

        return np.real(np.fft.ifft(x_fft)).astype(np.float32)

    def generate(self, x):
        """
        Apply the attack to every example in a batch.

        param x: sequence of 1-D audio signals (lengths may differ)
        return: 1-D object array with one output signal per input example.
            With partial_attack the output keeps the dtype of the input
            example; otherwise it is whatever _attack returns.
        """
        x_out = np.empty((len(x),), dtype=object)
        for i, x_example in enumerate(x):
            if self.partial_attack:
                # split input into multiple segments and attack each with some
                # probability; start from a copy so that segments which are
                # not attacked pass through unchanged
                x_adv = np.array(x_example)
                seg_len = self.attack_len
                for start in range(0, len(x_example), seg_len):
                    stop = min(start + seg_len, len(x_example))
                    if np.random.rand() < self.attack_prob:
                        x_adv[start:stop] = self._attack(x_example[start:stop])
            else:
                x_adv = self._attack(x_example)
            x_out[i] = x_adv

        return x_out
31 changes: 0 additions & 31 deletions armory/art_experimental/defences/mp3_compression_channelized.py

This file was deleted.

64 changes: 64 additions & 0 deletions scenario_configs/asr_deepspeech_baseline_kenansville.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
"adhoc": {
"skip_adversarial": false
},
"attack": {
"knowledge": "white",
"kwargs": {
"partial_attack": false,
"snr_db": 30,
"targeted": false
},
"module": "armory.art_experimental.attacks.kenansville_dft",
"name": "KenansvilleDFT",
"use_label": false
},
"dataset": {
"batch_size": 8,
"eval_split": "test_clean",
"framework": "numpy",
"module": "armory.data.datasets",
"name": "librispeech",
"train_split": "train_clean100"
},
"defense": null,
"metric": {
"means": false,
"perturbation": "snr_db",
"record_metric_per_sample": true,
"task": [
"word_error_rate"
]
},
"model": {
"fit": false,
"fit_kwargs": {
"nb_epochs": 20000
},
"model_kwargs": {},
"module": "armory.baseline_models.pytorch.deep_speech",
"name": "get_art_model",
"predict_kwargs": {
"transcription_output": true
},
"weights_file": null,
"wrapper_kwargs": {
"pretrained_model": "librispeech"
}
},
"scenario": {
"kwargs": {},
"module": "armory.scenarios.audio_asr",
"name": "AutomaticSpeechRecognition"
},
"sysconfig": {
"docker_image": "twosixarmory/pytorch-deepspeech:0.13.0-dev",
"external_github_repo": "hkakitani/deepspeech.pytorch",
"gpus": "all",
"local_repo_path": null,
"output_dir": null,
"output_filename": null,
"use_gpu": false
}
}
75 changes: 75 additions & 0 deletions scenario_configs/asr_deepspeech_defended_baseline_kenansville.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
"adhoc": {
"skip_adversarial": false
},
"attack": {
"knowledge": "white",
"kwargs": {
"partial_attack": false,
"snr_db": 30,
"targeted": false
},
"module": "armory.art_experimental.attacks.kenansville_dft",
"name": "KenansvilleDFT",
"use_label": false
},
"dataset": {
"batch_size": 8,
"eval_split": "test_clean",
"framework": "numpy",
"module": "armory.data.datasets",
"name": "librispeech",
"train_split": "train_clean100"
},
"defense": {
"kwargs": {
"apply_fit": false,
"apply_predict": true,
"channels_first": false,
"sample_rate": 16000,
"verbose": false
},
"module": "art.defences.preprocessor",
"name": "Mp3Compression",
"type": "Preprocessor"
},
"metric": {
"means": false,
"perturbation": "snr_db",
"record_metric_per_sample": true,
"task": [
"word_error_rate"
]
},
"model": {
"fit": false,
"fit_kwargs": {
"nb_epochs": 20000
},
"model_kwargs": {},
"module": "armory.baseline_models.pytorch.deep_speech",
"name": "get_art_model",
"predict_kwargs": {
"transcription_output": true
},
"weights_file": null,
"wrapper_kwargs": {
"pretrained_model": "librispeech"
}
},
"scenario": {
"kwargs": {},
"module": "armory.scenarios.audio_asr",
"name": "AutomaticSpeechRecognition"
},
"sysconfig": {
"docker_image": "twosixarmory/pytorch-deepspeech:0.13.0-dev",
"external_github_repo": "hkakitani/deepspeech.pytorch",
"gpus": "all",
"local_repo_path": null,
"output_dir": null,
"output_filename": null,
"use_gpu": false
}
}