twosixlabs · kmerchant31892 · Nov 19, 2020 · Nov 18, 2020 · Nov 18, 2020 · Nov 18, 2020
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -69,6 +69,10 @@ jobs:
           docker push twosixarmory/tf1:${version}
           docker tag twosixarmory/tf1:${version} twosixarmory/tf1:latest
           docker push twosixarmory/tf1:latest
+          bash docker/build-poison.sh tf1-poison
+          docker push twosixarmory/tf1-poison:${version}
+          docker tag twosixarmory/tf1-poison:${version} twosixarmory/tf1-poison:latest
+          docker push twosixarmory/tf1-poison:latest
   release-tf2-docker:
     name: Build and release tf2 docker image
     needs: [release-wheel, release-armory-docker]
@@ -92,6 +96,10 @@ jobs:
           docker push twosixarmory/tf2:${version}
           docker tag twosixarmory/tf2:${version} twosixarmory/tf2:latest
           docker push twosixarmory/tf2:latest
+          bash docker/build-poison.sh tf2-poison
+          docker push twosixarmory/tf2-poison:${version}
+          docker tag twosixarmory/tf2-poison:${version} twosixarmory/tf2-poison:latest
+          docker push twosixarmory/tf2-poison:latest
   release-pytorch-docker:
     name: Build and release pytorch and pytorch-deepspeech docker images
     needs: [release-wheel, release-armory-docker]
@@ -118,3 +126,7 @@ jobs:
           docker push twosixarmory/pytorch-deepspeech:${version}
           docker tag twosixarmory/pytorch-deepspeech:${version} twosixarmory/pytorch-deepspeech:latest
           docker push twosixarmory/pytorch-deepspeech:latest
+          bash docker/build-poison.sh pytorch-poison
+          docker push twosixarmory/pytorch-poison:${version}
+          docker tag twosixarmory/pytorch-poison:${version} twosixarmory/pytorch-poison:latest
+          docker push twosixarmory/pytorch-poison:latest
diff --git a/armory/art_experimental/attacks/poison_loader.py b/armory/art_experimental/attacks/poison_loader.py
@@ -11,12 +11,12 @@ def poison_loader_GTSRB(**kwargs):
     if poison_type == "pattern":
 
         def mod(x):
-            return perturbations.add_pattern_bd(x, pixel_value=255)
+            return perturbations.add_pattern_bd(x, pixel_value=1)
 
     elif poison_type == "pixel":
 
         def mod(x):
-            return perturbations.add_single_bd(x, pixel_value=255)
+            return perturbations.add_single_bd(x, pixel_value=1)
 
     elif poison_type == "image":
         backdoor_path = kwargs.get("backdoor_path")

diff --git a/armory/art_experimental/attacks/poison_loader_clbd.py b/armory/art_experimental/attacks/poison_loader_clbd.py
@@ -0,0 +1,24 @@
+"""
+This module enables loading of the clean-label backdoor (CLBD) attack from a JSON config
+"""
+
+
+from art.attacks.poisoning import PoisoningAttackCleanLabelBackdoor
+from art.utils import to_categorical
+
+from armory.art_experimental.attacks.poison_loader import poison_loader_GTSRB
+
+
+def poison_loader_clbd(**kwargs):
+    """
+    Build the clean-label backdoor attack described by ``kwargs``.
+
+    Consumes the "backdoor_kwargs", "target", and "n_classes" entries; every
+    remaining keyword is forwarded to PoisoningAttackCleanLabelBackdoor.
+
+    Returns an ``(attack, backdoor)`` tuple, where ``backdoor`` is whatever
+    poison_loader_GTSRB produced from "backdoor_kwargs".
+    """
+    trigger = poison_loader_GTSRB(**kwargs.pop("backdoor_kwargs"))
+
+    # The config gives the target as a sparse class index; the attack is handed
+    # the matching one-hot row instead.
+    sparse_target = kwargs.pop("target")
+    class_count = kwargs.pop("n_classes")
+    one_hot_target = to_categorical([sparse_target], class_count)[0]
+
+    clbd_attack = PoisoningAttackCleanLabelBackdoor(
+        backdoor=trigger, target=one_hot_target, **kwargs
+    )
+    return clbd_attack, trigger
diff --git a/armory/scenarios/poisoning_gtsrb_clbd.py b/armory/scenarios/poisoning_gtsrb_clbd.py
@@ -0,0 +1,307 @@
+"""
+Classifier evaluation within ARMORY
+
+Scenario Contributor: MITRE Corporation
+"""
+
+import logging
+from typing import Optional
+import os
+import random
+from copy import deepcopy
+
+import numpy as np
+from tensorflow import set_random_seed, ConfigProto, Session
+from tensorflow.keras.backend import set_session
+from tensorflow.keras.utils import to_categorical
+from tqdm import tqdm
+from PIL import ImageOps, Image
+
+from art.defences.trainer import AdversarialTrainerMadryPGD
+
+from armory.utils.config_loading import (
+    load_dataset,
+    load_model,
+    load,
+    load_fn,
+)
+from armory.utils import metrics
+from armory.scenarios.base import Scenario
+
+logger = logging.getLogger(__name__)
+
+
+def poison_scenario_preprocessing(batch):
+    """
+    Preprocess a batch of images for the poisoning scenario.
+
+    Each image is histogram-equalized with PIL, cropped around its center to a
+    square based on its shorter side, resized to 48x48, and divided by 255.0.
+    The processed images are returned stacked in a single float32 numpy array.
+
+    Assumes every element of ``batch`` is an array that ``Image.fromarray``
+    accepts -- TODO confirm against the dataset loader.
+    """
+    target_side = 48
+    scale = 255.0
+
+    def _prepare(raw):
+        # Equalize first, then crop around the center of the equalized image.
+        equalized = ImageOps.equalize(Image.fromarray(raw))
+        width, height = equalized.size
+        half_side = min(width, height) // 2
+        center_x, center_y = width // 2, height // 2
+        crop_box = (
+            center_x - half_side,
+            center_y - half_side,
+            center_x + half_side,
+            center_y + half_side,
+        )
+        resized = equalized.crop(crop_box).resize([target_side, target_side])
+        return np.array(resized) / scale
+
+    return np.array([_prepare(im) for im in batch], dtype=np.float32)
+
+
+def poison_dataset(src_imgs, src_lbls, src, tgt, ds_size, attack, poisoned_indices):
+    """
+    Apply ``attack`` to selected images of class ``src``.
+
+    For each index in ``range(ds_size)``, the sample is passed through
+    ``attack.poison(image, [tgt])`` when its label equals ``src`` AND its index
+    is listed in ``poisoned_indices``; every other sample is kept unchanged
+    with its original label.
+
+    Returns ``(poison_x, poison_y)`` as numpy arrays built from the per-sample
+    images and labels.
+    """
+    # Hash the indices once so the per-sample membership test is O(1) instead
+    # of a linear scan over ``poisoned_indices`` for every sample.
+    if isinstance(poisoned_indices, (set, frozenset)):
+        selected = poisoned_indices
+    else:
+        selected = set(np.asarray(poisoned_indices).ravel().tolist())
+
+    poison_x = []
+    poison_y = []
+    for idx in range(ds_size):
+        if src_lbls[idx] == src and idx in selected:
+            p_img, p_label = attack.poison(src_imgs[idx], [tgt])
+            poison_x.append(p_img)
+            poison_y.append(p_label)
+        else:
+            poison_x.append(src_imgs[idx])
+            poison_y.append(src_lbls[idx])
+
+    return np.array(poison_x), np.array(poison_y)
+
+
+class GTSRB_CLBD(Scenario):
+    """
+    Poisoning scenario: optionally poisons the training data with the attack
+    loaded from the config, optionally filters the training data with a
+    poison-detection defense, fits the classifier, and reports accuracy on
+    clean and trigger-bearing test data.
+    """
+
+    def _evaluate(
+        self,
+        config: dict,
+        num_eval_batches: Optional[int],
+        skip_benign: Optional[bool],
+        skip_attack: Optional[bool],
+    ) -> dict:
+        """
+        Evaluate a config file for classification robustness against attack.
+
+        Note: num_eval_batches shouldn't be set for poisoning scenario and will raise an
+        error if it is
+
+        Reads the "sysconfig", "model", "adhoc", and "dataset" sections of
+        ``config`` and, depending on the adhoc flags, the "attack" and "defense"
+        sections as well.
+
+        Returns a dict holding "benign_validation_accuracy" and
+        "benign_validation_accuracy_targeted_class"; when adhoc "poison_dataset"
+        is truthy it also holds "poisoned_test_accuracy" and
+        "poisoned_targeted_misclassification_accuracy".
+
+        Raises ValueError if num_eval_batches, skip_benign, or skip_attack is
+        truthy.
+        """
+        if config["sysconfig"].get("use_gpu"):
+            # Presumably requests deterministic cuDNN behavior so GPU runs are
+            # repeatable -- confirm against the TensorFlow version in use
+            os.environ["TF_CUDNN_DETERMINISM"] = "1"
+        if num_eval_batches:
+            raise ValueError("num_eval_batches shouldn't be set for poisoning scenario")
+        if skip_benign:
+            raise ValueError("skip_benign shouldn't be set for poisoning scenario")
+        if skip_attack:
+            raise ValueError("skip_attack shouldn't be set for poisoning scenario")
+
+        model_config = config["model"]
+        # Scenario assumes the canonical preprocessing_fn is used, which makes all
+        # images the same size
+        # Two separate instances are loaded from the same model config: one that is
+        # trained and evaluated, one that serves as the attack's proxy classifier
+        classifier, _ = load_model(model_config)
+        proxy_classifier, _ = load_model(model_config)
+
+        # NOTE(review): a missing/empty "adhoc" section still fails just below with
+        # KeyError, because train_epochs/source_class/target_class are required
+        config_adhoc = config.get("adhoc") or {}
+        train_epochs = config_adhoc["train_epochs"]
+        src_class = config_adhoc["source_class"]
+        tgt_class = config_adhoc["target_class"]
+        # Fall back to the dataset batch size when no fit batch size is given
+        fit_batch_size = config_adhoc.get(
+            "fit_batch_size", config["dataset"]["batch_size"]
+        )
+
+        if not config["sysconfig"].get("use_gpu"):
+            # On CPU, limit TensorFlow to a single intra-op thread (presumably for
+            # run-to-run reproducibility -- confirm)
+            conf = ConfigProto(intra_op_parallelism_threads=1)
+            set_session(Session(config=conf))
+
+        # Set random seed due to large variance in attack and defense success
+        np.random.seed(config_adhoc["split_id"])
+        set_random_seed(config_adhoc["split_id"])
+        random.seed(config_adhoc["split_id"])
+        # The filtering defense is on by default unless the config disables it
+        use_poison_filtering_defense = config_adhoc.get(
+            "use_poison_filtering_defense", True
+        )
+        if self.check_run:
+            # filtering defense requires more than a single batch to run properly
+            use_poison_filtering_defense = False
+
+        logger.info(f"Loading dataset {config['dataset']['name']}...")
+
+        clean_data = load_dataset(
+            config["dataset"],
+            epochs=1,
+            split=config["dataset"].get("train_split", "train"),
+            preprocessing_fn=poison_scenario_preprocessing,
+            shuffle_files=False,
+        )
+        # Flag for whether to poison dataset -- used to evaluate
+        #     performance of defense on clean data
+        # NOTE(review): this reads config["adhoc"] directly instead of config_adhoc;
+        #     both refer to the same dict whenever this line is reached
+        poison_dataset_flag = config["adhoc"]["poison_dataset"]
+        # detect_poison does not currently support data generators
+        #     therefore, make in memory dataset
+        x_train_all, y_train_all = [], []
+
+        logger.info("Building in-memory dataset for poisoning detection and training")
+        for x_train, y_train in clean_data:
+            x_train_all.append(x_train)
+            y_train_all.append(y_train)
+        x_train_all = np.concatenate(x_train_all, axis=0)
+        y_train_all = np.concatenate(y_train_all, axis=0)
+
+        if poison_dataset_flag:
+            # attack.poison below is given one-hot labels; they are converted back
+            # to class indices with argmax once poisoning is done
+            y_train_all_categorical = to_categorical(y_train_all)
+            attack_train_epochs = train_epochs
+            # Work on a copy so the pops/insertions below do not alter the caller's
+            # config
+            attack_config = deepcopy(config["attack"])
+            use_adversarial_trainer_flag = attack_config.get(
+                "use_adversarial_trainer", False
+            )
+
+            proxy_classifier_fit_kwargs = {
+                "batch_size": fit_batch_size,
+                "nb_epochs": attack_train_epochs,
+            }
+            logger.info("Fitting proxy classifier...")
+            if use_adversarial_trainer_flag:
+                logger.info("Using adversarial trainer...")
+                adversarial_trainer_kwargs = attack_config.pop(
+                    "adversarial_trainer_kwargs", {}
+                )
+                # batch_size/nb_epochs override any same-named trainer kwargs and
+                # are handed to the trainer's constructor rather than to fit()
+                for k, v in proxy_classifier_fit_kwargs.items():
+                    adversarial_trainer_kwargs[k] = v
+                proxy_classifier = AdversarialTrainerMadryPGD(
+                    proxy_classifier, **adversarial_trainer_kwargs
+                )
+                proxy_classifier.fit(x_train_all, y_train_all)
+                # The attack receives the underlying classifier, not the trainer
+                attack_config["kwargs"][
+                    "proxy_classifier"
+                ] = proxy_classifier.get_classifier()
+            else:
+                proxy_classifier_fit_kwargs["verbose"] = False
+                proxy_classifier_fit_kwargs["shuffle"] = True
+                proxy_classifier.fit(
+                    x_train_all, y_train_all, **proxy_classifier_fit_kwargs
+                )
+                attack_config["kwargs"]["proxy_classifier"] = proxy_classifier
+
+            # load() is expected to yield an (attack, backdoor) pair for this attack
+            # config -- TODO confirm against the configured attack loader
+            attack, backdoor = load(attack_config)
+
+            # From here on the training arrays hold the poisoned data
+            x_train_all, y_train_all_categorical = attack.poison(
+                x_train_all, y_train_all_categorical
+            )
+            y_train_all = np.argmax(y_train_all_categorical, axis=1)
+
+        if use_poison_filtering_defense:
+            y_train_defense = to_categorical(y_train_all)
+
+            defense_config = config["defense"]
+            detection_kwargs = config_adhoc.get("detection_kwargs", dict())
+
+            # The defense may use its own model; defaults to the scenario's model
+            defense_model_config = config_adhoc.get("defense_model", model_config)
+
+            # Assumes classifier_for_defense and classifier use same preprocessing function
+            classifier_for_defense, _ = load_model(defense_model_config)
+            # ART/Armory API requires that classifier_for_defense trains inside defense_fn
+            defense_fn = load_fn(defense_config)
+            defense = defense_fn(classifier_for_defense, x_train_all, y_train_defense)
+
+            # Only the second return value is used; entries equal to 1 are kept
+            _, is_clean = defense.detect_poison(**detection_kwargs)
+            is_clean = np.array(is_clean)
+            logger.info(f"Total clean data points: {np.sum(is_clean)}")
+
+            logger.info("Filtering out detected poisoned samples")
+            indices_to_keep = is_clean == 1
+            x_train_final = x_train_all[indices_to_keep]
+            y_train_final = y_train_all[indices_to_keep]
+        else:
+            logger.info(
+                "Defense does not require filtering. Model fitting will use all data."
+            )
+            x_train_final = x_train_all
+            y_train_final = y_train_all
+        # Training is skipped entirely when the defense filtered out every sample
+        if len(x_train_final):
+            logger.info(
+                f"Fitting model of {model_config['module']}.{model_config['name']}..."
+            )
+            classifier.fit(
+                x_train_final,
+                y_train_final,
+                batch_size=fit_batch_size,
+                nb_epochs=train_epochs,
+                verbose=False,
+                shuffle=True,
+            )
+        else:
+            logger.warning("All data points filtered by defense. Skipping training")
+
+        logger.info("Validating on clean test data")
+        test_data = load_dataset(
+            config["dataset"],
+            epochs=1,
+            split=config["dataset"].get("eval_split", "test"),
+            preprocessing_fn=poison_scenario_preprocessing,
+            shuffle_files=False,
+        )
+        benign_validation_metric = metrics.MetricList("categorical_accuracy")
+        target_class_benign_metric = metrics.MetricList("categorical_accuracy")
+        for x, y in tqdm(test_data, desc="Testing"):
+            # Ensure that input sample isn't overwritten by classifier
+            x.flags.writeable = False
+            y_pred = classifier.predict(x)
+            benign_validation_metric.append(y, y_pred)
+            # Despite the "tgt" in the name, this selects predictions for samples
+            # whose true label is src_class and scores them against src_class
+            y_pred_tgt_class = y_pred[y == src_class]
+            if len(y_pred_tgt_class):
+                target_class_benign_metric.append(
+                    [src_class] * len(y_pred_tgt_class), y_pred_tgt_class
+                )
+        logger.info(
+            f"Unpoisoned validation accuracy: {benign_validation_metric.mean():.2%}"
+        )
+        logger.info(
+            f"Unpoisoned validation accuracy on targeted class: {target_class_benign_metric.mean():.2%}"
+        )
+        results = {
+            "benign_validation_accuracy": benign_validation_metric.mean(),
+            "benign_validation_accuracy_targeted_class": target_class_benign_metric.mean(),
+        }
+
+        poisoned_test_metric = metrics.MetricList("categorical_accuracy")
+        poisoned_targeted_test_metric = metrics.MetricList("categorical_accuracy")
+
+        if poison_dataset_flag:
+            logger.info("Testing on poisoned test data")
+            # The test split is loaded a second time for the poisoned pass
+            test_data = load_dataset(
+                config["dataset"],
+                epochs=1,
+                split=config["dataset"].get("eval_split", "test"),
+                preprocessing_fn=poison_scenario_preprocessing,
+                shuffle_files=False,
+            )
+            for x_test, y_test in tqdm(test_data, desc="Testing"):
+                src_indices = np.where(y_test == src_class)[0]
+                poisoned_indices = src_indices  # Poison entire class
+                # Only the poisoned images are kept; the labels returned by
+                # poison_dataset are discarded so y_test stays the true labels
+                x_test, _ = poison_dataset(
+                    x_test,
+                    y_test,
+                    src_class,
+                    tgt_class,
+                    len(y_test),
+                    backdoor,
+                    poisoned_indices,
+                )
+                y_pred = classifier.predict(x_test)
+                poisoned_test_metric.append(y_test, y_pred)
+
+                # Score the source-class samples against tgt_class: a high value
+                # means those samples were classified as the attack's target
+                y_pred_targeted = y_pred[y_test == src_class]
+                if len(y_pred_targeted):
+                    poisoned_targeted_test_metric.append(
+                        [tgt_class] * len(y_pred_targeted), y_pred_targeted
+                    )
+            results["poisoned_test_accuracy"] = poisoned_test_metric.mean()
+            results[
+                "poisoned_targeted_misclassification_accuracy"
+            ] = poisoned_targeted_test_metric.mean()
+            logger.info(f"Test accuracy: {poisoned_test_metric.mean():.2%}")
+            logger.info(
+                f"Test targeted misclassification accuracy: {poisoned_targeted_test_metric.mean():.2%}"
+            )
+
+        return results