Source code for cleverhans.attacks.deep_fool

"""The DeepFool attack

"""
import copy
import logging
import warnings

import numpy as np
import tensorflow as tf

from cleverhans.attacks.attack import Attack
from cleverhans.model import Model, wrapper_warning_logits, CallableModelWrapper
from cleverhans import utils
from cleverhans import utils_tf

np_dtype = np.dtype('float32')

_logger = utils.create_logger("cleverhans.attacks.deep_fool")
_logger.setLevel(logging.INFO)

[docs]class DeepFool(Attack): """ DeepFool is an untargeted & iterative attack which is based on an iterative linearization of the classifier. The implementation here is w.r.t. the L2 norm. Paper link: "https://arxiv.org/pdf/1511.04599.pdf" :param model: cleverhans.model.Model :param sess: tf.Session :param dtypestr: dtype of the data :param kwargs: passed through to super constructor """ def __init__(self, model, sess, dtypestr='float32', **kwargs): """ Create a DeepFool instance. """ if not isinstance(model, Model): wrapper_warning_logits() model = CallableModelWrapper(model, 'logits') super(DeepFool, self).__init__(model, sess, dtypestr, **kwargs) self.structural_kwargs = [ 'overshoot', 'max_iter', 'clip_max', 'clip_min', 'nb_candidate' ]
[docs] def generate(self, x, **kwargs): """ Generate symbolic graph for adversarial examples and return. :param x: The model's symbolic inputs. :param kwargs: See `parse_params` """ assert self.sess is not None, \ 'Cannot use `generate` when no `sess` was provided' from cleverhans.utils_tf import jacobian_graph # Parse and save attack-specific parameters assert self.parse_params(**kwargs) # Define graph wrt to this input placeholder logits = self.model.get_logits(x) self.nb_classes = logits.get_shape().as_list()[-1] assert self.nb_candidate <= self.nb_classes, \ 'nb_candidate should not be greater than nb_classes' preds = tf.reshape( tf.nn.top_k(logits, k=self.nb_candidate)[0], [-1, self.nb_candidate]) # grads will be the shape [batch_size, nb_candidate, image_size] grads = tf.stack(jacobian_graph(preds, x, self.nb_candidate), axis=1) # Define graph def deepfool_wrap(x_val): """deepfool function for py_func""" return deepfool_batch(self.sess, x, preds, logits, grads, x_val, self.nb_candidate, self.overshoot, self.max_iter, self.clip_min, self.clip_max, self.nb_classes) wrap = tf.py_func(deepfool_wrap, [x], self.tf_dtype) wrap.set_shape(x.get_shape()) return wrap
[docs] def parse_params(self, nb_candidate=10, overshoot=0.02, max_iter=50, clip_min=0., clip_max=1., **kwargs): """ :param nb_candidate: The number of classes to test against, i.e., deepfool only consider nb_candidate classes when attacking(thus accelerate speed). The nb_candidate classes are chosen according to the prediction confidence during implementation. :param overshoot: A termination criterion to prevent vanishing updates :param max_iter: Maximum number of iteration for deepfool :param clip_min: Minimum component value for clipping :param clip_max: Maximum component value for clipping """ self.nb_candidate = nb_candidate self.overshoot = overshoot self.max_iter = max_iter self.clip_min = clip_min self.clip_max = clip_max if len(kwargs.keys()) > 0: warnings.warn("kwargs is unused and will be removed on or after " "2019-04-26.") return True
def deepfool_batch(sess, x, pred, logits, grads, X, nb_candidate, overshoot, max_iter, clip_min, clip_max, nb_classes, feed=None): """ Applies DeepFool to a batch of inputs :param sess: TF session :param x: The input placeholder :param pred: The model's sorted symbolic output of logits, only the top nb_candidate classes are contained :param logits: The model's unnormalized output tensor (the input to the softmax layer) :param grads: Symbolic gradients of the top nb_candidate classes, procuded from gradient_graph :param X: Numpy array with sample inputs :param nb_candidate: The number of classes to test against, i.e., deepfool only consider nb_candidate classes when attacking(thus accelerate speed). The nb_candidate classes are chosen according to the prediction confidence during implementation. :param overshoot: A termination criterion to prevent vanishing updates :param max_iter: Maximum number of iteration for DeepFool :param clip_min: Minimum value for components of the example returned :param clip_max: Maximum value for components of the example returned :param nb_classes: Number of model output classes :return: Adversarial examples """ X_adv = deepfool_attack( sess, x, pred, logits, grads, X, nb_candidate, overshoot, max_iter, clip_min, clip_max, feed=feed) return np.asarray(X_adv, dtype=np_dtype) def deepfool_attack(sess, x, predictions, logits, grads, sample, nb_candidate, overshoot, max_iter, clip_min, clip_max, feed=None): """ TensorFlow implementation of DeepFool. Paper link: see https://arxiv.org/pdf/1511.04599.pdf :param sess: TF session :param x: The input placeholder :param predictions: The model's sorted symbolic output of logits, only the top nb_candidate classes are contained :param logits: The model's unnormalized output tensor (the input to the softmax layer) :param grads: Symbolic gradients of the top nb_candidate classes, procuded from gradient_graph :param sample: Numpy array with sample input :param nb_candidate: The number of classes to test against, i.e., deepfool only consider nb_candidate classes when attacking(thus accelerate speed). The nb_candidate classes are chosen according to the prediction confidence during implementation. :param overshoot: A termination criterion to prevent vanishing updates :param max_iter: Maximum number of iteration for DeepFool :param clip_min: Minimum value for components of the example returned :param clip_max: Maximum value for components of the example returned :return: Adversarial examples """ adv_x = copy.copy(sample) # Initialize the loop variables iteration = 0 current = utils_tf.model_argmax(sess, x, logits, adv_x, feed=feed) if current.shape == (): current = np.array([current]) w = np.squeeze(np.zeros(sample.shape[1:])) # same shape as original image r_tot = np.zeros(sample.shape) original = current # use original label as the reference _logger.debug( "Starting DeepFool attack up to %s iterations", max_iter) # Repeat this main loop until we have achieved misclassification while (np.any(current == original) and iteration < max_iter): if iteration % 5 == 0 and iteration > 0: _logger.info("Attack result at iteration %s is %s", iteration, current) gradients = sess.run(grads, feed_dict={x: adv_x}) predictions_val = sess.run(predictions, feed_dict={x: adv_x}) for idx in range(sample.shape[0]): pert = np.inf if current[idx] != original[idx]: continue for k in range(1, nb_candidate): w_k = gradients[idx, k, ...] - gradients[idx, 0, ...] f_k = predictions_val[idx, k] - predictions_val[idx, 0] # adding value 0.00001 to prevent f_k = 0 pert_k = (abs(f_k) + 0.00001) / np.linalg.norm(w_k.flatten()) if pert_k < pert: pert = pert_k w = w_k r_i = pert * w / np.linalg.norm(w) r_tot[idx, ...] = r_tot[idx, ...] + r_i adv_x = np.clip(r_tot + sample, clip_min, clip_max) current = utils_tf.model_argmax(sess, x, logits, adv_x, feed=feed) if current.shape == (): current = np.array([current]) # Update loop variables iteration = iteration + 1 # need more revision, including info like how many succeed _logger.info("Attack result at iteration %s is %s", iteration, current) _logger.info("%s out of %s become adversarial examples at iteration %s", sum(current != original), sample.shape[0], iteration) # need to clip this image into the given range adv_x = np.clip((1 + overshoot) * r_tot + sample, clip_min, clip_max) return adv_x