# Source code for cleverhans.attacks.attack

"""
The Attack interface.
"""

from abc import ABCMeta
import collections
import warnings

import numpy as np
import tensorflow as tf

from cleverhans.compat import reduce_max
from cleverhans.model import Model
from cleverhans import utils

# Module-level logger for this file, obtained from cleverhans.utils.
_logger = utils.create_logger("cleverhans.attacks.attack")


class Attack(object):
    """
    Abstract base class for all attack classes.

    Sub-classes implement `generate` (the symbolic graph of the attack) and
    declare which keyword arguments of `generate_np` are *feedable* (fed
    through placeholders into a cached graph) and which are *structural*
    (a change in value requires building a new graph).
    """
    # NOTE: `__metaclass__` is the Python 2 spelling; Python 3 ignores it.
    __metaclass__ = ABCMeta

    def __init__(self, model, sess=None, dtypestr='float32', **kwargs):
        """
        :param model: An instance of the cleverhans.model.Model class.
        :param sess: The (possibly optional) tf.Session to run graphs in.
        :param dtypestr: Floating point precision to use (change to float64
                         to avoid numerical instabilities).
        :param back: (deprecated and will be removed on or after 2019-03-26).
                     The backend to use. Currently 'tf' is the only option.
        :raises ValueError: if `back` is given and is not 'tf'.
        :raises TypeError: if `sess` is not a tf.Session or `model` is not a
                           cleverhans.model.Model.

        Any keyword argument other than `back` is accepted and ignored.
        """
        if 'back' in kwargs:
            if kwargs['back'] == 'tf':
                warnings.warn("Argument back to attack constructors is not "
                              "needed anymore and will be removed on or "
                              "after 2019-03-26. All attacks are implemented "
                              "using TensorFlow.")
            else:
                raise ValueError("Backend argument must be 'tf' and is now "
                                 "deprecated. It will be removed on or after "
                                 "2019-03-26.")

        self.tf_dtype = tf.as_dtype(dtypestr)
        self.np_dtype = np.dtype(dtypestr)

        if sess is not None and not isinstance(sess, tf.Session):
            raise TypeError("sess is not an instance of tf.Session")

        # Imported here rather than at module level (presumably to avoid a
        # circular import -- TODO confirm). The precision chosen for this
        # attack is published as module-level state of attacks_tf.
        from cleverhans import attacks_tf
        attacks_tf.np_dtype = self.np_dtype
        attacks_tf.tf_dtype = self.tf_dtype

        if not isinstance(model, Model):
            raise TypeError("The model argument should be an instance of"
                            " the cleverhans.model.Model class.")

        # Prepare attributes
        self.model = model
        self.sess = sess
        self.dtypestr = dtypestr

        # We are going to keep track of old graphs and cache them.
        # Maps hash_key -> (x placeholder, kwargs passed to generate, x_adv).
        self.graphs = {}

        # When calling generate_np, arguments named in the following tuple
        # are fed into the graph, as they are not structural items that
        # require generating a new graph.
        # (Usually, the target class will be a feedable keyword argument.)
        # A dict mapping names to types is still accepted but deprecated.
        self.feedable_kwargs = tuple()

        # When calling generate_np, arguments in the following list should
        # NOT be fed into the graph, as they ARE structural items that
        # require generating a new graph.
        # This list should contain the names of the structural arguments.
        self.structural_kwargs = []

    def generate(self, x, **kwargs):
        """
        Generate the attack's symbolic graph for adversarial examples. This
        method should be overridden in any child class that implements an
        attack that is expressible symbolically. This base implementation
        always raises NotImplementedError.

        :param x: The model's symbolic inputs.
        :param **kwargs: optional parameters used by child classes.
          Each child class defines additional parameters as needed.
          Child classes that use the following concepts should use the
          following names:
            clip_min: minimum feature value
            clip_max: maximum feature value
            eps: size of norm constraint on adversarial perturbation
            ord: order of norm constraint
            nb_iter: number of iterations
            eps_iter: size of norm constraint on iteration
            y_target: if specified, the attack is targeted.
            y: Do not specify if y_target is specified.
               If specified, the attack is untargeted, aims to make the
               output class not be y.
               If neither y_target nor y is specified, y is inferred by
               having the model classify the input.
          For other concepts, it's generally a good idea to read other
          classes and check for name consistency.
        :return: A symbolic representation of the adversarial examples.
        :raises NotImplementedError: always, in this base class.
        """
        error = "Sub-classes must implement generate."
        raise NotImplementedError(error)
        # Include an unused return so pylint understands the method signature
        return x

    def construct_graph(self, fixed, feedable, x_val, hash_key):
        """
        Construct the graph required to run the attack through generate_np
        and store it in `self.graphs[hash_key]`.

        :param fixed: Structural elements that require defining a new graph.
        :param feedable: Arguments that can be fed to the same graph when
                         they take different values. Entries whose value is
                         None are removed from this dict in place.
        :param x_val: NumPy array of inputs; only its shape is used, to size
                      the input placeholder.
        :param hash_key: the key used to store this graph in our cache
        :raises ValueError: if a feedable value is neither a NumPy array nor
                            one of `utils.known_number_types`.
        """
        # try our very best to create a TF placeholder for each of the
        # feedable keyword arguments, and check the types are one of
        # the allowed types
        class_name = self.__class__.__name__
        _logger.info("Constructing new graph for attack " + class_name)

        # remove the None arguments, they are just left blank
        for k in list(feedable.keys()):
            if feedable[k] is None:
                del feedable[k]

        # process all of the rest and create placeholders for them
        new_kwargs = dict(fixed)
        for name, value in feedable.items():
            # The dtype is looked up only after the type is known, so that
            # an unsupported value reaches the ValueError below instead of
            # failing on a missing `.dtype` attribute.
            if isinstance(value, np.ndarray):
                if value.ndim == 0:
                    # This is pretty clearly not a batch of data
                    new_kwargs[name] = tf.placeholder(value.dtype, shape=[],
                                                      name=name)
                else:
                    # Assume that this is a batch of data, make the first
                    # axis variable in size
                    new_shape = [None] + list(value.shape[1:])
                    new_kwargs[name] = tf.placeholder(value.dtype, new_shape,
                                                      name=name)
            elif isinstance(value, utils.known_number_types):
                # np.asarray yields the scalar's own dtype for NumPy scalars
                # and NumPy's default dtype for plain Python numbers.
                new_kwargs[name] = tf.placeholder(np.asarray(value).dtype,
                                                  shape=[], name=name)
            else:
                raise ValueError("Could not identify type of argument " +
                                 name + ": " + str(value))

        # x is a special placeholder we always want to have
        x_shape = [None] + list(x_val.shape)[1:]
        x = tf.placeholder(self.tf_dtype, shape=x_shape)

        # now we generate the graph that we want
        x_adv = self.generate(x, **new_kwargs)

        self.graphs[hash_key] = (x, new_kwargs, x_adv)

        if len(self.graphs) >= 10:
            warnings.warn("Calling generate_np() with multiple different "
                          "structural parameters is inefficient and should"
                          " be avoided. Calling generate() is preferred.")

    def generate_np(self, x_val, **kwargs):
        """
        Generate adversarial examples and return them as a NumPy array.
        Sub-classes *should not* implement this method unless they must
        perform special handling of arguments.

        :param x_val: A NumPy array with the original inputs.
        :param **kwargs: optional parameters used by child classes.
        :return: A NumPy array holding the adversarial examples.
        :raises ValueError: if the attack was constructed without a session.
        """
        if self.sess is None:
            raise ValueError("Cannot use `generate_np` when no `sess` was"
                             " provided")

        fixed, feedable, _, hash_key = self.construct_variables(kwargs)

        # A hash_key of None means some structural argument is unhashable,
        # so the graph cannot be identified by its key. Such a graph must be
        # rebuilt on every call; reusing whatever was last stored under None
        # would silently run a graph built for different arguments.
        cacheable = hash_key is not None
        if not cacheable or hash_key not in self.graphs:
            self.construct_graph(fixed, feedable, x_val, hash_key)

        x, new_kwargs, x_adv = self.graphs[hash_key]
        if not cacheable:
            # Drop the one-off entry so it is never mistaken for a cache hit.
            self.graphs.pop(hash_key, None)

        feed_dict = {x: x_val}
        for name, value in feedable.items():
            # None arguments have no placeholder; they are just left blank.
            if value is not None:
                feed_dict[new_kwargs[name]] = value

        return self.sess.run(x_adv, feed_dict)

    def construct_variables(self, kwargs):
        """
        Construct the inputs to the attack graph to be used by generate_np.

        :param kwargs: Keyword arguments to generate_np.
        :return:
          Structural arguments
          Feedable arguments (Python ints/floats converted to NumPy arrays)
          Output of `arg_type` describing feedable arguments
          A unique key, or None if a structural argument is not hashable
        :raises TypeError: if `feedable_kwargs` is neither a tuple nor a dict.
        :raises ValueError: if a keyword is neither structural nor feedable.
        """
        # `Hashable` left the top-level `collections` namespace in
        # Python 3.10; fall back to the old location for Python 2.
        try:
            from collections.abc import Hashable
        except ImportError:
            from collections import Hashable

        if isinstance(self.feedable_kwargs, dict):
            warnings.warn("Using a dict for `feedable_kwargs` is deprecated. "
                          "Switch to using a tuple. "
                          "It is no longer necessary to specify the types "
                          "of the arguments---we build a different graph "
                          "for each received type. "
                          "Using a dict may become an error on or after "
                          "2019-04-18.")
            feedable_names = tuple(sorted(self.feedable_kwargs.keys()))
        else:
            feedable_names = self.feedable_kwargs
        if not isinstance(feedable_names, tuple):
            raise TypeError("Attack.feedable_kwargs should be a tuple, but "
                            "for subclass " + str(type(self)) + " it is " +
                            str(self.feedable_kwargs) + " of type " +
                            str(type(self.feedable_kwargs)))

        # the set of arguments that are structural properties of the attack
        # if these arguments are different, we must construct a new graph
        fixed = {k: v for k, v in kwargs.items()
                 if k in self.structural_kwargs}

        # the set of arguments that are passed as placeholders to the graph
        # on each call, and can change without constructing a new graph
        feedable = {}
        for k, v in kwargs.items():
            if k in feedable_names:
                # Plain Python numbers are wrapped so they expose a dtype.
                feedable[k] = np.array(v) if isinstance(v, (float, int)) else v

        for key in kwargs:
            if key not in fixed and key not in feedable:
                raise ValueError(str(type(self)) + ": Undeclared argument: " +
                                 key)

        feed_arg_type = arg_type(feedable_names, feedable)

        if not all(isinstance(value, Hashable) for value in fixed.values()):
            # we have received a fixed value that isn't hashable
            # this means we can't cache this graph for later use,
            # and it will have to be discarded later
            hash_key = None
        else:
            # create a unique key for this set of fixed parameters
            hash_key = tuple(sorted(fixed.items())) + (feed_arg_type,)

        return fixed, feedable, feed_arg_type, hash_key

    def get_or_guess_labels(self, x, kwargs):
        """
        Get the label to use in generating an adversarial example for x.
        The kwargs are fed directly from the kwargs of the attack.
        If 'y' is in kwargs and is not None, then assume it's an untargeted
        attack and use that as the label.
        If 'y_target' is in kwargs and is not None, then assume it's a
        targeted attack and use that as the label.
        Otherwise, use the model's prediction as the label and perform an
        untargeted attack.

        :param x: The model's symbolic inputs; only used when the labels have
                  to be inferred from the model.
        :param kwargs: dict of the attack's keyword arguments.
        :return: a pair (labels, nb_classes), where nb_classes is the size of
                 the second axis of labels.
        :raises ValueError: if both 'y' and 'y_target' are given (not None).
        """
        # A value of None means "not supplied" for either key.
        y = kwargs.get('y')
        y_target = kwargs.get('y_target')

        if y is not None and y_target is not None:
            raise ValueError("Can not set both 'y' and 'y_target'.")

        if y is not None:
            labels = y
        elif y_target is not None:
            labels = y_target
        else:
            # One-hot (ties give multi-hot) encoding of the model's most
            # probable class, detached from the gradient computation.
            preds = self.model.get_probs(x)
            preds_max = reduce_max(preds, 1, keepdims=True)
            original_predictions = tf.to_float(tf.equal(preds, preds_max))
            labels = tf.stop_gradient(original_predictions)

        if isinstance(labels, np.ndarray):
            nb_classes = labels.shape[1]
        else:
            nb_classes = labels.get_shape().as_list()[1]
        return labels, nb_classes

    def parse_params(self, params=None):
        """
        Take in a dictionary of parameters and applies attack-specific checks
        before saving them as attributes.

        This base implementation stores nothing; it only warns when `params`
        is supplied.

        :param params: a dictionary of attack-specific parameters
        :return: True when parsing was successful
        """
        if params is not None:
            warnings.warn("`params` is unused and will be removed "
                          " on or after 2019-04-26.")
        return True
def arg_type(arg_names, kwargs):
    """
    Summarise, in hashable form, the types of the `arg_names` entries found
    in `kwargs`.

    :param arg_names: tuple containing names of relevant arguments
    :param kwargs: dict mapping string argument names to values.
      These must be values for which we can create a tf placeholder.
      Currently supported: numpy ndarray or something that can ducktype it
      (i.e. exposes a `dtype` that is a numpy dtype or has `as_np_dtype`).
    :return: API contract is to return a hashable object describing all
      structural consequences of argument values that can otherwise
      be fed into a graph of fixed structure.
      Currently this is implemented as a tuple of tuples that track:
        - whether each argument was passed
        - whether each argument was passed and not None
        - the dtype of each argument
      Callers shouldn't rely on the exact structure of this object,
      just its hashability and one-to-one mapping between graph structures.
    """
    assert isinstance(arg_names, tuple)

    def _as_np_dtype(value):
        # Resolve the value's dtype to a numpy dtype; non-numpy dtype objects
        # are expected to offer an `as_np_dtype` conversion.
        assert hasattr(value, 'dtype'), type(value)
        found = value.dtype
        if not isinstance(found, np.dtype):
            found = found.as_np_dtype
        assert isinstance(found, np.dtype)
        return found

    was_passed = tuple(name in kwargs for name in arg_names)

    # A missing name and an explicit None both count as "not usable".
    has_value = tuple(kwargs.get(name) is not None for name in arg_names)

    # Missing or None arguments are recorded with a dtype of None.
    dtype_summary = tuple(
        _as_np_dtype(kwargs[name]) if kwargs.get(name) is not None else None
        for name in arg_names
    )

    return (was_passed, has_value, dtype_summary)