# Keras classifier
cls.classifier_k = get_classifier_kr()
scores = cls.classifier_k._model.evaluate(x_train, y_train)
logger.info('[Keras, MNIST] Accuracy on training set: %.2f%%', scores[1] * 100)
scores = cls.classifier_k._model.evaluate(x_test, y_test)
logger.info('[Keras, MNIST] Accuracy on test set: %.2f%%', scores[1] * 100)
# Create basic CNN on MNIST using TensorFlow
cls.classifier_tf, sess = get_classifier_tf()
scores = get_labels_np_array(cls.classifier_tf.predict(x_train))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('[TF, MNIST] Accuracy on training set: %.2f%%', acc * 100)
scores = get_labels_np_array(cls.classifier_tf.predict(x_test))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('[TF, MNIST] Accuracy on test set: %.2f%%', acc * 100)
# Create basic PyTorch model
cls.classifier_py = get_classifier_pt()
x_train, x_test = np.swapaxes(x_train, 1, 3), np.swapaxes(x_test, 1, 3)
x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32)
scores = get_labels_np_array(cls.classifier_py.predict(x_train))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', acc * 100)
scores = get_labels_np_array(cls.classifier_py.predict(x_test))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%', acc * 100)
def test_get_labels_np_array(self):
    y = np.array([3, 1, 4, 1, 5, 9])
    y_ = to_categorical(y)
    logits = np.random.normal(1 * y_, scale=0.1)
    ps = (np.exp(logits).T / np.sum(np.exp(logits), axis=1)).T
    labels = get_labels_np_array(ps)
    self.assertEqual(labels.shape, y_.shape)
    self.assertTrue(np.all(labels == y_))
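# A minimal reference sketch of the behaviour exercised by the test above (an
# assumption drawn from the assertions, not taken from the library source):
# get_labels_np_array is expected to map each row of class probabilities to a
# one-hot vector at its argmax, as the hypothetical helper below does.
import numpy as np

def one_hot_argmax(probs):
    labels = np.zeros_like(probs)
    labels[np.arange(probs.shape[0]), np.argmax(probs, axis=1)] = 1
    return labels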
acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('Accuracy on adversarial train examples: %.2f%%', acc * 100)
acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)
# Test PGD with 3 random initialisations
attack = ProjectedGradientDescent(classifier, num_random_init=3)
x_train_adv = attack.generate(x_train)
x_test_adv = attack.generate(x_test)
self.assertFalse((x_train == x_train_adv).all())
self.assertFalse((x_test == x_test_adv).all())
train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
self.assertFalse((y_train == train_y_pred).all())
self.assertFalse((y_test == test_y_pred).all())
acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('Accuracy on adversarial train examples with 3 random initialisations: %.2f%%', acc * 100)
acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Accuracy on adversarial test examples with 3 random initialisations: %.2f%%', acc * 100)
cls.classifier_tf, sess = get_classifier_tf()
scores = get_labels_np_array(cls.classifier_tf.predict(x_train))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('[TF, MNIST] Accuracy on training set: %.2f%%', (acc * 100))
scores = get_labels_np_array(cls.classifier_tf.predict(x_test))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('[TF, MNIST] Accuracy on test set: %.2f%%', (acc * 100))
# Create basic PyTorch model
cls.classifier_py = get_classifier_pt()
x_train, x_test = np.swapaxes(x_train, 1, 3), np.swapaxes(x_test, 1, 3)
x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32)
scores = get_labels_np_array(cls.classifier_py.predict(x_train))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', (acc * 100))
scores = get_labels_np_array(cls.classifier_py.predict(x_test))
acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%', (acc * 100))
def _test_backend_mnist(self, classifier):
    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_train == x_train_adv).all())
    self.assertFalse((self.x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_train == train_y_pred).all())
    self.assertFalse((self.y_test == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train, axis=1)) / self.y_train.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', accuracy * 100)
    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', accuracy * 100)
accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('[TF, MNIST] Accuracy on training set: %.2f%%', (accuracy * 100))
scores = get_labels_np_array(cls.classifier_tf.predict(x_test))
accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('[TF, MNIST] Accuracy on test set: %.2f%%', (accuracy * 100))
# Create basic PyTorch model
cls.classifier_py = get_classifier_pt()
x_train, x_test = np.swapaxes(x_train, 1, 3), np.swapaxes(x_test, 1, 3)
scores = get_labels_np_array(cls.classifier_py.predict(x_train.astype(np.float32)))
accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', (accuracy * 100))
scores = get_labels_np_array(cls.classifier_py.predict(x_test.astype(np.float32)))
accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%', (accuracy * 100))
logger.info('Accuracy on MNIST with FGM adversarial test examples: %.2f%%', (accuracy * 100))
# Test minimal perturbations
attack_params = {"minimal": True, "eps_step": 0.1, "eps": 1.0}
attack.set_params(**attack_params)
x_train_adv_min = attack.generate(self.x_train)
x_test_adv_min = attack.generate(self.x_test)
self.assertFalse((x_train_adv_min == x_train_adv).all())
self.assertFalse((x_test_adv_min == x_test_adv).all())
self.assertFalse((self.x_train == x_train_adv_min).all())
self.assertFalse((self.x_test == x_test_adv_min).all())
train_y_pred = get_labels_np_array(classifier.predict(x_train_adv_min))
test_y_pred = get_labels_np_array(classifier.predict(x_test_adv_min))
self.assertFalse((self.y_train == train_y_pred).all())
self.assertFalse((self.y_test == test_y_pred).all())
accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train, axis=1)) / self.y_train.shape[0]
logger.info('Accuracy on MNIST with FGM adversarial train examples with minimal perturbation: %.2f%%',
            accuracy * 100)
accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
logger.info('Accuracy on MNIST with FGM adversarial test examples with minimal perturbation: %.2f%%',
            accuracy * 100)
# L_1 norm
attack = FastGradientMethod(classifier, eps=1, norm=1, batch_size=128)
x_test_adv = attack.generate(self.x_test)
:param y: The labels for the data `x`. Only provide this parameter if you'd like to use true
labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
"label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
Labels should be one-hot-encoded.
:type y: `np.ndarray`
:return: An array holding the adversarial examples.
:rtype: `np.ndarray`
"""
if y is None:
    # Throw error if attack is targeted, but no targets are provided
    if self.targeted:
        raise ValueError('Target labels `y` need to be provided for a targeted attack.')

    # Use model predictions as correct outputs
    logger.info('Using model predictions as correct labels for FGM.')
    y = get_labels_np_array(self.classifier.predict(x, batch_size=self.batch_size))
y = y / np.sum(y, axis=1, keepdims=True)

# Return adversarial examples computed with minimal perturbation if option is active
if self.minimal:
    logger.info('Performing minimal perturbation FGM.')
    adv_x_best = self._minimal_perturbation(x, y)
    rate_best = 100 * compute_success(self.classifier, x, y, adv_x_best,
                                      self.targeted, batch_size=self.batch_size)
else:
    adv_x_best = None
    rate_best = None

    for _ in range(max(1, self.num_random_init)):
        adv_x = self._compute(x, x, y, self.eps, self.eps, self._project, self.num_random_init > 0)

        if self.num_random_init > 1:
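# Usage sketch for the FGM generate() documented above (assumptions: `classifier` is an
# already-fitted ART classifier wrapper and `x_test` is an np.ndarray as in the tests
# earlier on this page; the parameter values are illustrative only).
attack = FastGradientMethod(classifier, eps=0.1, batch_size=128)
x_test_adv = attack.generate(x_test)  # y=None: model predictions are used as labels to avoid label leaking
attack.set_params(minimal=True, eps_step=0.01, eps=1.0)
x_test_adv_min = attack.generate(x_test)  # search for the smallest perturbation per sample, up to eps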
def generate(self, x, **kwargs):
    """
    Generate adversarial examples and return them as an array.

    :param x: An array with the original inputs to be attacked.
    :type x: `np.ndarray`
    :param kwargs: Attack-specific parameters used to update the attack's configuration.
    :return: An array holding the adversarial examples.
    :rtype: `np.ndarray`
    """
    from art.utils import get_labels_np_array

    x_adv = np.copy(x)
    preds = get_labels_np_array(self.classifier.predict(x))
    for i, input_ in enumerate(x_adv):
        logger.debug('Attacking input %i', i)
        scores = self.score(self.classifier, input_, preds[i])
        prioritized_tokens = np.flip(scores.argsort(), axis=0)

        if hasattr(self.transform, 'uses_embedding') and self.transform.uses_embedding:
            input_emb = self.classifier.to_embedding(np.expand_dims(input_, axis=0))[0]
            transform_values = self.transform(self.classifier, input_, preds[i])

        for j, token_pos in enumerate(prioritized_tokens):
            # TODO otherwise, detect automatically if the transform operates in the embedding space
            if hasattr(self.transform, 'uses_embedding') and self.transform.uses_embedding:
                input_emb[token_pos, :] = transform_values[token_pos]
                old_token = input_[token_pos]
:type x: `np.ndarray`
:param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
(nb_samples,).
:type y: `np.ndarray`
:return: An array holding the adversarial examples.
:rtype: `np.ndarray`
"""
y = check_and_transform_label_format(y, self.classifier.nb_classes())

# Check that `y` is provided for targeted attacks
if self.targeted and y is None:
    raise ValueError('Target labels `y` need to be provided for a targeted attack.')

# No labels provided, use model prediction as correct class
if y is None:
    y = get_labels_np_array(self.classifier.predict(x, batch_size=self.batch_size))

# Compute adversarial examples with implicit batching
nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
x_adv = []
for batch_id in range(nb_batches):
    logger.debug('Processing batch %i out of %i', batch_id, nb_batches)
    batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
    x_batch = x[batch_index_1:batch_index_2]
    y_batch = y[batch_index_1:batch_index_2]
    res = self._generate_batch(x_batch, y_batch)
    x_adv.append(res)
x_adv = np.vstack(x_adv)

# Apply clip
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
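# Usage sketch for a batched generate() such as the one above (assumptions: `classifier`,
# `x_test` and a one-hot `y_target` array are available; `y_target` is a hypothetical
# name used only for illustration). For a targeted attack, labels must be supplied
# explicitly; otherwise model predictions are used as the correct class.
attack = ProjectedGradientDescent(classifier, eps=0.3, eps_step=0.1, targeted=True, batch_size=128)
x_test_adv = attack.generate(x_test, y=y_target)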