Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment of a test body: `classifier`, `self.x_test` and `self.y_test` are
# provided by code outside the visible snippet.

# Test untargeted attack: generate() is called without labels.
eps = 0.1
attack = FastGradientMethod(classifier, eps=eps)
x_test_adv = attack.generate(self.x_test)

# Every feature must differ from the clean input by eps (to 5 decimals),
# and the adversarial samples must remain inside [0, 1].
np.testing.assert_array_almost_equal(np.abs(x_test_adv - self.x_test), eps, decimal=5)
self.assertLessEqual(np.amax(x_test_adv), 1.0)
self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

# The attack must flip at least one prediction away from the true label.
predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertFalse((np.argmax(self.y_test, axis=1) == predictions_adv).all())

# Remaining accuracy is logged only, not asserted.  The class name is passed
# as a logging argument instead of being concatenated into the format string,
# so it can never be misread as a format directive.
accuracy = np.sum(predictions_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
logger.info('Accuracy of %s on Iris with FGM adversarial examples: %.2f%%',
            classifier.__class__.__name__, accuracy * 100)

# Test targeted attack: targets come from random_targets() and are passed as `y`.
targets = random_targets(self.y_test, nb_classes=3)
attack = FastGradientMethod(classifier, targeted=True, eps=0.1, batch_size=128)
x_test_adv = attack.generate(self.x_test, **{'y': targets})

# The samples must have changed somewhere and must remain inside [0, 1].
self.assertFalse((self.x_test == x_test_adv).all())
self.assertLessEqual(np.amax(x_test_adv), 1.0)
self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

# At least one sample must be classified as its requested target.
predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertTrue((np.argmax(targets, axis=1) == predictions_adv).any())

# Fraction of samples that reached their target (logged only).
accuracy = np.sum(predictions_adv == np.argmax(targets, axis=1)) / self.y_test.shape[0]
logger.info('Success rate of %s on targeted FGM on Iris: %.2f%%',
            classifier.__class__.__name__, accuracy * 100)
# Fragment of a test body: `tfc`, `x_test` and `y_test` are defined by code
# outside the visible snippet.

# First targeted attack and norm=2
# (no `norm` argument is passed, so the HopSkipJump default is used; the
# original comment states that this is the L2 case).
hsj = HopSkipJump(classifier=tfc, targeted=True, max_iter=2, max_eval=100, init_eval=10)
params = {'y': random_targets(y_test, tfc.nb_classes())}
x_test_adv = hsj.generate(x_test, **params)

# The samples must have changed somewhere and stay within [0, 1], with a
# 1e-4 tolerance on both bounds.
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1.0001).all())
self.assertTrue((x_test_adv >= -0.0001).all())

# At least one sample must be classified as its requested target.
target = np.argmax(params['y'], axis=1)
y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
self.assertTrue((target == y_pred_adv).any())

# First targeted attack and norm=np.inf
# `np.inf` is used instead of the `np.Inf` alias, which NumPy 2.0 removed.
hsj = HopSkipJump(classifier=tfc, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
params = {'y': random_targets(y_test, tfc.nb_classes())}
x_test_adv = hsj.generate(x_test, **params)

self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1.0001).all())
self.assertTrue((x_test_adv >= -0.0001).all())

target = np.argmax(params['y'], axis=1)
y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack and norm=2 (again relying on the default norm).
hsj = HopSkipJump(classifier=tfc, targeted=False, max_iter=2, max_eval=100, init_eval=10)
x_test_adv = hsj.generate(x_test)

# Only the "changed" and upper-bound checks are present in the visible snippet.
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1.0001).all())
# Fragment of a test body: `x_test` and `y_test` are defined by code outside
# the visible snippet.
# Fetch the Keras Iris classifier; the second returned value is discarded.
classifier, _ = get_iris_classifier_kr()

# Test untargeted attack: generate() is called without labels.
attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
x_test_adv = attack.generate(x_test)
# The samples must have changed somewhere and must remain inside [0, 1].
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1).all())
self.assertTrue((x_test_adv >= 0).all())
# At least one prediction must no longer match the true label.
preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
# Fraction of samples still classified correctly (logged only, not asserted).
acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (acc * 100))

# Test targeted attack: targets come from random_targets() and are passed as `y`.
targets = random_targets(y_test, nb_classes=3)
attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1)
x_test_adv = attack.generate(x_test, **{'y': targets})
# Same "changed" and [0, 1] range checks as in the untargeted case.
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1).all())
self.assertTrue((x_test_adv >= 0).all())
# At least one sample must be classified as its requested target.
preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
# Fraction of samples that reached their target (logged only).
acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
logger.info('Success rate of targeted PGD on Iris: %.2f%%', (acc * 100))
def test_krclassifier(self):
    """
    Compare targeted FGM on a KerasClassifier with and without an EoT wrapper.

    The EoT wrapper is given a transformation that returns its input
    unchanged and a sample size of 1.  Both attacks receive the same targets,
    and their adversarial examples must agree element-wise to within 0.001.
    :return:
    """
    # Keras model under attack
    keras_clf = get_classifier_kr()

    # Only the MNIST test split is used
    (_, _), (x_test, y_test) = self.mnist

    # Baseline: targeted FGM straight on the classifier
    plain_attack = FastGradientMethod(classifier=keras_clf, targeted=True)
    attack_kwargs = {'y': random_targets(y_test, keras_clf.nb_classes())}
    adv_plain = plain_attack.generate(x_test, **attack_kwargs)

    # Same attack routed through EoT with a no-op transformation
    def identity(x):
        return x

    def identity_stream():
        # Endless supply of the same no-op transformation
        while True:
            yield identity

    eot_clf = ExpectationOverTransformations(classifier=keras_clf, sample_size=1,
                                             transformation=identity_stream)
    eot_attack = FastGradientMethod(classifier=eot_clf, targeted=True)
    adv_eot = eot_attack.generate(x_test, **attack_kwargs)

    # Both runs must produce (almost) identical adversarial examples
    within_tolerance = np.abs(adv_plain - adv_eot) < 0.001
    self.assertTrue(within_tolerance.all())
def test_random_targets(self):
    """
    Check that random_targets never returns a sample's own label.

    The check is run once on index labels and once on their
    to_categorical() encoding, each time with 10 classes.
    """
    labels = np.array([3, 1, 4, 1, 5, 9])
    encoded = to_categorical(labels)

    # Index labels first, then the categorical form -- same order as before
    for source in (labels, encoded):
        drawn = random_targets(source, 10)
        self.assertTrue(np.all(labels != drawn.argmax(axis=1)))
def test_ptclassifier(self):
    """
    Run targeted and untargeted BoundaryAttack against the PyTorchClassifier.
    :return:
    """
    # PyTorch model under attack
    pt_clf = get_classifier_pt()

    # Swap axes 1 and 3 of the test images and cast them to float32
    x_test = np.swapaxes(self.x_test, 1, 3).astype(np.float32)

    # --- Targeted attack ---
    targeted_attack = BoundaryAttack(classifier=pt_clf, targeted=True, max_iter=20)
    target_labels = random_targets(self.y_test, pt_clf.nb_classes())
    x_test_adv = targeted_attack.generate(x_test, y=target_labels)

    # Samples must have changed somewhere and stay in [0, 1] (1e-4 tolerance)
    unchanged = x_test == x_test_adv
    self.assertFalse(unchanged.all())
    below_upper = x_test_adv <= 1.0001
    self.assertTrue(below_upper.all())
    above_lower = x_test_adv >= -0.0001
    self.assertTrue(above_lower.all())

    # At least one sample must be classified as its requested target
    wanted = np.argmax(target_labels, axis=1)
    predicted = np.argmax(pt_clf.predict(x_test_adv), axis=1)
    hits = wanted == predicted
    self.assertTrue(hits.any())

    # --- Untargeted attack ---
    untargeted_attack = BoundaryAttack(classifier=pt_clf, targeted=False, max_iter=20)
    x_test_adv = untargeted_attack.generate(x_test)

    unchanged = x_test == x_test_adv
    self.assertFalse(unchanged.all())
    # NOTE(review): only the upper bound is asserted here, unlike the targeted
    # case above -- confirm whether a lower-bound check was intended.
    below_upper = x_test_adv <= 1.0001
    self.assertTrue(below_upper.all())
# Fragment of a test body: `x_test` and `y_test` are defined by code outside
# the visible snippet.
# Fetch the TensorFlow Iris classifier; the second returned value is discarded.
classifier, _ = get_iris_classifier_tf()

# Test untargeted attack: generate() is called without labels.
attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
x_test_adv = attack.generate(x_test)
# The samples must have changed somewhere and must remain inside [0, 1].
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1).all())
self.assertTrue((x_test_adv >= 0).all())
# At least one prediction must no longer match the true label.
preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
# Fraction of samples still classified correctly (logged only, not asserted).
acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (acc * 100))

# Test targeted attack: targets come from random_targets() and are passed as `y`.
targets = random_targets(y_test, nb_classes=3)
attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1)
x_test_adv = attack.generate(x_test, **{'y': targets})
# Same "changed" and [0, 1] range checks as in the untargeted case.
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1).all())
self.assertTrue((x_test_adv >= 0).all())
# At least one sample must be classified as its requested target.
preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
# Fraction of samples that reached their target (logged only).
acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
logger.info('Success rate of targeted PGD on Iris: %.2f%%', (acc * 100))
# Fragment of a test body: `classifier`, `x_test` and `y_test` are defined by
# code outside this block.

# Test targeted attack and norm=2
# (no `norm` argument is passed, so the HopSkipJump default is used; the
# original comment states that this is the L2 case).
targets = random_targets(y_test, nb_classes=3)
attack = HopSkipJump(classifier, targeted=True, max_iter=2, max_eval=100, init_eval=10)
x_test_adv = attack.generate(x_test, **{'y': targets})

# The samples must have changed somewhere and must remain inside [0, 1].
self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1).all())
self.assertTrue((x_test_adv >= 0).all())

# At least one sample must be classified as its requested target.
preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())

# Fraction of samples that reached their target (logged only).
acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
logger.info('Success rate of targeted HopSkipJump on Iris: %.2f%%', (acc * 100))

# Test targeted attack and norm=np.inf
# `np.inf` is used instead of the `np.Inf` alias, which NumPy 2.0 removed.
targets = random_targets(y_test, nb_classes=3)
attack = HopSkipJump(classifier, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
x_test_adv = attack.generate(x_test, **{'y': targets})

self.assertFalse((x_test == x_test_adv).all())
self.assertTrue((x_test_adv <= 1).all())
self.assertTrue((x_test_adv >= 0).all())

preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())

acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
logger.info('Success rate of targeted HopSkipJump on Iris: %.2f%%', (acc * 100))

# Clean-up session
# NOTE(review): `sess` is not assigned anywhere in the visible code -- confirm
# that the enclosing test defines it before this line runs.
sess.close()
def test_failure_attack(self):
    """
    Test the corner case where the attack fails.

    CarliniL2Method is configured with ``max_iter=0``,
    ``binary_search_steps=0`` and ``learning_rate=0``.  The test asserts that
    the returned samples stay inside [0, 1] and equal the clean inputs to
    3 decimals.
    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    try:
        # Failure attack
        cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0,
                               learning_rate=0, initial_const=1)
        params = {'y': random_targets(self.y_test, tfc.nb_classes())}
        x_test_adv = cl2m.generate(self.x_test, **params)

        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        # A failed attack must hand back the original samples
        np.testing.assert_array_almost_equal(self.x_test, x_test_adv, decimal=3)
    finally:
        # Clean-up session, also when the attack raises or an assertion fails,
        # so the session is not leaked into later tests
        sess.close()
# Fragment of a test body: `self.x_test` and `self.y_test` are provided by
# code outside the visible snippet.
# Fetch the TensorFlow Iris classifier; the second returned value is discarded.
classifier, _ = get_iris_classifier_tf()

# Test untargeted attack: generate() is called without labels.
attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
x_test_adv = attack.generate(self.x_test)
# The samples must have changed somewhere and must remain inside [0, 1].
self.assertFalse((self.x_test == x_test_adv).all())
self.assertLessEqual(np.amax(x_test_adv), 1.0)
self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
# At least one prediction must no longer match the true label.
predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertFalse((np.argmax(self.y_test, axis=1) == predictions_adv).all())
# Fraction of samples still classified correctly (logged only, not asserted).
accuracy = np.sum(predictions_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (accuracy * 100))

# Test targeted attack: targets come from random_targets() and are passed as `y`.
targets = random_targets(self.y_test, nb_classes=3)
attack = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
x_test_adv = attack.generate(self.x_test, **{'y': targets})
# Same "changed" and [0, 1] range checks as in the untargeted case.
self.assertFalse((self.x_test == x_test_adv).all())
self.assertLessEqual(np.amax(x_test_adv), 1.0)
self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
# At least one sample must be classified as its requested target.
predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
self.assertTrue((np.argmax(targets, axis=1) == predictions_adv).any())
# Fraction of samples that reached their target (logged only).
accuracy = np.sum(predictions_adv == np.argmax(targets, axis=1)) / self.y_test.shape[0]
logger.info('Success rate of targeted C&W on Iris: %.2f%%', (accuracy * 100))