foolbox
It seems like the 'success' value returned by the 'attack' function is overconfident.
import torch
from foolbox import PyTorchModel
from foolbox.attacks import LinfFastGradientAttack

if __name__ == '__main__':
    args = parser.parse_args()  # parser and the controller classes are defined elsewhere in the project
    model = ModelController(args).get_model().eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    ConvCTrainer.load_model(args, model, device)
    data_controller = DataCController(args)
    eval_loader = data_controller.get_test_dataloader()
    mean, std = data_controller.aug_controller.get_normalize_param()

    attack = LinfFastGradientAttack()
    # fmodel applies the normalization internally, so it expects raw inputs in [0, 1]
    fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=dict(mean=mean, std=std))
    epsilons = [0.01, 0.03, 0.1, 0.3, 0.5]

    cnt = torch.zeros(len(epsilons), device=device)
    total = torch.zeros(len(epsilons), device=device)
    correct = torch.zeros(len(epsilons), device=device)
    for images, labels in eval_loader:
        images = images.to(device)
        labels = labels.to(device)
        # denormalize: the loader yields normalized images, the attack wants [0, 1]
        images = images * std[:, None, None] + mean[:, None, None]
        _, advs_list, success = attack(fmodel, images, labels, epsilons=epsilons)
        cnt += success.sum(dim=1)
        total += images.shape[0]
        for i, advs in enumerate(advs_list):
            preds = model(advs).argmax(dim=1)
            correct[i] += (preds == labels).sum().item()  # compute accuracy for each epsilon
    print(f"Success rate vector: {cnt / total}")
    print(f"Accuracy vector for each epsilon: {correct / total}")
And the output of this code is:
Success rate vector: tensor([0.3397, 0.4297, 0.5650, 0.6253, 0.6954], device='cuda:0')
Accuracy vector for each epsilon: tensor([0.9872, 0.9851, 0.9676, 0.7457, 0.3955], device='cuda:0')
Since 'success' is supposed to mean that the adversarial example is misclassified, the success rate and the accuracy for each epsilon should sum to roughly one, but here they clearly do not.
Can you please ensure that your model is fixed and does not change while your code is running (e.g., because of batch norm layers)? I suspect that this is what causes your observation, as the internal computation of the success variable (see here) is pretty simple and most likely not incorrect.
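A minimal sketch of such a check (check_model_is_fixed is just an illustrative helper, not part of foolbox): verify that no submodule is in training mode, and that a prediction does not depend on the rest of the batch, which it would with batch norm in training mode:

import torch

@torch.no_grad()
def check_model_is_fixed(model: torch.nn.Module, images: torch.Tensor) -> None:
    # All submodules should report eval mode.
    print("any submodule in training mode:", any(m.training for m in model.modules()))
    # In eval mode, batch norm uses fixed running statistics, so the
    # prediction for one image must not change with the batch around it.
    out_single = model(images[:1])
    out_batch = model(images)[:1]
    print("batch-independent predictions:", torch.allclose(out_single, out_batch))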
You have to redo the normalization. The adversarial images will be between 0 and 1 (or whatever your bounds are), but your model expects something else. I had the same problem. This is easy to test if you check the min and max values of the images. Btw, the part where you calculate the accuracy of your model yourself then becomes unnecessary, but it is a nice check. Here is roughly what my loop looks like:
import torch

def min_max_images(images: torch.Tensor):
    min_ = images.min().item()
    max_ = images.max().item()
    print(f"min: {min_}, max: {max_}")

images, labels, _ = utils.get_out_dataloader(out_dataloader, device)
min_max_images(images)
# denormalize for the attack
images = images * std[:, None, None] + mean[:, None, None]
_, advs_list, is_adv = attack(fmodel, images, labels, epsilons=epsilons)
count_adv += is_adv.sum(dim=1)
total += images.shape[0]
for i, advs in enumerate(advs_list):
    # min/max before normalization: should lie within the attack bounds (0, 1)
    min_max_images(advs)
    # normalize for the model
    advs_images = (advs - mean[:, None, None]) / std[:, None, None]
    # min/max after normalization: matches what the model was trained on
    min_max_images(advs_images)
    preds = model(advs_images).argmax(dim=1)
    correct[i] += (preds == labels).sum().item()  # compute accuracy for each epsilon
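And since the is_adv flags returned by foolbox already encode whether each adversarial example fooled the model, the robust accuracy for a batch can be read off directly instead of being recomputed (this mirrors the computation in the foolbox README; the variable names are taken from the snippet above):

# an attack "succeeds" exactly when the model misclassifies,
# so robust accuracy = 1 - success rate (one value per epsilon)
robust_accuracy = 1 - is_adv.float().mean(dim=-1)
for eps, acc in zip(epsilons, robust_accuracy):
    print(f"eps={eps}: robust accuracy {acc.item():.4f}")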