captum
captum copied to clipboard
Some Attr & Influence module tests failing randomly
🐛 Bug
When running tests for the optim module code I've been working on, occasionally some of the tests in other modules fail, and it looks as though the repeat offenders can sometimes produce values outside the expected range. I've verified that the failures occur on the master branch as well.
The tests.attr.test_deeplift_classification.test_softmax_classification_batch_zero_baseline test is particularly easy to make fail by running it more than once.
The test failure log below is from the most recent batch of failures that I've noticed. Interestingly enough, it is rarer for only one test in another module to fail than for several to fail at once.
=================================== FAILURES ===================================
_____________ Test.test_softmax_classification_batch_zero_baseline _____________
self = <tests.attr.test_deeplift_classification.Test testMethod=test_softmax_classification_batch_zero_baseline>
def test_softmax_classification_batch_zero_baseline(self) -> None:
num_in = 40
input = torch.arange(0.0, num_in * 3.0, requires_grad=True).reshape(3, num_in)
baselines = 0
model = SoftmaxDeepLiftModel(num_in, 20, 10)
dl = DeepLift(model)
self.softmax_classification(
> model, dl, input, baselines, torch.tensor([2, 2, 2])
)
tests/attr/test_deeplift_classification.py:61:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/attr/test_deeplift_classification.py:169: in softmax_classification
self._assert_attributions(model, attributions, input, baselines, delta, target2)
tests/attr/test_deeplift_classification.py:187: in _assert_attributions
"some samples".format(delta),
E AssertionError: tensor(False) is not true : The sum of attribution values tensor([0.0007, 0.0020, 0.0034]) is not nearly equal to the difference between the endpoint for some samples
_ TestTracInRegression.test_tracin_regression_TracInCP_sample_wise_trick_features_20 _
a = (<tests.influence._core.test_tracin_regression.TestTracInRegression testMethod=test_tracin_regression_TracInCP_sample_wise_trick_features_20>,)
@wraps(func)
def standalone_func(*a):
> return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_regression.py:153: in test_tracin_regression
self, train_scores, train_scores_sample_wise_trick
tests/helpers/basic.py:35: in assertTensorAlmostEqual
torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
E AssertionError: 6876.1328125 != 0.0 within 0.0001 delta (6876.1328125 difference)
_ TestTracInSelfInfluence.test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs _
a = (<tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs>,)
@wraps(func)
def standalone_func(*a):
> return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_self_influence.py:74: in test_tracin_self_influence
mode="max",
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test = <tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs>
actual = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7682e-06, 2.4623e+00,
1.3540e-05, 1.2162e+04])
expected = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7683e-06, 2.4623e+00,
1.3540e-05, 1.2161e+04])
delta = 0.01, mode = 'max'
def assertTensorAlmostEqual(test, actual, expected, delta=0.0001, mode="sum"):
assert isinstance(actual, torch.Tensor), (
"Actual parameter given for " "comparison must be a tensor."
)
if not isinstance(expected, torch.Tensor):
expected = torch.tensor(expected, dtype=actual.dtype)
assert (
actual.shape == expected.shape
), f"Expected tensor with shape: {expected.shape}. Actual shape {actual.shape}."
actual = actual.cpu()
expected = expected.cpu()
if mode == "sum":
test.assertAlmostEqual(
torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
elif mode == "max":
# if both tensors are empty, they are equal but there is no max
if actual.numel() == expected.numel() == 0:
return
if actual.size() == torch.Size([]):
test.assertAlmostEqual(
torch.max(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
else:
for index, (input, ref) in enumerate(zip(actual, expected)):
almost_equal = abs(input - ref) <= delta
if hasattr(almost_equal, "__iter__"):
almost_equal = almost_equal.all()
assert (
almost_equal
), "Values at index {}, {} and {}, differ more than by {}".format(
> index, input, ref, delta
)
E AssertionError: Values at index 31, 12161.9365234375 and 12161.494140625, differ more than by 0.01
tests/helpers/basic.py:54: AssertionError
__ TestTracInSelfInfluence.test_tracin_self_influence_TracInCP_unpack_inputs ___
a = (<tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCP_unpack_inputs>,)
@wraps(func)
def standalone_func(*a):
> return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_self_influence.py:74: in test_tracin_self_influence
mode="max",
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test = <tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCP_unpack_inputs>
actual = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7682e-06, 2.4623e+00,
1.3540e-05, 1.2162e+04])
expected = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7683e-06, 2.4623e+00,
1.3540e-05, 1.2161e+04])
delta = 0.01, mode = 'max'
def assertTensorAlmostEqual(test, actual, expected, delta=0.0001, mode="sum"):
assert isinstance(actual, torch.Tensor), (
"Actual parameter given for " "comparison must be a tensor."
)
if not isinstance(expected, torch.Tensor):
expected = torch.tensor(expected, dtype=actual.dtype)
assert (
actual.shape == expected.shape
), f"Expected tensor with shape: {expected.shape}. Actual shape {actual.shape}."
actual = actual.cpu()
expected = expected.cpu()
if mode == "sum":
test.assertAlmostEqual(
torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
elif mode == "max":
# if both tensors are empty, they are equal but there is no max
if actual.numel() == expected.numel() == 0:
return
if actual.size() == torch.Size([]):
test.assertAlmostEqual(
torch.max(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
else:
for index, (input, ref) in enumerate(zip(actual, expected)):
almost_equal = abs(input - ref) <= delta
if hasattr(almost_equal, "__iter__"):
almost_equal = almost_equal.all()
assert (
almost_equal
), "Values at index {}, {} and {}, differ more than by {}".format(
> index, input, ref, delta
)
E AssertionError: Values at index 31, 12161.9365234375 and 12161.494140625, differ more than by 0.01
tests/helpers/basic.py:54: AssertionError
To Reproduce
It happens relatively randomly as I'm running tests, with no discernible cause.
Expected behavior
The tests should always pass. Currently these kinds of failures seem to happen too often.
Environment
I'm installing Captum straight from the source via !pip3 install -e .[dev]
# Colab Instance environment
PyTorch version: 1.11.0+cu113
Is debug build: False
CUDA used to build PyTorch: 11.3
ROCM used to build PyTorch: N/A
OS: Ubuntu 18.04.5 LTS (x86_64)
GCC version: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Clang version: 6.0.0-1ubuntu2 (tags/RELEASE_600/final)
CMake version: version 3.22.4
Libc version: glibc-2.26
Python version: 3.7.13 (default, Apr 24 2022, 01:04:09) [GCC 7.5.0] (64-bit runtime)
Python platform: Linux-5.4.188+-x86_64-with-Ubuntu-18.04-bionic
Is CUDA available: False
CUDA runtime version: 11.1.105
GPU models and configuration: Could not collect
Nvidia driver version: Could not collect
cuDNN version: Probably one of the following:
/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5
/usr/lib/x86_64-linux-gnu/libcudnn.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.0.5
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True
Versions of relevant libraries:
[pip3] numpy==1.21.6
[pip3] torch==1.11.0+cu113
[pip3] torchaudio==0.11.0+cu113
[pip3] torchsummary==1.5.1
[pip3] torchtext==0.12.0
[pip3] torchvision==0.12.0+cu113
[conda] Could not collect
Another recorded set of failures:
=================================== FAILURES ===================================
_____________ Test.test_softmax_classification_batch_zero_baseline _____________
self = <tests.attr.test_deeplift_classification.Test testMethod=test_softmax_classification_batch_zero_baseline>
def test_softmax_classification_batch_zero_baseline(self) -> None:
num_in = 40
input = torch.arange(0.0, num_in * 3.0, requires_grad=True).reshape(3, num_in)
baselines = 0
model = SoftmaxDeepLiftModel(num_in, 20, 10)
dl = DeepLift(model)
self.softmax_classification(
> model, dl, input, baselines, torch.tensor([2, 2, 2])
)
tests/attr/test_deeplift_classification.py:61:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/attr/test_deeplift_classification.py:169: in softmax_classification
self._assert_attributions(model, attributions, input, baselines, delta, target2)
tests/attr/test_deeplift_classification.py:187: in _assert_attributions
"some samples".format(delta),
E AssertionError: tensor(False) is not true : The sum of attribution values tensor([0.0007, 0.0020, 0.0034]) is not nearly equal to the difference between the endpoint for some samples
_ TestTracInRegression.test_tracin_regression_TracInCP_sample_wise_trick_features_20 _
a = (<tests.influence._core.test_tracin_regression.TestTracInRegression testMethod=test_tracin_regression_TracInCP_sample_wise_trick_features_20>,)
@wraps(func)
def standalone_func(*a):
> return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_regression.py:153: in test_tracin_regression
self, train_scores, train_scores_sample_wise_trick
tests/helpers/basic.py:35: in assertTensorAlmostEqual
torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
E AssertionError: 6876.1328125 != 0.0 within 0.0001 delta (6876.1328125 difference)
_ TestTracInSelfInfluence.test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs _
a = (<tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs>,)
@wraps(func)
def standalone_func(*a):
> return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_self_influence.py:74: in test_tracin_self_influence
mode="max",
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test = <tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs>
actual = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7682e-06, 2.4623e+00,
1.3540e-05, 1.2162e+04])
expected = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7683e-06, 2.4623e+00,
1.3540e-05, 1.2161e+04])
delta = 0.01, mode = 'max'
def assertTensorAlmostEqual(test, actual, expected, delta=0.0001, mode="sum"):
assert isinstance(actual, torch.Tensor), (
"Actual parameter given for " "comparison must be a tensor."
)
if not isinstance(expected, torch.Tensor):
expected = torch.tensor(expected, dtype=actual.dtype)
assert (
actual.shape == expected.shape
), f"Expected tensor with shape: {expected.shape}. Actual shape {actual.shape}."
actual = actual.cpu()
expected = expected.cpu()
if mode == "sum":
test.assertAlmostEqual(
torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
elif mode == "max":
# if both tensors are empty, they are equal but there is no max
if actual.numel() == expected.numel() == 0:
return
if actual.size() == torch.Size([]):
test.assertAlmostEqual(
torch.max(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
else:
for index, (input, ref) in enumerate(zip(actual, expected)):
almost_equal = abs(input - ref) <= delta
if hasattr(almost_equal, "__iter__"):
almost_equal = almost_equal.all()
assert (
almost_equal
), "Values at index {}, {} and {}, differ more than by {}".format(
> index, input, ref, delta
)
E AssertionError: Values at index 31, 12161.9365234375 and 12161.494140625, differ more than by 0.01
tests/helpers/basic.py:54: AssertionError
__ TestTracInSelfInfluence.test_tracin_self_influence_TracInCP_unpack_inputs ___
a = (<tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCP_unpack_inputs>,)
@wraps(func)
def standalone_func(*a):
> return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_self_influence.py:74: in test_tracin_self_influence
mode="max",
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test = <tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCP_unpack_inputs>
actual = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7682e-06, 2.4623e+00,
1.3540e-05, 1.2162e+04])
expected = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7683e-06, 2.4623e+00,
1.3540e-05, 1.2161e+04])
delta = 0.01, mode = 'max'
def assertTensorAlmostEqual(test, actual, expected, delta=0.0001, mode="sum"):
assert isinstance(actual, torch.Tensor), (
"Actual parameter given for " "comparison must be a tensor."
)
if not isinstance(expected, torch.Tensor):
expected = torch.tensor(expected, dtype=actual.dtype)
assert (
actual.shape == expected.shape
), f"Expected tensor with shape: {expected.shape}. Actual shape {actual.shape}."
actual = actual.cpu()
expected = expected.cpu()
if mode == "sum":
test.assertAlmostEqual(
torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
elif mode == "max":
# if both tensors are empty, they are equal but there is no max
if actual.numel() == expected.numel() == 0:
return
if actual.size() == torch.Size([]):
test.assertAlmostEqual(
torch.max(torch.abs(actual - expected)).item(), 0.0, delta=delta
)
else:
for index, (input, ref) in enumerate(zip(actual, expected)):
almost_equal = abs(input - ref) <= delta
if hasattr(almost_equal, "__iter__"):
almost_equal = almost_equal.all()
assert (
almost_equal
), "Values at index {}, {} and {}, differ more than by {}".format(
> index, input, ref, delta
)
E AssertionError: Values at index 31, 12161.9365234375 and 12161.494140625, differ more than by 0.01