MatX
MatX copied to clipboard
[BUG] argmax returns invalid indices
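Describe the Bug: The argmax() function returns invalid indices in certain situations. In the reproduction below, reducing a 6x33x33 tensor over dimensions {1,2} yields a wrong flattened index for every 2D sub-matrix except the first.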
To Reproduce: Compile and run the code snippet below.
The output is:
$ ./testbug
Tensor{float} Rank: 1, Sizes:[6], Strides:[1]
000000: 1.0000e+00
000001: 1.0000e+00
000002: 1.0000e+00
000003: 1.0000e+00
000004: 1.0000e+00
000005: 1.0000e+00
Tensor{unknown} Rank: 1, Sizes:[6], Strides:[1]
000000: 1045
000001: 1278
000002: 1035
000003: 1304
000004: 1360
000005: 1047
0: 1045 == 1045
1: 1278 != 1080 (ERROR)
2: 1035 != 639 (ERROR)
3: 1304 != 710 (ERROR)
4: 1360 != 568 (ERROR)
5: 1047 != 57 (ERROR)
0: 1045 => 31,22
GetIdxFromAbs(1045) => 31,22
1: 1278 => 38,24
GetIdxFromAbs(1278) => 38,24
2: 1035 => 31,12
GetIdxFromAbs(1035) => 31,12
3: 1304 => 39,17
GetIdxFromAbs(1304) => 39,17
4: 1360 => 41,7
GetIdxFromAbs(1360) => 41,7
5: 1047 => 31,24
GetIdxFromAbs(1047) => 31,24
Expected Behavior: Each 2D sub-matrix of the t_a tensor contains exactly one non-zero entry, so we'd expect argmax over dimensions {1,2} to return, for each sub-matrix, the abs index (flattened index) of that entry, i.e. the corresponding value in the expected_abs array. Instead, the printout reports a mismatch (ERROR) for every element except the first.
Code Snippets
#include "matx.h"
using namespace matx;
int main(int argc, char **argv)
{
MATX_ENTER_HANDLER();
auto t_a = make_tensor<float>({6,33,33});
auto t_bi = make_tensor<index_t>({6});
auto t_b = make_tensor<float>({6});
(t_a = 0).run();
cudaDeviceSynchronize();
int expected_abs[6] {31*33+22, 32*33+24, 19*33+12, 21*33+17, 17*33+7, 1*33+24};
for (int n=0; n<6; n++)
{
int max_row = expected_abs[n] / 33;
int max_col = expected_abs[n] - max_row*33;
t_a(n,max_row,max_col) = 1.f;
}
cudaDeviceSynchronize();
(mtie(t_b, t_bi) = argmax(t_a, {1,2})).run();
cudaDeviceSynchronize();
matx::print(t_b);
matx::print(t_bi);
for (int n=0; n<6; n++)
{
if (t_bi(n) == expected_abs[n])
{
printf("%d: %lld == %d\n",n,t_bi(n),expected_abs[n]);
}
else
{
printf("%d: %lld != %d (ERROR)\n",n,t_bi(n),expected_abs[n]);
}
}
auto t_for_getidxfromabs = slice<2>(t_a, {0, 0, 0}, {matxDropDim, matxEnd, matxEnd});
for (int n=0; n<6; n++)
{
int max_row = t_bi(n) / 33;
int max_col = t_bi(n) - max_row*33;
printf("%d: %lld => %d,%d\n",n,t_bi(n),max_row,max_col);
auto t_max_index_coord = GetIdxFromAbs(t_for_getidxfromabs, t_bi(n));
printf(" GetIdxFromAbs(%lld) => %lld,%lld\n",t_bi(n),t_max_index_coord[0],t_max_index_coord[1]);
}
cudaDeviceSynchronize();
CUDA_CHECK_LAST_ERROR();
MATX_EXIT_HANDLER();
return 0;
}
System Details: Tested with Ubuntu 22.04, CUDA 12.5, and GCC 11.4.0.