xtensor
xtensor copied to clipboard
xt::view overhead in loop
This code runs 5-10x times faster with raw unchecked indexing on all dimensions
using T = float;
using Tensor = xt::xarray<T>;
auto start_line_points = xt::view(batch_of_lines, xt::all(), xt::range(_, -1));
auto end_line_points = xt::view(batch_of_lines, xt::all(), xt::range(1, _));
Tensor diff = end_line_points - start_line_points;
Tensor sq_norm = xt::norm_sq(
diff, {diff.dimension() - 1}, xt::evaluation_strategy::immediate);
static constexpr double eps = 1e-3;
for (size_t b = 0; b < closest_points.shape()[0]; ++b) {
for (size_t t = 0; t < closest_points.shape()[1]; ++t) {
if (abs(sq_norm.unchecked(b, t)) < eps) {
xt::view(closest_points, b, t) = xt::view(start_line_points, b, t);
continue;
}
for (size_t p = 0; p < closest_points.shape()[2]; ++p) {
auto curr_closest_pt = xt::view(closest_points, b, t, p);
auto curr_pt = xt::view(path_points, p);
auto curr_start_pt = xt::view(start_line_points, b, t);
auto curr_end_pt = xt::view(end_line_points, b, t);
auto curr_line_diff = xt::view(diff, b, t);
T u = ((curr_pt.unchecked(0) - curr_start_pt.unchecked(0)) * curr_line_diff.unchecked(0) +
(curr_pt.unchecked(1) - curr_start_pt.unchecked(1)) * curr_line_diff.unchecked(1)) /
sq_norm.unchecked(b, t);
if (u <= 0)
curr_closest_pt = curr_start_pt;
else if (u >= 1)
curr_closest_pt = curr_end_pt;
else
curr_closest_pt = curr_start_pt + u * curr_line_diff;
}
}
}
With unchecked undexing on all dimensions:
for (size_t b = 0; b < closest_points.shape()[0]; ++b) {
for (size_t t = 0; t < closest_points.shape()[1]; ++t) {
if (abs(sq_norm.unchecked(b, t)) < eps) {
xt::view(closest_points, b, t) = xt::view(start_line_points, b, t);
continue;
}
for (size_t p = 0; p < closest_points.shape()[2]; ++p) {
T u = ((path_points.unchecked(p, 0) - start_line_points.unchecked(b, t, 0)) * diff.unchecked(b, t, 0) +
(path_points.unchecked(p, 1) - start_line_points.unchecked(b, t, 1)) * diff.unchecked(b, t, 1)) /
sq_norm.unchecked(b, t);
if (u <= 0) {
closest_points.unchecked(b, t, p, 0) = start_line_points.unchecked(b, t, 0);
closest_points.unchecked(b, t, p, 1) = start_line_points.unchecked(b, t, 1);
} else if (u >= 1) {
closest_points.unchecked(b, t, p, 0) = end_line_points.unchecked(b, t, 0);
closest_points.unchecked(b, t, p, 1) = end_line_points.unchecked(b, t, 1);
}
else {
closest_points.unchecked(b, t, p, 0) = start_line_points.unchecked(b, t, 0) + u * diff.unchecked(b, t);
closest_points.unchecked(b, t, p, 1) = start_line_points.unchecked(b, t, 1) + u * diff.unchecked(b, t);
}
}
}
}
Does xt::view brings much overhead ?
Your example is not very minimal so a bit hard to judge and discuss.
That being said:
- In general
xt::view
is currently unfortunately not very efficient. - Even if
xt::view
would be overhauled in some (near) future, using indices will always be faster: inxt::view
you always need to create some overhead. - If you use a fixed-rank array I'm not sure that
unchecked
is still faster thanoperator()
if you compile without assertions. But on this point I'm not entirely sure. Didn't we discuss this once @JohanMabille ?
@tdegeus Sorry for verbosity. Overall here is looping by 3 dimensions, and in first chunk of code i used views to get xarray of points (x, y), and manipulate them, instead of indexing, and its came out much slower.