Still another array access performance issue (in debug)
Zig Version
0.11.0-dev.3105+e46d7a369
Steps to Reproduce and Observed Behavior
This is a follow-up to #13938 which was only fixed in release mode(which means I still have to rely on the workaround to be able to run my code in debug mode). The reproduction code is basically identical:
const std = @import("std");
const len = 32*32*32;
fn getIndex(i: u16) u16 {
return i;
}
pub const Chunk = struct {
blocks: [len]u16 = undefined,
};
pub noinline fn regenerateMainMesh(chunk: *Chunk) u32 {
var sum: u32 = 0;
var i: u16 = 0;
while(i < len) : (i += 1) {
sum += chunk.blocks[getIndex(i)]; // ← workaround: (&chunk.blocks)[...]
}
return sum;
}
pub fn main() void {
var chunk: Chunk = Chunk{};
for(&chunk.blocks, 0..) |*block, i| {
block.* = @intCast(i);
}
const start = std.time.nanoTimestamp();
const sum = regenerateMainMesh(&chunk);
const end = std.time.nanoTimestamp();
std.log.err("Time: {} Sum: {}", .{end - start, sum});
}
Running this in debug mode is taking 93 ms for iterating over a 32768 element array:
$ zig run test.zig
error: Time: 93325090 Sum: 536854528
Running this in a profiler, I can see that the bottleneck still is a memcpy that is called once every iteration and appears to be copying the entire array:
Expected Behavior
When applying the workaround
- sum += chunk.blocks[getIndex(i)]; // ← workaround: (&chunk.blocks)[...]
+ sum += (&chunk.blocks)[getIndex(i)]; // ← workaround: (&chunk.blocks)[...]
the performance is significantly better, taking only 0.4 ms instead of 93 ms:
$ zig run test.zig
error: Time: 357398 Sum: 536854528
I just updated my zig version and the problem got a lot worse.
EDIT: I updated again and it is now back to how it was originally.
Expand
This simplified example was not affected in the past:
const std = @import("std");
const len = 32*32*32;
var blocks: [len]u16 = undefined;
pub noinline fn regenerateMainMesh() u32 {
var sum: u32 = 0;
var i: u16 = 0;
while(i < len) : (i += 1) {
sum += blocks[i]; // ← workaround: (&blocks)[...]
}
return sum;
}
pub fn main() void {
for(&blocks, 0..) |*block, i| {
block.* = @intCast(u16, i);
}
const start = std.time.nanoTimestamp();
const sum = regenerateMainMesh();
const end = std.time.nanoTimestamp();
std.log.err("Time: {} Sum: {}", .{end - start, sum});
}
But now it is also slow:
$ zig version
0.11.0-dev.3380+7e0a02ee2
$ zig run test.zig
error: Time: 98298061 Sum: 536854528
$ ~/Downloads/zig-old/zig version
0.11.0-dev.3132+465272921
$ ~/Downloads/zig-old/zig run test.zig
error: Time: 632556 Sum: 536854528
Fix landed in 2ab78937dd29fbc299ac434f962a5ff41002cc43.
Fix reverted in 9d5a133f18f16108543cd54b2b37ee4e17f09cac.