std.tar: add writer
Simplifies code in docs creation where we used std.tar.output.Header directly.
The Writer uses that Header internally and provides a higher-level interface.
It updates the checksum on write, handles long file names, and allows setting mtime and the file permission mode. It also provides a handy interface for passing a Dir.WalkerEntry.
I tested that zig std and zig test -femit-docs create sources.tar exactly as before this change.
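A minimal sketch of the intended usage. The writer construction and setRoot appear in the benchmark code below; the writeEntry convenience method for walker entries is my assumption based on the description above:

const std = @import("std");

pub fn main() !void {
    var gpa_instance = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa_instance.deinit() == .ok);
    const gpa = gpa_instance.allocator();

    var out_file = try std.fs.cwd().createFile("out.tar", .{});
    defer out_file.close();

    // Wrap any underlying writer; checksums, long names and content
    // padding are handled internally.
    var w = std.tar.writer(out_file.writer().any());

    var dir = try std.fs.cwd().openDir("src", .{ .iterate = true });
    defer dir.close();
    var walker = try dir.walk(gpa);
    defer walker.deinit();
    while (try walker.next()) |entry| {
        // Assumed convenience for Dir.WalkerEntry: picks the right header
        // type for files, directories and symlinks.
        try w.writeEntry(entry);
    }
}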
Can you share the performance difference before/after?
Generating sources.tar 10 times in a loop:
before:
Time (mean ± σ): 103.1 ms ± 2.2 ms [User: 4.9 ms, System: 88.3 ms]
Range (min … max): 99.4 ms … 109.5 ms 27 runs
after:
Time (mean ± σ): 234.7 ms ± 4.0 ms [User: 19.8 ms, System: 204.8 ms]
Range (min … max): 228.9 ms … 242.3 ms 12 runs
The difference is because I used a smaller buffer when writing file content: 512 bytes instead of the 4000 used by any().writeFile. When I changed that buffer to 4000 I got the same result:
Time (mean ± σ): 103.3 ms ± 2.9 ms [User: 4.7 ms, System: 88.9 ms]
Range (min … max): 99.7 ms … 114.5 ms 27 runs
Let me do something about that.
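The fix amounts to using a larger copy buffer in the writer's file-content path. A minimal sketch of the idea:

const std = @import("std");

// Sketch: copy file content into the tar stream through a larger stack
// buffer. Reading 512 bytes at a time makes the syscall count dominate;
// a 4096-byte buffer (or the 4000 used by any().writeFile) amortizes it.
fn writeContent(out: anytype, file: std.fs.File) !void {
    var buffer: [4096]u8 = undefined;
    while (true) {
        const n = try file.read(&buffer);
        if (n == 0) break;
        try out.writeAll(buffer[0..n]);
    }
}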
Comparing the previous version and this one when creating sources.tar:
$ poop ./prev ./this
Benchmark 1 (48 runs): ./prev
measurement mean ± σ min … max outliers delta
wall_time 104ms ± 2.32ms 99.2ms … 110ms 2 ( 4%) 0%
peak_rss 0 ± 0 0 … 0 0 ( 0%) 0%
cpu_cycles 19.8M ± 371K 19.2M … 20.3M 0 ( 0%) 0%
instructions 25.5M ± 897 25.5M … 25.5M 3 ( 6%) 0%
cache_references 26.2K ± 1.18K 23.2K … 28.2K 0 ( 0%) 0%
cache_misses 7.23K ± 1.27K 5.33K … 11.6K 1 ( 2%) 0%
branch_misses 69.4K ± 1.10K 67.5K … 72.4K 0 ( 0%) 0%
Benchmark 2 (48 runs): ./this
measurement mean ± σ min … max outliers delta
wall_time 104ms ± 2.66ms 99.1ms … 111ms 3 ( 6%) + 0.6% ± 1.0%
peak_rss 0 ± 0 0 … 0 0 ( 0%) - nan% ± -nan%
cpu_cycles 20.7M ± 583K 18.5M … 21.9M 4 ( 8%) 💩+ 5.0% ± 1.0%
instructions 29.8M ± 622K 26.9M … 29.9M 6 (13%) 💩+ 16.8% ± 0.7%
cache_references 24.8K ± 1.52K 20.4K … 28.0K 1 ( 2%) ⚡- 5.2% ± 2.1%
cache_misses 7.39K ± 1.20K 4.95K … 9.00K 0 ( 0%) + 2.3% ± 6.9%
branch_misses 57.8K ± 1.21K 52.5K … 60.0K 2 ( 4%) ⚡- 16.8% ± 0.7%
Code used for benchmark:
prev
const std = @import("std");

pub fn main() !void {
var gpa_instance = std.heap.GeneralPurposeAllocator(.{}){};
defer std.debug.assert(gpa_instance.deinit() == .ok);
const gpa = gpa_instance.allocator();
var lib_dir = try std.fs.cwd().openDir("/home/ianic/Code/zig/lib", .{});
var out_dir = try std.fs.cwd().openDir("/home/ianic/Code/tmp", .{});
// previous version
for (0..10) |_| {
var out_file = try out_dir.createFile("sources.tar", .{});
defer out_file.close();
var w = out_file.writer();
var std_dir = try lib_dir.openDir("std", .{ .iterate = true });
defer std_dir.close();
var walker = try std_dir.walk(gpa);
defer walker.deinit();
while (try walker.next()) |entry| {
switch (entry.kind) {
.file => {
if (!std.mem.endsWith(u8, entry.basename, ".zig"))
continue;
if (std.mem.endsWith(u8, entry.basename, "test.zig"))
continue;
},
else => continue,
}
var file = try std_dir.openFile(entry.path, .{});
defer file.close();
const stat = try file.stat();
const padding = p: {
const remainder = stat.size % 512;
break :p if (remainder > 0) 512 - remainder else 0;
};
var file_header = std.tar.output.Header.init();
file_header.typeflag = .regular;
try file_header.setPath("std", entry.path);
try file_header.setSize(stat.size);
try file_header.updateChecksum();
try w.writeAll(std.mem.asBytes(&file_header));
try w.any().writeFile(file);
try w.writeByteNTimes(0, padding);
}
{
// Since this command is JIT compiled, the builtin module available in
// this source file corresponds to the user's host system.
const builtin_zig = @embedFile("builtin");
var file_header = std.tar.output.Header.init();
file_header.typeflag = .regular;
try file_header.setPath("builtin", "builtin.zig");
try file_header.setSize(builtin_zig.len);
try file_header.updateChecksum();
try w.writeAll(std.mem.asBytes(&file_header));
try w.writeAll(builtin_zig);
const padding = p: {
const remainder = builtin_zig.len % 512;
break :p if (remainder > 0) 512 - remainder else 0;
};
try w.writeByteNTimes(0, padding);
}
}
}
this
const std = @import("std");

pub fn main() !void {
var gpa_instance = std.heap.GeneralPurposeAllocator(.{}){};
defer std.debug.assert(gpa_instance.deinit() == .ok);
const gpa = gpa_instance.allocator();
var lib_dir = try std.fs.cwd().openDir("/home/ianic/Code/zig/lib", .{});
var out_dir = try std.fs.cwd().openDir("/home/ianic/Code/tmp", .{});
for (0..10) |_| {
var out_file = try out_dir.createFile("sources_new.tar", .{});
defer out_file.close();
var w = std.tar.writer(out_file.writer().any());
try w.setRoot("std");
var std_dir = try lib_dir.openDir("std", .{ .iterate = true });
defer std_dir.close();
var walker = try std_dir.walk(gpa);
defer walker.deinit();
while (try walker.next()) |entry| {
switch (entry.kind) {
.file => {
if (!std.mem.endsWith(u8, entry.basename, ".zig"))
continue;
if (std.mem.endsWith(u8, entry.basename, "test.zig"))
continue;
},
else => continue,
}
var file = try entry.dir.openFile(entry.basename, .{});
defer file.close();
try w.writeFile(entry.path, file);
}
{
// Since this command is JIT compiled, the builtin module available in
// this source file corresponds to the user's host system.
const builtin_zig = @embedFile("builtin");
w.prefix = "builtin";
try w.writeFileBytes("builtin.zig", builtin_zig, .{});
}
}
}
There were two use cases that pushed me into creating tar.writer:
- support for building tools like #14312
- use in tests, where we can build a tarball on the fly instead of embedding tarballs in the source (see the sketch below)
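For the test use case, a minimal sketch of building a tarball in memory, assuming writeFileBytes as used in the benchmark code above and a finish method for the trailing zero blocks:

const std = @import("std");

test "build tarball on the fly" {
    var buffer = std.ArrayList(u8).init(std.testing.allocator);
    defer buffer.deinit();

    // Write the archive into memory instead of embedding a .tar file in the repo.
    var w = std.tar.writer(buffer.writer().any());
    try w.writeFileBytes("dir/file.txt", "hello tar", .{});
    try w.finish(); // assumed: writes the two terminating 512-byte zero blocks

    // buffer.items now holds a tarball that can be fed to the tar reader under test.
}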
I started by modifying output.Header to support tarballs without a prefix. Then, while creating a tarball of the Zig source, I found a file whose name is too long, and realized that we need to support PAX headers or GNU long names. I also wanted an API where the user doesn't need to think about adding the checksum to the header or padding after the file content. So instead of modifying output.Header, I moved all of that into tar.writer.
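For illustration, the GNU long-name scheme boils down to emitting, before the real file header, a pseudo-entry with type flag 'L' whose data block carries the full path. A format-level sketch built from raw 512-byte blocks, not the PR's actual code:

const std = @import("std");

// Sketch of the GNU long-name extension: emit a pseudo-header with
// typeflag 'L' whose data holds the full path, then emit the regular
// file header with a truncated name. Called only when path.len > 100.
fn writeGnuLongName(out: anytype, path: []const u8) !void {
    var block = [_]u8{0} ** 512;
    const link_name = "././@LongLink"; // conventional name for this pseudo-entry
    @memcpy(block[0..link_name.len], link_name);
    _ = try std.fmt.bufPrint(block[124..135], "{o:0>11}", .{path.len}); // size, octal
    block[156] = 'L'; // typeflag
    @memset(block[148..156], ' '); // checksum field counts as spaces
    var sum: u64 = 0;
    for (block) |b| sum += b;
    _ = try std.fmt.bufPrint(block[148..154], "{o:0>6}", .{sum}); // 6 octal digits
    block[154] = 0; // then NUL; block[155] stays ' '
    try out.writeAll(&block);
    try out.writeAll(path);
    // Pad the path data to a full 512-byte block.
    try out.writeByteNTimes(0, (512 - path.len % 512) % 512);
}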
The computer is doing a lot more work than before. I have a strong suspicion that a different API could result in equivalent performance, while still providing the desired abstraction.
This version also sets mode and mtime for tar files, which is not needed in this case. I removed that and did some small optimizations.
Benchmark 1 (49 runs): ./previous
measurement mean ± σ min … max outliers delta
wall_time 102ms ± 2.00ms 100ms … 109ms 6 (12%) 0%
peak_rss 0 ± 0 0 … 0 0 ( 0%) 0%
cpu_cycles 19.6M ± 378K 19.0M … 20.5M 0 ( 0%) 0%
instructions 25.5M ± 38.6 25.5M … 25.5M 0 ( 0%) 0%
cache_references 28.7K ± 974 26.8K … 30.7K 0 ( 0%) 0%
cache_misses 9.85K ± 719 8.81K … 12.4K 4 ( 8%) 0%
branch_misses 69.2K ± 1.16K 66.1K … 72.2K 2 ( 4%) 0%
Benchmark 2 (50 runs): ./this
measurement mean ± σ min … max outliers delta
wall_time 101ms ± 2.73ms 98.4ms … 114ms 4 ( 8%) - 1.1% ± 0.9%
peak_rss 0 ± 0 0 … 0 0 ( 0%) - nan% ± -nan%
cpu_cycles 19.4M ± 666K 16.5M … 20.3M 3 ( 6%) - 1.0% ± 1.1%
instructions 22.0M ± 549K 18.7M … 22.1M 3 ( 6%) ⚡- 13.6% ± 0.6%
cache_references 27.6K ± 1.07K 25.5K … 30.9K 0 ( 0%) ⚡- 3.7% ± 1.4%
cache_misses 9.94K ± 499 8.89K … 11.4K 5 (10%) + 1.0% ± 2.5%
branch_misses 94.3K ± 5.34K 61.1K … 98.1K 3 ( 6%) 💩+ 36.2% ± 2.2%
I did a few optimizations. sources.tar generation benchmark (previous/this):
$ ~/.local/bin/poop ./prev ./this2
Benchmark 1 (50 runs): ./prev
measurement mean ± σ min … max outliers delta
wall_time 101ms ± 2.60ms 96.5ms … 108ms 1 ( 2%) 0%
peak_rss 0 ± 0 0 … 0 0 ( 0%) 0%
cpu_cycles 19.6M ± 521K 18.6M … 21.1M 0 ( 0%) 0%
instructions 25.4M ± 228K 23.9M … 25.5M 4 ( 8%) 0%
cache_references 28.0K ± 2.37K 24.2K … 36.4K 1 ( 2%) 0%
cache_misses 9.46K ± 2.23K 6.39K … 14.7K 0 ( 0%) 0%
branch_misses 69.2K ± 1.37K 66.2K … 72.3K 0 ( 0%) 0%
Benchmark 2 (52 runs): ./this2
measurement mean ± σ min … max outliers delta
wall_time 97.8ms ± 2.23ms 93.3ms … 104ms 1 ( 2%) ⚡- 3.0% ± 0.9%
peak_rss 0 ± 0 0 … 0 0 ( 0%) - nan% ± -nan%
cpu_cycles 16.3M ± 469K 15.7M … 17.4M 0 ( 0%) ⚡- 16.9% ± 1.0%
instructions 17.9M ± 41.0 17.9M … 17.9M 1 ( 2%) ⚡- 29.5% ± 0.2%
cache_references 27.3K ± 1.85K 23.9K … 30.2K 0 ( 0%) - 2.4% ± 3.0%
cache_misses 8.89K ± 1.92K 6.40K … 11.5K 0 ( 0%) - 6.1% ± 8.6%
branch_misses 45.4K ± 807 43.9K … 47.4K 0 ( 0%) ⚡- 34.4% ± 0.6%
Thanks for the rebase :)