zig icon indicating copy to clipboard operation
zig copied to clipboard

std.tar: add writer

Open ianic opened this issue 1 year ago • 6 comments

Simplifies the code in docs creation where we used std.tar.output.Header. The writer uses that Header internally and provides a higher-level interface: it updates the checksum on write, handles long file names, and allows setting mtime and file permission mode. It also provides a handy interface for passing Dir.WalkerEntry.

Tested that zig std and zig test -femit-docs are creating sources.tar as before this change.

ianic avatar Apr 10 '24 20:04 ianic

Can you share the performance difference before/after?

andrewrk avatar Apr 11 '24 21:04 andrewrk

Generating sources.tar 10 times in the loop:

before:

  Time (mean ± σ):     103.1 ms ±   2.2 ms    [User: 4.9 ms, System: 88.3 ms]
  Range (min … max):    99.4 ms … 109.5 ms    27 runs

after:

  Time (mean ± σ):     234.7 ms ±   4.0 ms    [User: 19.8 ms, System: 204.8 ms]
  Range (min … max):   228.9 ms … 242.3 ms    12 runs

The difference is because I used a smaller buffer when writing file content: 512 bytes instead of the 4000 used in any().writeFile. When I change that buffer to 4000 I get the same result:

  Time (mean ± σ):     103.3 ms ±   2.9 ms    [User: 4.7 ms, System: 88.9 ms]
  Range (min … max):    99.7 ms … 114.5 ms    27 runs

Let me do something about that.

ianic avatar Apr 11 '24 22:04 ianic

Comparing previous and this in creating sources.tar

$ poop ./prev ./this
Benchmark 1 (48 runs): ./prev
  measurement          mean ± σ            min … max           outliers         delta
  wall_time           104ms ± 2.32ms    99.2ms …  110ms          2 ( 4%)        0%
  peak_rss              0   ±    0         0   …    0            0 ( 0%)        0%
  cpu_cycles         19.8M  ±  371K     19.2M  … 20.3M           0 ( 0%)        0%
  instructions       25.5M  ±  897      25.5M  … 25.5M           3 ( 6%)        0%
  cache_references   26.2K  ± 1.18K     23.2K  … 28.2K           0 ( 0%)        0%
  cache_misses       7.23K  ± 1.27K     5.33K  … 11.6K           1 ( 2%)        0%
  branch_misses      69.4K  ± 1.10K     67.5K  … 72.4K           0 ( 0%)        0%
Benchmark 2 (48 runs): ./this
  measurement          mean ± σ            min … max           outliers         delta
  wall_time           104ms ± 2.66ms    99.1ms …  111ms          3 ( 6%)          +  0.6% ±  1.0%
  peak_rss              0   ±    0         0   …    0            0 ( 0%)          -  nan% ± -nan%
  cpu_cycles         20.7M  ±  583K     18.5M  … 21.9M           4 ( 8%)        💩+  5.0% ±  1.0%
  instructions       29.8M  ±  622K     26.9M  … 29.9M           6 (13%)        💩+ 16.8% ±  0.7%
  cache_references   24.8K  ± 1.52K     20.4K  … 28.0K           1 ( 2%)        ⚡-  5.2% ±  2.1%
  cache_misses       7.39K  ± 1.20K     4.95K  … 9.00K           0 ( 0%)          +  2.3% ±  6.9%
  branch_misses      57.8K  ± 1.21K     52.5K  … 60.0K           2 ( 4%)        ⚡- 16.8% ±  0.7%

Code used for benchmark:

prev

/// Benchmark driver for the previous approach: writes `sources.tar` ten times,
/// filling in a `std.tar.output.Header` by hand for every archived file.
///
/// Each iteration archives every `.zig` file under `lib/std` whose name does
/// not end in `test.zig`, followed by the embedded `builtin` source.
pub fn main() !void {
    var gpa_instance = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa_instance.deinit() == .ok);
    const gpa = gpa_instance.allocator();

    // Both directory handles are released on exit, like every other handle
    // opened in this function.
    var lib_dir = try std.fs.cwd().openDir("/home/ianic/Code/zig/lib", .{});
    defer lib_dir.close();
    var out_dir = try std.fs.cwd().openDir("/home/ianic/Code/tmp", .{});
    defer out_dir.close();

    // previous version
    for (0..10) |_| {
        var out_file = try out_dir.createFile("sources.tar", .{});
        defer out_file.close();
        var w = out_file.writer();

        var std_dir = try lib_dir.openDir("std", .{ .iterate = true });
        defer std_dir.close();

        var walker = try std_dir.walk(gpa);
        defer walker.deinit();

        while (try walker.next()) |entry| {
            // Keep only regular `.zig` files that do not end in `test.zig`.
            switch (entry.kind) {
                .file => {
                    if (!std.mem.endsWith(u8, entry.basename, ".zig"))
                        continue;
                    if (std.mem.endsWith(u8, entry.basename, "test.zig"))
                        continue;
                },
                else => continue,
            }

            var file = try std_dir.openFile(entry.path, .{});
            defer file.close();

            const stat = try file.stat();
            // Number of zero bytes needed to round the content up to the next
            // multiple of 512.
            const padding = p: {
                const remainder = stat.size % 512;
                break :p if (remainder > 0) 512 - remainder else 0;
            };

            // Header, then file content, then zero padding.
            var file_header = std.tar.output.Header.init();
            file_header.typeflag = .regular;
            try file_header.setPath("std", entry.path);
            try file_header.setSize(stat.size);
            try file_header.updateChecksum();
            try w.writeAll(std.mem.asBytes(&file_header));
            try w.any().writeFile(file);
            try w.writeByteNTimes(0, padding);
        }

        {
            // Since this command is JIT compiled, the builtin module available in
            // this source file corresponds to the user's host system.
            const builtin_zig = @embedFile("builtin");

            var file_header = std.tar.output.Header.init();
            file_header.typeflag = .regular;
            try file_header.setPath("builtin", "builtin.zig");
            try file_header.setSize(builtin_zig.len);
            try file_header.updateChecksum();
            try w.writeAll(std.mem.asBytes(&file_header));
            try w.writeAll(builtin_zig);
            // Same rounding to a multiple of 512 as for the files above.
            const padding = p: {
                const remainder = builtin_zig.len % 512;
                break :p if (remainder > 0) 512 - remainder else 0;
            };
            try w.writeByteNTimes(0, padding);
        }
    }
}

this

/// Benchmark driver for the new approach: writes `sources_new.tar` ten times
/// through `std.tar.writer` instead of filling in tar headers by hand.
///
/// Each iteration archives every `.zig` file under `lib/std` whose name does
/// not end in `test.zig`, followed by the embedded `builtin` source.
pub fn main() !void {
    var gpa_instance = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa_instance.deinit() == .ok);
    const gpa = gpa_instance.allocator();

    // Both directory handles are released on exit, like every other handle
    // opened in this function.
    var lib_dir = try std.fs.cwd().openDir("/home/ianic/Code/zig/lib", .{});
    defer lib_dir.close();
    var out_dir = try std.fs.cwd().openDir("/home/ianic/Code/tmp", .{});
    defer out_dir.close();

    for (0..10) |_| {
        var out_file = try out_dir.createFile("sources_new.tar", .{});
        defer out_file.close();

        var w = std.tar.writer(out_file.writer().any());
        try w.setRoot("std");

        var std_dir = try lib_dir.openDir("std", .{ .iterate = true });
        defer std_dir.close();

        var walker = try std_dir.walk(gpa);
        defer walker.deinit();

        while (try walker.next()) |entry| {
            // Keep only regular `.zig` files that do not end in `test.zig`.
            switch (entry.kind) {
                .file => {
                    if (!std.mem.endsWith(u8, entry.basename, ".zig"))
                        continue;
                    if (std.mem.endsWith(u8, entry.basename, "test.zig"))
                        continue;
                },
                else => continue,
            }
            // Open relative to the walker's current directory, but archive the
            // entry under its path relative to the walked root.
            var file = try entry.dir.openFile(entry.basename, .{});
            defer file.close();
            try w.writeFile(entry.path, file);
        }

        {
            // Since this command is JIT compiled, the builtin module available in
            // this source file corresponds to the user's host system.
            const builtin_zig = @embedFile("builtin");
            // NOTE(review): presumably stores this entry under "builtin"
            // rather than the "std" root set above; confirm against tar.writer.
            w.prefix = "builtin";
            try w.writeFileBytes("builtin.zig", builtin_zig, .{});
        }
    }
}

There were two use cases which pushed me into creating tar.writer:

  • support for building tools like #14312
  • use in tests where we can build tarball on the fly instead of embedding tarballs and adding them to the source

I started by modifying output.Header to support tarballs without a prefix. Then, while creating a tarball of the zig source, I found a file whose name is too long, and realized that we need to support pax headers or GNU long names. I also wanted an API where the user doesn't need to think about adding the checksum to the header or adding padding after the file content. So instead of modifying output.Header, I moved all of that into tar.writer.

ianic avatar Apr 12 '24 21:04 ianic

The computer is doing a lot more work than before. I have a strong suspicion that a different API could result in equivalent performance, while still providing the desired abstraction.

andrewrk avatar Apr 12 '24 21:04 andrewrk

This version was also setting mode and mtime for tar files, which is not needed in this case. I removed that and made some small optimizations.

Benchmark 1 (49 runs): ./previous
  measurement          mean ± σ            min … max           outliers         delta
  wall_time           102ms ± 2.00ms     100ms …  109ms          6 (12%)        0%
  peak_rss              0   ±    0         0   …    0            0 ( 0%)        0%
  cpu_cycles         19.6M  ±  378K     19.0M  … 20.5M           0 ( 0%)        0%
  instructions       25.5M  ± 38.6      25.5M  … 25.5M           0 ( 0%)        0%
  cache_references   28.7K  ±  974      26.8K  … 30.7K           0 ( 0%)        0%
  cache_misses       9.85K  ±  719      8.81K  … 12.4K           4 ( 8%)        0%
  branch_misses      69.2K  ± 1.16K     66.1K  … 72.2K           2 ( 4%)        0%
Benchmark 2 (50 runs): ./this
  measurement          mean ± σ            min … max           outliers         delta
  wall_time           101ms ± 2.73ms    98.4ms …  114ms          4 ( 8%)          -  1.1% ±  0.9%
  peak_rss              0   ±    0         0   …    0            0 ( 0%)          -  nan% ± -nan%
  cpu_cycles         19.4M  ±  666K     16.5M  … 20.3M           3 ( 6%)          -  1.0% ±  1.1%
  instructions       22.0M  ±  549K     18.7M  … 22.1M           3 ( 6%)        ⚡- 13.6% ±  0.6%
  cache_references   27.6K  ± 1.07K     25.5K  … 30.9K           0 ( 0%)        ⚡-  3.7% ±  1.4%
  cache_misses       9.94K  ±  499      8.89K  … 11.4K           5 (10%)          +  1.0% ±  2.5%
  branch_misses      94.3K  ± 5.34K     61.1K  … 98.1K           3 ( 6%)        💩+ 36.2% ±  2.2%

ianic avatar Apr 12 '24 23:04 ianic

I made a few optimizations. sources.tar generation benchmark (previous/this):

~/.local/bin/poop ./prev ./this2
Benchmark 1 (50 runs): ./prev
  measurement          mean ± σ            min … max           outliers         delta
  wall_time           101ms ± 2.60ms    96.5ms …  108ms          1 ( 2%)        0%
  peak_rss              0   ±    0         0   …    0            0 ( 0%)        0%
  cpu_cycles         19.6M  ±  521K     18.6M  … 21.1M           0 ( 0%)        0%
  instructions       25.4M  ±  228K     23.9M  … 25.5M           4 ( 8%)        0%
  cache_references   28.0K  ± 2.37K     24.2K  … 36.4K           1 ( 2%)        0%
  cache_misses       9.46K  ± 2.23K     6.39K  … 14.7K           0 ( 0%)        0%
  branch_misses      69.2K  ± 1.37K     66.2K  … 72.3K           0 ( 0%)        0%
Benchmark 2 (52 runs): ./this2
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          97.8ms ± 2.23ms    93.3ms …  104ms          1 ( 2%)        ⚡-  3.0% ±  0.9%
  peak_rss              0   ±    0         0   …    0            0 ( 0%)          -  nan% ± -nan%
  cpu_cycles         16.3M  ±  469K     15.7M  … 17.4M           0 ( 0%)        ⚡- 16.9% ±  1.0%
  instructions       17.9M  ± 41.0      17.9M  … 17.9M           1 ( 2%)        ⚡- 29.5% ±  0.2%
  cache_references   27.3K  ± 1.85K     23.9K  … 30.2K           0 ( 0%)          -  2.4% ±  3.0%
  cache_misses       8.89K  ± 1.92K     6.40K  … 11.5K           0 ( 0%)          -  6.1% ±  8.6%
  branch_misses      45.4K  ±  807      43.9K  … 47.4K           0 ( 0%)        ⚡- 34.4% ±  0.6%

ianic avatar Apr 13 '24 19:04 ianic

Thanks for the rebase :)

andrewrk avatar Aug 16 '24 05:08 andrewrk