nix icon indicating copy to clipboard operation
nix copied to clipboard

Add a way to control inclusion of empty directories in `builtins.path` results

Open infinisil opened this issue 10 months ago • 3 comments

Is your feature request related to a problem? Please describe. It's currently not possible to efficiently import a path to the store while not including empty directories or directories that would be filtered out. The only way to make it work at all is using recursive readDir calls as done in e.g. https://github.com/NixOS/nixpkgs/pull/188301 and my recent file set combinator work in https://github.com/NixOS/nixpkgs/pull/222981 and https://github.com/NixOS/nixpkgs/pull/245623. Supporting this would also address https://github.com/NixOS/nix/issues/4585.

This issue is sponsored by Antithesis :sparkles:

Describe the solution you'd like A way to control whether empty directories end up in the result of builtins.path:

builtins.path {
  # Whether to include a directory that's empty or fully filtered out, defaulting to true for backwards compat
  includeEmptyDir = path: <bool>;
}

Priorities

Add :+1: to issues you find important.

infinisil avatar Aug 13 '23 02:08 infinisil

I guess this could be phrased as

-Whether to include a directory that's empty or fully filtered out, defaulting to true for backwards compat
+Whether to include a directory, even if it would be returned empty, similar to `git add`'s behavior. Default: true.

not possible to efficiently import a path

I would expect the loss of performance to be insignificant compared to the overhead of actually doing the store I/O. I guess we don't have a readdir cache, which could achieve a similar performance improvement without complicating the builtins.

The only way to make it work at all is using recursive readDir calls

I think filesets work just fine with it. Are you still encountering a problem because we lack this as a builtin feature? In my view these builtins are primitive operations that are best not to be used directly except in libraries and the Nixpkgs lib specifically. Keeping their interface and implementation simple is good for reproducibility and for alternative Nix implementations.

roberth avatar Aug 14 '23 14:08 roberth

I would expect the loss of performance to be insignificant compared to the overhead of actually doing the store I/O. I guess we don't have a readdir cache, which could achieve a similar performance improvement without complicating the builtins.

You're right that the time isn't greatly affected because it's I/O dominated, but memory is the main problem. It's not a blocker for file sets, but it's just inefficient when it could be much better with primop support.

To measure the potential impact I wrote an optimized Nix code implementation of such an extra proposed includeEmptyDir argument (kind of copied from https://github.com/NixOS/nixpkgs/pull/245623), and benchmarked the total memory usage by applying it to a recent master Nixpkgs with various filters, comparing it to the builtins.path primop. The number of paths included in the result is in parenthesis:

implementation\filter everything no default.nix random only directories
builtins.path 10MB (61055) 10MB (37639) 11MB (28905) 10MB (24411)
non-primop with empty 56MB (61055) 145MB (37639) 92MB (28905) 135MB (24411)
non-primop without empty 56MB (61055) 109MB (17858) 87MB (26046) 35MB (1)

Where:

  • everything: Includes all paths
  • no default.nix: Recurses into all directories, but only accepts non-default.nix files
  • random: Gives each path a 75% chance of being included
  • only directories: Only recurses into directories, never including any files

To reproduce, copy the following two files locally and run ./script.sh

script.sh
#!/usr/bin/env bash
set -e

for impl in {native,alt-with,alt-without}; do
    for filter in {everything,defaultNix,random,noFiles}; do
        storePath=$(NIX_SHOW_STATS=1 NIX_SHOW_STATS_PATH=stats.json \
            nix-instantiate --eval --read-write-mode test.nix \
            --argstr impl "$impl" \
            --argstr filter "$filter" | jq -r .)

        count=$(find "$storePath" | wc -l)
        bytes=$(jq .gc.totalBytes stats.json)

        echo -e "$impl with $filter:\t$((bytes / 1000000)) MB gc bytes with $count paths in $storePath"
    done
done
test.nix
let
  inherit (builtins)
    readDir
    all
    mapAttrs
    split
    substring
    isBool
    elemAt
    attrValues
    length
    stringLength
    ;

  alternateBuiltinsPath = args:
    let
      path = args.path;
      filterFun = args.filter or (_: _: true);
      includeEmptyDirFun = args.includeEmptyDir or (_: true);

      recurse = pathString:
        let
          entries = mapAttrs (name: type:
            if filterFun (pathString + "/${name}") type then
              if type == "directory" then
                recurse (pathString + "/${name}")
              else
                true
            else
              false
          ) (readDir pathString);
          values = attrValues entries;
        in
        if all (x: x == false) values && ! includeEmptyDirFun pathString then
          false
        else if all (x: x == true) values then
          true
        else
          entries;

      tree = recurse (toString path);

      pathLength =
        if dirOf path == path then
          1
        else
          stringLength (toString path) + 1;

      newFilter = pathString: type:
        let
          components = split "/" (substring pathLength (-1) pathString);

          recurse = index: localTree:
            if isBool localTree then
              localTree
            else
              index >= length components
              || recurse (index + 2) localTree.${elemAt components index};

        in recurse 0 tree;

    in
    builtins.path {
      name = args.name or "source";
      path = path;
      filter = newFilter;
    };
in
{
  path ? fetchTarball "https://github.com/NixOS/nixpkgs/tarball/b91a4d8db46ea70cf37a4acf3c3818a2b791ddfe",
  filter,
  impl,
}:
let
  pickedFilter = {
    everything = pathString: type:
      true;

    defaultNix = pathString: type:
      type == "directory" || baseNameOf pathString != "default.nix";

    noFiles = pathString: type:
      type == "directory";

    random = pathString: type:
      # 75% chance
      isNull (builtins.match
        "[0-3].*"
        (builtins.hashString "sha256" pathString)
      );
  }.${filter};
in {
  native = builtins.path {
    name = "source";
    inherit path;
    filter = pickedFilter;
  };
  alt-with = alternateBuiltinsPath {
    name = "source";
    inherit path;
    filter = pickedFilter;
    includeEmptyDir = pathString: true;
  };
  alt-without = alternateBuiltinsPath {
    name = "source";
    inherit path;
    filter = pickedFilter;
    includeEmptyDir = pathString: false;
  };
}.${impl}

infinisil avatar Aug 15 '23 00:08 infinisil

This issue has been mentioned on NixOS Discourse. There might be relevant details there:

https://discourse.nixos.org/t/filtering-source-trees-with-nix-and-nixpkgs/19148/4

nixos-discourse avatar May 01 '24 20:05 nixos-discourse