candle icon indicating copy to clipboard operation
candle copied to clipboard

candle-kernels fails to build in Nix due to read-only sources

Open gmacon opened this issue 1 year ago • 2 comments

In Nix, builds are isolated and have limited network access. The built artifacts are written into a read-only location known as the "store". It's common to build Rust applications using Crane which invokes cargo build with a cargo cache directory filled with symlinks into the store. This means that when the build script is run, the sources are read-only. The current build.rs in candle-kernels specifies that src/lib.rs, which is read-only in my application, be overwritten by bindgen_cuda. This behavior is discouraged by the Cargo book. They instead recommend a pattern where the generated code is written to the build script output directory ($OUT_DIR) and then include!d into the library. I think you should implement this pattern.

I also opened https://github.com/Narsil/bindgen_cuda/issues/10 requesting that bindgen_cuda change their documentation to avoid having future consumers fall into this same anti-pattern.

gmacon avatar Aug 28 '24 20:08 gmacon

Can you share your (crane) nix expression?

ahirner avatar Sep 14 '24 22:09 ahirner

This isn't necessarily the smallest possible reproducer, but it's a reasonably small diff of non-generated files from the Crane quick-start-simple template: https://github.com/gmacon/candle-build-failure-reproducer.

To reproduce: nix build. The failing derivation is /nix/store/hccbv08gly4xpc00a0fbkwx98yqm8ygs-quick-start-simple-deps-0.1.0.drv.

gmacon avatar Sep 16 '24 00:09 gmacon

I had to do the following to make candle-kernels work in my Nix flake project:

Honestly, @ahirner, you can just write to `OUT_DIR` in the `build.rs` that generates `ptx.rs` and use the patch below to fix it; as @gmacon said, this is in accordance with the Cargo book.

```nix
    cargoVendorDir = craneLib.vendorCargoDeps {
      inherit src;

      # PART 1: Patch the `bindgen_cuda` Git dependency.
      overrideVendorGitCheckout = ps: drv:
        if builtins.any (p: p.name == "bindgen_cuda") ps then
          drv.overrideAttrs (_old: {
            postPatch = ''
              # Force bindgen_cuda to write to `$OUT_DIR/ptx.rs`.
              substituteInPlace src/lib.rs --replace-fail \
                'let mut file = std::fs::File::create(out).expect("Create lib in {out}");' \
                'let mut file = std::fs::File::create(std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("ptx.rs")).expect("Create lib in {out}");'
            '';
          })
        else
          drv;

      # PART 2: Patch the `candle-kernels` crates.io dependency.
      overrideVendorCargoPackage = p: drv:
        if p.name == "candle-kernels" then
          drv.overrideAttrs (_old: {
            postPatch = ''
              # INJECT the generated code.
              # The original code has `mod ptx;`, which expects a physical file `src/ptx.rs`.
              # We replace that line to create the module and then `include!` the generated
              # file from its correct location in $OUT_DIR.
              substituteInPlace src/lib.rs --replace-fail \
                'mod ptx;' \
                'pub mod ptx { include!(concat!(env!("OUT_DIR"), "/ptx.rs")); }'
            '';
          })
        else
          drv;
    };
```

then

commonArgs = { inherit src; strictDeps = true; inherit cargoVendorDir; ....... ..... }

This uses Crane's (crane.dev) vendored-dependency patching.

joeldsouzax avatar Aug 19 '25 11:08 joeldsouzax

I issued a pull request for this

https://github.com/huggingface/candle/pull/3059

joeldsouzax avatar Aug 19 '25 11:08 joeldsouzax

Until the pull request is merged, I use this workaround: I removed the bindgen_cuda patch and now patch only the candle crates.

    cargoVendorDir = craneLib.vendorCargoDeps {
      inherit src;
      overrideVendorCargoPackage = p: drv:
        if p.name == "candle-kernels" then
          drv.overrideAttrs (_old: {
            postPatch = ''
                              # ==========================================================
                              # COMMAND 1: Overwrite the broken build.rs file.
                              # This command starts with `cat` and ends with `EOF`.
                              # ==========================================================
                              cat > build.rs << 'EOF'
              use std::env;
              use std::path::PathBuf;

              fn main() {
                  println!("cargo:rerun-if-changed=build.rs");
                  println!("cargo:rerun-if-changed=src/compatibility.cuh");
                  println!("cargo:rerun-if-changed=src/cuda_utils.cuh");
                  println!("cargo:rerun-if-changed=src/binary_op_macros.cuh");

                  let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
                  let ptx_path = out_dir.join("ptx.rs");
                  let builder = bindgen_cuda::Builder::default();
                  let bindings = builder.build_ptx().unwrap();
                  bindings.write(ptx_path).unwrap();
              }
              EOF

                              # ==========================================================
                              # COMMAND 2: Patch the lib.rs file.
                              # This is a separate command that runs after the one above.
                              # ==========================================================
                              substituteInPlace src/lib.rs --replace-fail \
                                'mod ptx;' \
                                'mod ptx { include!(concat!(env!("OUT_DIR"), "/ptx.rs")); }'
            '';

          })

        else if p.name == "candle-examples" then
          drv.overrideAttrs (_old: {
            postPatch = ''
                              # Step 2.1: Overwrite the broken build.rs with a corrected version.
                              cat > build.rs << 'EOF'
              use anyhow::Result;
              use std::env;
              use std::path::PathBuf;

              struct KernelDirectories {
                  kernel_glob: &'static str,
                  rust_target: PathBuf,
                  include_dirs: &'static [&'static str],
              }

              fn main() -> Result<()> {
                  println!("cargo:rerun-if-changed=build.rs");

                  let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
                  let kernel_dirs = [KernelDirectories {
                      kernel_glob: "examples/custom-ops/kernels/*.cu",
                      rust_target: out_dir.join("cuda_kernels.rs"),
                      include_dirs: &[],
                  }];

                  for kdir in kernel_dirs.iter() {
                      let builder = bindgen_cuda::Builder::default().kernel_paths_glob(kdir.kernel_glob);
                      let bindings = builder.build_ptx().unwrap();
                      bindings.write(&kdir.rust_target).unwrap()
                  }
                  Ok(())
              }
              EOF

                              # Step 2.2: Patch the library code to include the generated file.
                              substituteInPlace examples/custom-ops/main.rs --replace-fail \
                                "mod cuda_kernels;" \
                                "include!(concat!(env!(\"OUT_DIR\"), \"/cuda_kernels.rs\"));"
            '';
          })
        else
          drv;
    };

`

joeldsouzax avatar Aug 19 '25 14:08 joeldsouzax

Fixed in #3059

ivarflakstad avatar Nov 23 '25 21:11 ivarflakstad