[Next] Non-determinism in fsharp_binary or runfiles?
When I build a tar from an fsharp_binary, I find that the hash keeps changing:
$ bazel build //...
$ sha256sum ./bazel-bin/add-lambda/bundle-tar.tar
ac9631140af3662b5af4c6cfaf9c55c1e165c3e20c60334e1fc71d3e3831f36f ./bazel-bin/add-lambda/bundle-tar.tar
$ bazel clean
$ bazel build //...
$ sha256sum ./bazel-bin/add-lambda/bundle-tar.tar
50a69d0cd43298cc227ab0365f6d5e4a4c125b912b5475cc79a6f46ce96f633c ./bazel-bin/add-lambda/bundle-tar.tar
$ bazel clean
$ bazel build //...
$ sha256sum ./bazel-bin/add-lambda/bundle-tar.tar
fde70e9cb03a4b5c56890d82cc1a0111fc24e3f186fe2a1134b877c6b5db69ad ./bazel-bin/add-lambda/bundle-tar.tar
BUILD.bazel
load("@rules_pkg//:pkg.bzl", "pkg_tar", "pkg_zip")
load("@rules_dotnet//dotnet:defs.bzl", "fsharp_binary")
fsharp_binary(
name = "add-lambda",
srcs = [
"Lambda.fs",
],
target_frameworks = [ "net6.0" ],
deps = [
"@paket.main//microsoft.aspnetcore.app.ref",
"@paket.main//fsharp.core",
"@paket.main//amazon.lambda.core",
"@paket.main//amazon.lambda.runtimesupport",
"//math-utils",
],
visibility = [
"//visibility:public",
],
)
pkg_tar(
name = "bundle-tar",
include_runfiles = True,
srcs = [
":add-lambda",
],
)
WORKSPACE
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "rules_pkg",
url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.7.0/rules_pkg-0.7.0.tar.gz",
sha256 = "8a298e832762eda1830597d64fe7db58178aa84cd5926d76d5b744d6558941c2",
)
load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
rules_pkg_dependencies()
http_archive(
name = "bazel_skylib",
sha256 = "c6966ec828da198c5d9adbaa94c05e3a1c7f21bd012a0b29ba8ddbccb2c93b0d",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.1.1/bazel-skylib-1.1.1.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.1.1/bazel-skylib-1.1.1.tar.gz",
],
)
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
git_repository(
name = "rules_dotnet",
remote = "https://github.com/bazelbuild/rules_dotnet",
# branch = "next",
commit = "5fc875093b558a199fbe739f0717dfe09ff626e5",
shallow_since = "1655891408 +0000",
)
load(
"@rules_dotnet//dotnet:repositories.bzl",
"dotnet_register_toolchains",
"rules_dotnet_dependencies",
)
rules_dotnet_dependencies()
dotnet_register_toolchains("dotnet", "6.0.300")
load("//deps:paket.bzl", "paket")
paket()
$ bazel version
WARNING: Ignoring JAVA_HOME, because it must point to a JDK, not a JRE.
Build label: 5.1.1
Build target: bazel-out/k8-opt/bin/src/main/java/com/google/devtools/build/lib/bazel/BazelServer_deploy.jar
Build time: Fri Apr 8 15:49:48 2022 (1649432988)
Build timestamp: 1649432988
Build timestamp as int: 1649432988
Do you have an exact reproduction of this in some public repo? I can't reproduce this by just adding an fsharp_binary to a pkg_tar. Also, what OS are you using?
Also, could you try updating to Bazel 5.2? There were some cache invalidation bugs fixed in transitions that might be affecting this.
Unfortunately bumping to 5.2.0 did not fix things for me.
I am on Ubuntu:
$ lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 20.04.3 LTS
Release: 20.04
Codename: focal
This has a stable hash, so I think it is rules_dotnet:
genrule(
name = "hello",
outs = [ "hello.txt" ],
cmd = "echo 'Hello, world. ' > $@",
)
pkg_tar(
name = "bundle",
srcs = [
":hello",
],
include_runfiles = True,
)
Could you diff the contents of the tar files and see what files are different? Or if you have a repository with the code I can take a look at that as well. I can't reproduce this with the examples we have in the rules_dotnet repo.
Could you diff the contents of the tar files and see what files are different? Or if you have a repository with the code I can take a look at that as well. I can't reproduce this with the examples we have in the rules_dotnet repo.
Repro here: https://github.com/njlr/rules-dotnet-issue-296
The repo has a GitHub Action that reproduces the issue. There is a diff report attached to the build output: https://github.com/njlr/rules-dotnet-issue-296/actions/runs/2678212854
Hope it helps!


Thanks, I'll take a look at this!
@njlr Can you try out the latest next branch and see if this is still the case?
I updated the repo to the latest next but unfortunately the issue persists.
You can see the diff report here: https://github.com/njlr/rules-dotnet-issue-296/actions/runs/2965784764
Ok, can you try one more thing? Can you make sure that you have the --incompatible_strict_action_env flag set? If that does not fix anything I'll take a look as soon as I can.
Like this?
$ bazel clean
$ bazel build --incompatible_strict_action_env //...
$ sha256sum ./bazel-bin/bundle.tar
22b8484107937a16e5219f685159e3c54aa192268aea778efa60cda717d8648f ./bazel-bin/bundle.tar
$ bazel clean
$ bazel build --incompatible_strict_action_env //...
$ sha256sum ./bazel-bin/bundle.tar
4cb0f09b7e894a97a19acca6e01751883ecdeca64130bc950676f55e336b6c50 ./bazel-bin/bundle.tar
$ bazel clean
$ bazel build --incompatible_strict_action_env //...
$ sha256sum ./bazel-bin/bundle.tar
c77fd77fad1cba0ca182bb519a22eab208fcccfa730a6e790f50a1af26067584 ./bazel-bin/bundle.tar
I figured out what the issue is. There are absolute paths embedded into the DLLs. We need to use the --pathmap compiler option to make the paths relative and thus deterministic.
It will probably require a wrapper around the compilers, because we would not want to add the execroot as an input into the action, since that would make the caches not shareable. If we create a wrapper that looks up the execroot path and constructs the --pathmap options, we can avoid having to add the execroot path as an input to the action.
I figured out what the issue is. There are absolute paths embedded into the DLLs. We need to use the --pathmap compiler option to make the paths relative and thus deterministic.
It will probably require a wrapper around the compilers, because we would not want to add the execroot as an input into the action, since that would make the caches not shareable. If we create a wrapper that looks up the execroot path and constructs the --pathmap options, we can avoid having to add the execroot path as an input to the action.
When I run the build multiple times and check the hash, the paths have not been changed though?
The exec root changes when you do bazel clean. The reason for me not being able to reproduce this earlier is that in my bazelrc file I had
startup --output_user_root=/workspace/bazel_user_root
startup --output_base=/workspace/bazel_output_base
Which makes it so that the execroot does not change between bazel clean runs.
We have the pathmap functionality implemented in our internal setup mainly for remote caching. The multiplex compiler server does it. I think we can look into migrating that.
Fixed this in the latest release: v0.8.4
I have not migrated the compiler server yet, so I created bash/bat scripts to wrap around the compilers.
The strangest thing - in my repro on GitHub Actions the issue has been resolved. However, on my own machine (also Ubuntu) the build is not reproducible.
Any ideas?
@njlr Did you update to v0.8.4? I used your repro to test this and it was failing before and fixed once I tested the latest release. Could you try doing bazel clean --expunge once after you upgraded the rules_dotnet repository?
@njlr Did you update to v0.8.4? I used your repro to test this and it was failing before and fixed once I tested the latest release. Could you try doing
bazel clean --expunge once after you upgraded the rules_dotnet repository?
I did an expunge, yes.
WORKSPACE: https://github.com/njlr/rules-dotnet-issue-296/blob/master/WORKSPACE
diff <(xxd ./bundle-1/app.dll) <(xxd ./bundle-2/app.dll)
138c138
< 00000890: 626f 782f 3432 2f65 7865 6372 6f6f 742f box/42/execroot/
---
> 00000890: 626f 782f 3436 2f65 7865 6372 6f6f 742f box/46/execroot/
150c150
< 00000950: 6c69 6e75 782d 7361 6e64 626f 782f 3432 linux-sandbox/42
---
> 00000950: 6c69 6e75 782d 7361 6e64 626f 782f 3436 linux-sandbox/46
228c228
< 00000e30: 0020 4000 0000 0000 9436 f0aa 0001 4d50 . @......6....MP
---
> 00000e30: 0020 4000 0000 0000 2f02 c8ae 0001 4d50 . @...../.....MP
230c230
< 00000e50: 0000 0000 9436 f0aa 0100 0000 1300 0000 .....6..........
---
> 00000e50: 0000 0000 2f02 c8ae 0100 0000 1300 0000 ..../...........
233,234c233,234
< 00000e80: 542e 0000 5410 0000 5253 4453 f2b6 2c77 T...T...RSDS..,w
< 00000e90: 2779 1447 bbe4 6cdb 3912 94f8 0100 0000 'y.G..l.9.......
---
> 00000e80: 542e 0000 5410 0000 5253 4453 becc c304 T...T...RSDS....
> 00000e90: a6da 4445 8405 4ba3 de0c 995c 0100 0000 ..DE..K....\....
240c240
< 00000ef0: 7578 2d73 616e 6462 6f78 2f34 322f 6578 ux-sandbox/42/ex
---
> 00000ef0: 7578 2d73 616e 6462 6f78 2f34 362f 6578 ux-sandbox/46/ex
260,262c260,262
< 00001030: 3536 00f2 b62c 7727 7914 47fb e46c db39 56...,w'y.G..l.9
< 00001040: 1294 f894 36f0 aa37 9071 4c2f 8657 e8af ....6..7.qL/.W..
< 00001050: 00d4 d100 0000 0000 0000 0000 0000 0000 ................
---
> 00001030: 3536 00be ccc3 04a6 da44 9504 054b a3de 56.......D...K..
> 00001040: 0c99 5c2f 02c8 ae69 4875 caeb 25d6 d8f7 ..\/...iHu..%...
> 00001050: b881 1000 0000 0000 0000 0000 0000 0000 ................
Something to do with the execroot?
Did you copy the snippet from the release as it is here: https://github.com/bazelbuild/rules_dotnet/releases/tag/v0.8.4 ? Because the DLLs you sent still have full paths in them, but I cannot reproduce that with the latest release.
A few tweaks, but I have the correct tag and hash:
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "rules_pkg",
url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.7.0/rules_pkg-0.7.0.tar.gz",
sha256 = "8a298e832762eda1830597d64fe7db58178aa84cd5926d76d5b744d6558941c2",
)
load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
rules_pkg_dependencies()
http_archive(
name = "bazel_skylib",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
],
sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
)
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
bazel_skylib_workspace()
http_archive(
name = "rules_dotnet",
sha256 = "c2d20df062fa34fdc7103d54ae44a2e3d715e2728c1fc21f7df450061ffb7726",
strip_prefix = "rules_dotnet-0.8.4",
url = "https://github.com/bazelbuild/rules_dotnet/archive/refs/tags/v0.8.4.tar.gz",
)
load(
"@rules_dotnet//dotnet:repositories.bzl",
"dotnet_register_toolchains",
"rules_dotnet_dependencies",
)
rules_dotnet_dependencies()
dotnet_register_toolchains("dotnet", "6.0.300")
load("@rules_dotnet//dotnet:rules_dotnet_nuget_packages.bzl", "rules_dotnet_nuget_packages")
rules_dotnet_nuget_packages()
load("@rules_dotnet//dotnet:paket2bazel_dependencies.bzl", "paket2bazel_dependencies")
paket2bazel_dependencies()
load("//deps:paket.bzl", "paket")
paket()
What bazel version?
And any bazel flags you have enabled?
Ah! So sorry, I had a .bazelrc in my home directory that I forgot about.
cat ~/.bazelrc
common --override_repository=rules_dotnet=/home/njlr/Documents/rules_dotnet
It works after removing that.