go
go copied to clipboard
x/tools/gopls: regtest flakes due to hanging go commands
#!watchflakes
post <- goos != "netbsd" && (pkg ~ `^golang.org/x/tools/gopls` || (repo == "tools" && pkg == "")) &&
(`detected hanging go command` || log ~ `detected hanging go command`)
greplogs --dashboard -md -l -e 'runCmdContext' --since=2022-07-14
2022-08-15T17:42:12-987de34-1f833e4/darwin-amd64-12_0 2022-08-14T00:06:23-35f806b-59865f1/netbsd-amd64-9_0 2022-08-12T20:40:05-bebd890-2f6783c/netbsd-386-9_0 2022-08-12T18:15:28-bebd890-b6f87b0/netbsd-amd64-9_0 2022-08-12T12:39:26-88d981e-f67c766/netbsd-amd64-9_0 2022-08-12T00:04:29-c4ec74a-a5cd894/netbsd-386-9_0 2022-08-11T19:05:54-c4ec74a-62654df/netbsd-amd64-9_0 2022-08-11T17:53:50-37a81b6-a526ec1/netbsd-amd64-9_0 2022-08-11T16:19:14-37a81b6-2340d37/netbsd-amd64-9_0 2022-08-11T12:53:59-b2156b5-3c200d6/netbsd-386-9_0 2022-08-10T22:22:48-b2156b5-6b80b62/netbsd-amd64-9_0 2022-08-10T17:41:25-0ad49fd-f19f6c7/netbsd-amd64-9_0 2022-08-10T15:08:24-3950865-c81dfdd/netbsd-amd64-9_0 2022-08-10T02:14:09-6fa767d-5531838/plan9-386-0intro 2022-08-09T14:33:24-92d58ea-0981d9f/openbsd-386-70 2022-08-09T14:12:01-92d58ea-662a729/netbsd-amd64-9_0 2022-08-09T13:39:27-92d58ea-9e8020b/netbsd-386-9_0 2022-08-09T11:28:56-92d58ea-0f8dffd/netbsd-amd64-9_0 2022-08-08T18:10:56-fff6d6d-4bcc138/netbsd-amd64-9_0 2022-08-08T15:33:45-06d96ee-0581d69/netbsd-amd64-9_0 2022-08-08T15:07:46-06d96ee-cd54ef1/netbsd-amd64-9_0 2022-08-08T14:12:21-06d96ee-e761556/netbsd-amd64-9_0 2022-08-08T06:16:59-06d96ee-0f6ee42/darwin-amd64-11_0 2022-08-08T06:16:59-06d96ee-0f6ee42/netbsd-386-9_0 2022-08-06T15:20:00-06d96ee-0c4db1e/plan9-386-0intro 2022-08-05T19:51:08-06d96ee-4fb7e22/plan9-386-0intro 2022-08-04T20:05:03-81c7dc4-39728f4/netbsd-386-9_0 2022-08-04T20:05:03-81c7dc4-39728f4/netbsd-amd64-9_0 2022-08-04T20:04:16-3519aa2-39728f4/netbsd-386-9_0 2022-08-04T19:57:25-763f65c-39728f4/netbsd-386-9_0 2022-08-04T18:51:46-99fd76f-39728f4/openbsd-386-70 2022-08-04T17:05:18-3e0a503-fb1bfd4/netbsd-amd64-9_0 2022-08-04T15:50:11-3e0a503-fcdd099/netbsd-386-9_0 2022-08-04T15:50:11-3e0a503-44ff9bf/netbsd-amd64-9_0 2022-08-04T15:31:49-87f47bb-44ff9bf/plan9-386-0intro 2022-08-04T14:58:59-87f47bb-4345620/netbsd-386-9_0 2022-08-04T10:32:51-3e0a503-a10afb1/linux-386-buster 2022-08-03T21:02:27-8b9a1fb-f28fa95/plan9-386-0intro 2022-08-03T21:02:27-8b9a1fb-4345620/netbsd-386-9_0 2022-08-03T18:07:40-d08f5dc-fcdd099/netbsd-386-9_0 2022-08-03T13:50:38-ddb90ec-c6a2dad/dragonfly-amd64-622 2022-08-03T13:50:38-ddb90ec-c6a2dad/plan9-386-0intro 2022-08-03T12:09:24-ddb90ec-29b9a32/plan9-386-0intro 2022-08-02T18:52:36-0d04f65-29b9a32/plan9-386-0intro 2022-08-02T18:19:01-d025cce-be59153/netbsd-amd64-9_0 2022-08-02T18:16:22-10cb435-d723df7/netbsd-amd64-9_0 2022-08-02T18:07:14-4d0b383-d723df7/netbsd-386-9_0 2022-08-02T18:07:14-4d0b383-d723df7/netbsd-amd64-9_0 2022-08-02T17:23:42-4d0b383-1b7e71e/darwin-amd64-nocgo 2022-08-02T16:05:48-4d0b383-f2a9f3e/netbsd-amd64-9_0 2022-07-29T20:19:23-9580c84-9240558/windows-arm64-11 2022-07-28T20:06:00-8ea5687-d9242f7/darwin-amd64-nocgo 2022-07-27T15:04:58-39a4e36-4248146/freebsd-386-13_0 2022-07-26T18:43:08-6c8a6c4-d9242f7/aix-ppc64 2022-07-25T20:44:49-2a6393f-24dc27a/darwin-amd64-10_14 2022-07-25T18:11:01-4375b29-795a88d/plan9-386-0intro 2022-07-25T14:16:17-178fdf9-64f2829/plan9-386-0intro 2022-07-22T20:12:19-7b605f4-c5da4fb/plan9-386-0intro 2022-07-21T20:11:06-ec1f924-c4a6d30/plan9-386-0intro 2022-07-15T15:11:26-22d1494-2aa473c/windows-386-2008-newcc 2022-07-15T14:27:36-1a4e02f-4651ebf/windows-arm64-10 2022-07-15T14:20:24-db8f89b-4651ebf/windows-arm64-10 2022-07-14T21:03:14-db8f89b-783ff7d/windows-arm64-11 2022-07-14T21:01:58-db8f89b-aa80228/darwin-arm64-11 2022-07-14T19:05:09-db8f89b-a906d3d/windows-arm64-10 2022-07-14T15:54:36-db8f89b-266c70c/windows-arm64-10 2022-07-14T01:47:39-db8f89b-558785a/windows-arm64-11
We recently started waiting for all go command invocations when shutting down gopls regtests. It appears that sometimes we kill the go command and still don't get a result from cmd.Wait()
. For example, here:
https://build.golang.org/log/00046e0b005c7660d676a3a415561950048f756a
In that failure, the test runner looks otherwise healthy (other tests ran fast), and yet the goroutine stack clearly shows a go command hanging for 9 minutes here: https://cs.opensource.google/go/x/tools/+/master:internal/gocommand/invoke.go;l=260;drc=f38573358cbedf46d64c5759ef41b72afcf0c5c0
@bcmills do you happen to have any idea of what might cause this?
Two observations:
- We're not checking the error from
Kill
. If killing failed, I don't think we can expect Wait to return. - Discussion at https://groups.google.com/g/golang-nuts/c/pUaEjqd5wo8
The only reason the kill system call can fail (at least in this situation) is when the child process has already exited, so failure of kill is unlikely to be the culprit. More likely kill terminated the go process itself, but not the tree of processes rooted at it. If one of them (a test?) retains an open file descriptor to the stdout pipe created by os/exec then the cmd.Run operation will hang indefinitely.
To dig further, we could add logic to run during the failure (on linux) that does ps -e f
and ls -l /proc/*/fd/*
to show the process tree and their open files.
Please do check the error from Kill
— that would at least give a useful data point for diagnostics. (This may be closely related to #53400 #50187.)
greplogs -l -e 'panic: test timed out(?:.*\n)*goroutine \d+ \[.*, \d+ minutes\]:\n.*runCmdContext' --since=2022-08-15 --details
2022-09-13T12:58:04-ec74389-dad2966/netbsd-386-9_0 2022-09-13T12:38:14-ec74389-c7a0b15/netbsd-amd64-9_0 2022-09-13T12:37:11-ec74389-f4a587b/netbsd-amd64-9_0 2022-09-11T21:08:46-e71c338-00ece11/netbsd-amd64-9_0 2022-09-09T18:42:57-0eebaab-b244b89/netbsd-386-9_0 2022-09-09T15:36:26-eeaf4eb-600db8a/netbsd-386-9_0 2022-09-09T15:10:10-4754f75-9a5574a/netbsd-amd64-9_0 2022-09-08T15:55:44-a630751-cd8aa40/netbsd-amd64-9_0 2022-09-08T14:33:45-a630751-e5ed221/netbsd-386-9_0 2022-09-08T13:27:43-a630751-51fdfeb/darwin-arm64-11 2022-09-07T16:37:07-dd1bab2-3e75859/darwin-arm64-12 2022-09-07T14:47:36-c1dd25e-d81ed4d/netbsd-386-9_0 2022-09-07T14:47:36-c1dd25e-d81ed4d/netbsd-amd64-9_0 2022-09-07T14:33:56-c1dd25e-0752f4a/netbsd-386-9_0 2022-09-07T00:49:41-83d7619-8fd2073/netbsd-amd64-9_0 2022-09-06T22:14:56-83d7619-c82304b/netbsd-386-9_0 2022-09-06T17:36:47-83d7619-e0e0c8f/netbsd-amd64-9_0 2022-09-06T16:17:26-83d7619-4a4127b/netbsd-amd64-9_0 2022-09-06T16:04:07-83d7619-60ef4b2/netbsd-386-9_0 2022-09-06T12:35:13-d815cba-8a32354/netbsd-amd64-9_0 2022-09-03T06:26:16-d815cba-9fc09d2/netbsd-386-9_0 2022-09-02T18:51:37-be9eab1-5b1fbfb/netbsd-amd64-9_0 2022-09-02T18:17:25-5ba8541-0cf996a/netbsd-amd64-9_0 2022-09-02T15:27:18-eb8352e-321a220/netbsd-386-9_0 2022-09-02T05:05:13-33c1ddd-8e35910/darwin-arm64-12 2022-09-02T05:05:13-33c1ddd-8e35910/netbsd-amd64-9_0 2022-09-02T00:06:00-40cfaff-0592ce5/netbsd-amd64-9_0 2022-09-01T23:18:06-40cfaff-1280ae7/netbsd-amd64-9_0 2022-09-01T22:37:04-40cfaff-aa5ff29/netbsd-386-9_0 2022-09-01T18:08:05-f16be35-ef84141/netbsd-386-9_0 2022-09-01T15:27:30-6c10975-86e9e0e/netbsd-386-9_0 2022-09-01T03:24:42-49ab44d-91ef076/netbsd-amd64-9_0 2022-09-01T00:42:27-550e1f5-64b260d/netbsd-386-9_0 2022-08-31T23:52:00-550e1f5-ca634fa/netbsd-amd64-9_0 2022-08-31T22:22:43-550e1f5-e4b624e/netbsd-amd64-9_0 2022-08-31T22:10:52-550e1f5-33a7e5a/netbsd-386-9_0 2022-08-31T21:08:24-4ccc73c-ce77a46/netbsd-386-9_0 2022-08-31T21:08:24-4ccc73c-889d326/netbsd-amd64-9_0 2022-08-31T16:26:07-41c3a9b-d2d5929/netbsd-386-9_0 2022-08-31T01:16:54-248c34b-ee0e40a/netbsd-amd64-9_0 2022-08-30T21:30:15-248c34b-bd56cb9/netbsd-amd64-9_0 2022-08-30T00:49:19-248c34b-629ae1c/netbsd-amd64-9_0 2022-08-28T16:04:49-717a671-846c378/netbsd-386-9_0 2022-08-26T19:15:02-717a671-897ad2f/netbsd-amd64-9_0 2022-08-26T18:28:14-717a671-bf812b3/netbsd-386-9_0 2022-08-26T17:48:20-7f23307-296c40d/netbsd-amd64-9_0 2022-08-26T15:36:36-7c5e035-951d2c6/netbsd-386-9_0 2022-08-25T19:17:14-d35bb19-f64f12f/netbsd-386-9_0 2022-08-25T17:31:33-db6a62c-83b5fe6/netbsd-386-9_0 2022-08-25T14:56:18-db6a62c-e4be2ac/netbsd-amd64-9_0 2022-08-25T04:00:07-db6a62c-8c8429f/netbsd-386-9_0 2022-08-25T04:00:07-db6a62c-8c8429f/netbsd-amd64-9_0 2022-08-24T21:20:11-db6a62c-e4bed41/netbsd-amd64-9_0 2022-08-24T17:56:56-587a153-cfae70c/netbsd-386-9_0 2022-08-24T15:37:38-587a153-d5aa088/netbsd-amd64-9_0 2022-08-24T14:31:08-c837a30-f983a93/netbsd-amd64-9_0 2022-08-24T12:12:12-587a153-b5a9459/netbsd-amd64-9_0 2022-08-23T20:32:50-587a153-fc0d423/netbsd-386-9_0 2022-08-19T16:55:03-e55fb40-7dad1d2/netbsd-386-9_0 2022-08-19T16:17:50-e55fb40-5729419/netbsd-386-9_0 2022-08-19T15:53:47-e55fb40-f324355/netbsd-amd64-9_0 2022-08-19T03:09:05-e55fb40-a409356/netbsd-386-9_0 2022-08-15T20:41:00-938e162-de0f4d1/netbsd-amd64-9_0 2022-08-15T20:02:31-938e162-7b45edb/netbsd-amd64-9_0 2022-08-15T19:17:20-8c83056-7b45edb/netbsd-amd64-9_0 2022-08-15T17:42:12-987de34-1f833e4/darwin-amd64-12_0
(66 matching logs)
Interesting, I was debugging this in https://go.dev/cl/424075. On windows, our call to Process.Kill() is failing with "invalid argument": https://storage.googleapis.com/go-build-log/e2f60200/windows-386-2008_71ad7007.log
A bit of googling suggests that this is because we can't kill subprocesses on windows.
@bcmills any advice for how to properly kill the go command on windows?
After reading the source a bit more: this is EINVAL, which appears to mean that the Process.wait() has exited and the handle released, so this is a race, although it is surprising that we hit it so reliably.
Change https://go.dev/cl/424075 mentions this issue: internal/gocommand: add instrumentation for hanging go commands
any advice for how to properly kill the go command on windows?
Can't be done without creating a whole extra process group, unfortunately. (Probably we should add a side-channel — perhaps an open file descriptor or a pidfile? — to request clean shutdown on Windows.)
Change https://go.dev/cl/431075 mentions this issue: internal/gocommand: tweak debugging for hanging go commands
A hit! https://build.golang.org/log/4eea0e5bfe425fcd097a49fcb92fd015fa88f383
Nice. Well, that test process seems very much alive, falsifying my hypothesis.
13032 12910 (compile)
9802 13032 go list -modfile=/tmp/workdir/tmp/go.8f5d4cc245da9790bda529e64ae3e7
Looks like the hanging go command is in the middle of a compile. Wish we had the full subprocesss command line -- I'll look into that.
Not sure how to interpret the fstat output.
That's a dead cmd/compile process: there's no command because argv has been destroyed along with the rest of the address space. Perhaps the go list parent simply hasn't called waitpid yet, so the process table entry has to be retained. I suspect the problem is in go list.
Aha, thanks (excuse my ps noobness).
Note that we instrumented this panic in two places: once before Kill()
and once after. This is before, so I don't think it falsifies your hypothesis, if I'm understanding correctly.
That one is netbsd
, which could plausibly be explained as #50138. (I'd rather we wait for one of these on a platform without a known issue for Wait
before we do too much digging.)
greplogs --dashboard -md -l -e 'HANGING GO' --since=2022-09-15
2022-09-17T02:56:51-4d18923-cc1b20e/netbsd-amd64-9_0 2022-09-16T17:59:31-a61f20e-a84f46a/netbsd-amd64-9_0 2022-09-16T16:33:11-a61f20e-8451529/netbsd-386-9_0 2022-09-16T14:49:13-a61f20e-b35b4a3/netbsd-amd64-9_0
Still only netbsd. Posting the greplogs
query here for future reference.
Ooh, nice! https://go.dev/issue/55323#issuecomment-1254107802 has a darwin/arm64
hang. 😃
As expected, cmd/go
itself has already exited at the point of the hang:
2022/09/20 08:42:37 error killing the Go command: os: process already finished
But there are an awful lot of processes in that dump, and nothing that obviously stands out as a subprocess of cmd/go
that would have access to stdout
or stderr
. 😕
Here's an off-the-wall theory: maybe the file descriptors actually are closed, and something in the parent Go process (i.e. gopls
) isn't waking up from the end-of-stream event..?
#53434 could be related.
To check that hypothesis, two suggestions:
-
gopls
should log exactly which PID was the stuckgo
command, just in case it left a remnant in theps
orlsof
output that we can track down. -
gopls
should invokedebug.SetTraceback("system")
before the terminalpanic
here.
A simpler explanation is that gopls sent SIGINT, waited for 1s, then sent SIGKILL, and the child (go) exited during the 1s grace period, causing kill(2) to fail with ESRCH (-> ErrProcessDone = "process already finished"). Seems like that's to be expected if a busy machine causes a delay in responding to SIGINT. Perhaps the debugging logic should treat ErrProcessDone as success, not a reason to debug?
@adonovan but in this case the panic indicates that we waited up to 10 seconds after that kill. The error from Kill is merely logged.
Per @bcmills comment on the CL, we should expect that cmd.Wait
returns quickly once the process is done.
the panic indicates that we waited up to 10 seconds after that kill. The error from Kill is merely logged.
Good point.
So the sequence of events that we know must have happened from the log is:
- ctx cancelled
- SIGINT
- wait 1s
- SIGKILL => ErrProcessDone
- wait 10s
- dump shows a go process still alive
ErrProcessDone means that either waitpid returned ESRCH or that this was the second call to Process.signal after a first one set the 'done' event at the end of a successful call to Process.wait. In either case, the process must already have terminated.
Is it possible that the dump shows a different go process? Seems improbable, but I feel like we've eliminated the impossible. We should log the pid of cmd.Process, like @bcmills suggested.
Found new dashboard test flakes for:
#!watchflakes
post <- pkg ~ `^golang.org/x/tools/gopls` &&
`DETECTED A HANGING GO COMMAND`
2022-09-20 12:40 darwin-arm64-12 tools@df2eb938 go@1eeb257b x/tools/gopls/internal/regtest/diagnostics (log)
2022/09/20 08:42:37 error killing the Go command: os: process already finished
DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.
ps axo ppid,pid,command:
-------------------------
...
panic: detected hanging go command: see golang/go#54461 for more details
goroutine 17901 [running]:
golang.org/x/tools/internal/gocommand.HandleHangingGoCommand()
/tmp/buildlet/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:333 +0x34c
golang.org/x/tools/internal/gocommand.runCmdContext({0x100f61f60, 0x14004c8d9e0}, 0x14005848000)
/tmp/buildlet/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:294 +0x2f0
golang.org/x/tools/internal/gocommand.(*Invocation).run(0x1400645d148, {0x100f61f60, 0x14004c8d9e0}, {0x100f5a548?, 0x14003f0d380}, {0x100f5a548?, 0x14003f0d3b0})
/tmp/buildlet/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:234 +0xcc4
golang.org/x/tools/internal/gocommand.(*Invocation).runWithFriendlyError(0x1400645d110?, {0x100f61f60, 0x14004c8d9e0}, {0x100f5a548?, 0x14003f0d380?}, {0x100f5a548?, 0x14003f0d3b0?})
...
golang.org/x/tools/gopls/internal/lsp/mod.Diagnostics({0x100f61f98, 0x14003fda3c0}, {0x100f6dd78, 0x14002971540})
/tmp/buildlet/gopath/src/golang.org/x/tools/gopls/internal/lsp/mod/diagnostics.go:24 +0x128
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnose(0x14005298a20, {0x100f61f98, 0x14003fadd40}, {0x100f6dd78, 0x14002971540}, 0x0)
/tmp/buildlet/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:229 +0x24c
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshot(0x14005298a20, {0x100f6dd78, 0x14002971540}, {0x14006365ea0, 0x1, 0x1}, 0xe0?)
/tmp/buildlet/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:156 +0x238
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots.func1({0x100f6dd78?, 0x14002971540?}, {0x14006365ea0?, 0x14002e2cc60?, 0x140053cd900?})
/tmp/buildlet/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:133 +0x78
created by golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots
/tmp/buildlet/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:131 +0x6c
2022-09-21 16:48 darwin-amd64-12_0 tools@2f047133 go@d7df8722 x/tools/gopls/internal/regtest/completion (log)
2022/09/21 18:03:09 error killing the Go command: os: process already finished
DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.
ps axo ppid,pid,command:
-------------------------
...
panic: detected hanging go command: see golang/go#54461 for more details
goroutine 7465 [running]:
golang.org/x/tools/internal/gocommand.HandleHangingGoCommand()
/Users/gopher/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:333 +0x391
golang.org/x/tools/internal/gocommand.runCmdContext({0x1cfb720, 0xc0021a4540}, 0xc0004c42c0)
/Users/gopher/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:294 +0x3ca
golang.org/x/tools/internal/gocommand.(*Invocation).run(0xc0004dd7e8, {0x1cfb720, 0xc0021a4540}, {0x1cf3ae0?, 0xc000310db0}, {0x1cf3ae0?, 0xc000310de0})
/Users/gopher/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:234 +0xef9
golang.org/x/tools/internal/gocommand.(*Invocation).runWithFriendlyError(0xc0004dd7b0?, {0x1cfb720, 0xc0021a4540}, {0x1cf3ae0?, 0xc000310db0?}, {0x1cf3ae0?, 0xc000310de0?})
...
golang.org/x/tools/gopls/internal/lsp/cache.(*snapshot).PackagesForFile(0xc002814000?, {0x1cfb758, 0xc00155d6b0}, {0xc001aee850, 0x6a}, 0x0?, 0x0?)
/Users/gopher/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/snapshot.go:627 +0x185
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseChangedFiles(0xc00203b0e0, {0x1cfb758, 0xc00155d4d0}, {0x1d07fb8, 0xc002814000}, {0xc0004a36e0, 0x1, 0x35418e635?}, 0x0)
/Users/gopher/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:188 +0x3e5
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshot(0xc00203b0e0, {0x1d07fb8, 0xc002814000}, {0xc0004a36e0, 0x1, 0x1}, 0x98?)
/Users/gopher/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:153 +0x229
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots.func1({0x1d07fb8?, 0xc002814000?}, {0xc0004a36e0?, 0xc002523060?, 0xc0013f59b0?})
/Users/gopher/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:133 +0x85
created by golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots
/Users/gopher/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:131 +0x90
Change https://go.dev/cl/434637 mentions this issue: internal/gocommand: show pid of process
Found new dashboard test flakes for:
#!watchflakes
post <- pkg ~ `^golang.org/x/tools/gopls` &&
`DETECTED A HANGING GO COMMAND`
2022-10-12 14:51 netbsd-386-9_0 tools@b20ae4bc go@5bef9381 x/tools/gopls/internal/regtest/misc (log)
serve.go:438: debug server listening at http://localhost:65454
serve.go:438: debug server listening at http://localhost:65453
DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.
ps axo ppid,pid,command:
...
panic: detected hanging go command (pid 12624): see golang/go#54461 for more details
goroutine 6978 [running]:
golang.org/x/tools/internal/gocommand.HandleHangingGoCommand(0x9d59500)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:333 +0x519
golang.org/x/tools/internal/gocommand.runCmdContext({0x8d25e34, 0x96357c0}, 0xbc8cc60)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:261 +0x1a5
golang.org/x/tools/internal/gocommand.(*Invocation).run(0xa1ecd48, {0x8d25e34, 0x96357c0}, {0x8d20ed4, 0xb24ca98}, {0x8d20ed4, 0xb24cab0})
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:234 +0xd2f
golang.org/x/tools/internal/gocommand.(*Invocation).runWithFriendlyError(0xa1ecd48, {0x8d25e34, 0x96357c0}, {0x8d20ed4, 0xb24ca98}, {0x8d20ed4, 0xb24cab0})
...
golang.org/x/tools/gopls/internal/lsp/cache.(*snapshot).load(0xbbb2c00, {0x8d25df4, 0xac81fb0}, 0x1, {0xa586700, 0x2, 0x2})
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/load.go:120 +0xd56
golang.org/x/tools/gopls/internal/lsp/cache.(*snapshot).loadWorkspace(0xbbb2c00, {0x8d25df4, 0xac81fb0}, 0x1)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/view.go:714 +0x235
golang.org/x/tools/gopls/internal/lsp/cache.(*snapshot).initialize(0xbbb2c00, {0x8d25df4, 0xac81fb0}, 0x1)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/view.go:648 +0x141
golang.org/x/tools/gopls/internal/lsp/cache.(*Session).createView.func5()
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/session.go:301 +0x47
created by golang.org/x/tools/gopls/internal/lsp/cache.(*Session).createView
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/session.go:299 +0xe0c
2022-10-12 14:51 netbsd-amd64-9_0 tools@b20ae4bc go@947091d3 x/tools/gopls/internal/regtest/diagnostics (log)
DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.
ps axo ppid,pid,command:
-------------------------
PPID PID COMMAND
...
panic: detected hanging go command (pid 14897): see golang/go#54461 for more details
goroutine 14379 [running]:
golang.org/x/tools/internal/gocommand.HandleHangingGoCommand(0xc0060525a0)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:333 +0x3d9
golang.org/x/tools/internal/gocommand.runCmdContext({0x117c588, 0xc005f7ff80}, 0xc005acf8c0)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:261 +0x1c5
golang.org/x/tools/internal/gocommand.(*Invocation).run(0xc0068d0900, {0x117c588, 0xc005f7ff80}, {0x11742a0?, 0xc004a8bc50}, {0x11742a0?, 0xc004a8bc80})
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:234 +0x104d
golang.org/x/tools/internal/gocommand.(*Invocation).runWithFriendlyError(0xc0068d08c8?, {0x117c588, 0xc005f7ff80}, {0x11742a0?, 0xc004a8bc50?}, {0x11742a0?, 0xc004a8bc80?})
...
golang.org/x/tools/gopls/internal/lsp/protocol.ServerHandler.func1({0x117c5c0, 0xc00552f020}, 0xc004475d40, {0x117c908, 0xc00552ef60})
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/protocol/protocol.go:157 +0x90
golang.org/x/tools/gopls/internal/lsp/lsprpc.handshaker.func1({0x117c5c0, 0xc00552f020}, 0xc004475d40, {0x117c908?, 0xc00552ef60?})
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/lsprpc/lsprpc.go:515 +0xa43
golang.org/x/tools/internal/jsonrpc2.MustReplyHandler.func1({0x117c5c0, 0xc00552f020}, 0xc00473ff80, {0x117c908?, 0xc00552ef60?})
/tmp/workdir/gopath/src/golang.org/x/tools/internal/jsonrpc2/handler.go:35 +0xf6
golang.org/x/tools/internal/jsonrpc2.AsyncHandler.func1.2()
/tmp/workdir/gopath/src/golang.org/x/tools/internal/jsonrpc2/handler.go:103 +0xa3
created by golang.org/x/tools/internal/jsonrpc2.AsyncHandler.func1
/tmp/workdir/gopath/src/golang.org/x/tools/internal/jsonrpc2/handler.go:100 +0x20a
2022-10-12 14:51 netbsd-amd64-9_0 tools@b20ae4bc go@28a6f9cf x/tools/gopls/internal/regtest/diagnostics (log)
DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.
ps axo ppid,pid,command:
-------------------------
PPID PID COMMAND
...
panic: detected hanging go command (pid 10646): see golang/go#54461 for more details
goroutine 3056 [running]:
golang.org/x/tools/internal/gocommand.HandleHangingGoCommand(0xc00238c960)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:333 +0x3d9
golang.org/x/tools/internal/gocommand.runCmdContext({0x116f150, 0xc0021ad7a0}, 0xc002079b80)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:261 +0x19a
golang.org/x/tools/internal/gocommand.(*Invocation).run(0xc0034c17a0, {0x116f150, 0xc0021ad7a0}, {0x1166c60?, 0xc0013d0090}, {0x1166c60?, 0xc0013d0180})
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:234 +0xef9
golang.org/x/tools/internal/gocommand.(*Invocation).runWithFriendlyError(0xc0034c1768?, {0x116f150, 0xc0021ad7a0}, {0x1166c60?, 0xc0013d0090?}, {0x1166c60?, 0xc0013d0180?})
...
golang.org/x/tools/gopls/internal/lsp/cache.(*snapshot).PackagesForFile(0xc0034ae140?, {0x116f188, 0xc00154ecc0}, {0xc001221570, 0x64}, 0x0?, 0x0?)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/snapshot.go:627 +0x185
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseChangedFiles(0xc00026ef00, {0x116f188, 0xc00154eb10}, {0x117bf18, 0xc0034ae140}, {0xc000383c00, 0x1, 0x40ef87?}, 0x0)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:189 +0x3e3
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshot(0xc00026ef00, {0x117bf18, 0xc0034ae140}, {0xc000383c00, 0x1, 0x1}, 0x88?)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:154 +0x229
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots.func1({0x117bf18?, 0xc0034ae140?}, {0xc000383c00?, 0xc000003ba8?, 0xc002a13fb8?})
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:134 +0x85
created by golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:132 +0x90
Is it possible that the dump shows a different go process? Seems improbable, but I feel like we've eliminated the impossible. We should log the pid of cmd.Process, like @bcmills suggested.
The pid in the last log above clearly shows that that the go list process is the child of the test, not some other process in the container. Huh...
We still haven't seen a single log with both (a) the "error killing the Go command" message and (b) a pid showing that 'go' is the child of the test. So it's possible that the log with just (a) was merely a very slow (netbsd) machine and the log with just (b) was showing a different go process in the ps output. If both were to occur in the same log it would indicate a bug in the system call.
merely a very slow (netbsd) machine
The NetBSD machines aren't particularly slow; they're buggy (#50138 up until yesterday, and #56180 after that).
Found new dashboard test flakes for:
#!watchflakes
post <- pkg ~ `^golang.org/x/tools/gopls` &&
`DETECTED A HANGING GO COMMAND`
2022-10-13 15:07 netbsd-amd64-9_0 tools@b2533141 go@4a0ce469 x/tools/gopls/internal/regtest/misc (log)
serve.go:438: debug server listening at http://localhost:65443
serve.go:438: debug server listening at http://localhost:65442
DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.
ps axo ppid,pid,command:
...
panic: detected hanging go command (pid 13914): see golang/go#54461 for more details
goroutine 32352 [running]:
golang.org/x/tools/internal/gocommand.HandleHangingGoCommand(0xc003546d20)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:333 +0x3d9
golang.org/x/tools/internal/gocommand.runCmdContext({0x11dc940, 0xc001ac99e0}, 0xc005ad3b80)
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:261 +0x19a
golang.org/x/tools/internal/gocommand.(*Invocation).run(0xc005025850, {0x11dc940, 0xc001ac99e0}, {0x11d4380?, 0xc0026a0c00}, {0x11d4380?, 0xc0026a0cc0})
/tmp/workdir/gopath/src/golang.org/x/tools/internal/gocommand/invoke.go:234 +0xef9
golang.org/x/tools/internal/gocommand.(*Invocation).runWithFriendlyError(0xc005025818?, {0x11dc940, 0xc001ac99e0}, {0x11d4380?, 0xc0026a0c00?}, {0x11d4380?, 0xc0026a0cc0?})
...
golang.org/x/tools/gopls/internal/lsp/cache.(*snapshot).PackagesForFile(0xc003fb7180?, {0x11dc978, 0xc0012b9140}, {0xc0054d5f20, 0x60}, 0x0?, 0x0?)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/cache/snapshot.go:627 +0x185
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseChangedFiles(0xc000b40700, {0x11dc978, 0xc0012b8d50}, {0x11eb198, 0xc003fb7180}, {0xc001480e30, 0x1, 0xc007442630?}, 0x1)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:189 +0x3e3
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshot(0xc000b40700, {0x11eb198, 0xc003fb7180}, {0xc001480e30, 0x1, 0x1}, 0xc0?)
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:154 +0x229
golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots.func1({0x11eb198?, 0xc003fb7180?}, {0xc001480e30?, 0x56a84a?, 0xc0030c3860?})
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:134 +0x85
created by golang.org/x/tools/gopls/internal/lsp.(*Server).diagnoseSnapshots
/tmp/workdir/gopath/src/golang.org/x/tools/gopls/internal/lsp/diagnostics.go:132 +0x90