flatcar-linux-update-operator
flatcar-linux-update-operator copied to clipboard
pkg/updateengine: ReceiveStatuses() hangs when update_engine GetStatus call returns error
Likely caused by: https://github.com/godbus/dbus/issues/244.
If this happens, agent will get stuck without clear message why.
This may happen e.g. when agent has no permissions to get status from update engine, if dbus policies are malformed.
The workaround we could do is to use CallWithContext() instead of plain Call() and set some upper timeout on the call.
Also related, if we plan to replace Reboot() call, we should take that into account as well: https://github.com/flatcar-linux/flatcar-linux-update-operator/issues/19.
The fix for this could be:
diff --git a/pkg/updateengine/client.go b/pkg/updateengine/client.go
index 6306077e..bda237c6 100644
--- a/pkg/updateengine/client.go
+++ b/pkg/updateengine/client.go
@@ -15,6 +15,7 @@
package updateengine
import (
+ "context"
"fmt"
"os"
"strconv"
@@ -106,22 +107,38 @@ func (c *Client) Close() error {
func (c *Client) ReceiveStatuses(rcvr chan<- Status, stop <-chan struct{}) {
// If there is an error getting the current status, ignore it and just
// move onto the main loop.
- st, _ := c.getStatus()
+
+ ctx, cancel := context.WithCancel(context.Background())
+
+ stopCh := make(chan struct{})
+
+ go func() {
+ <-stop
+ cancel()
+ close(stopCh)
+ }()
+
+ st, _ := c.getStatus(ctx)
rcvr <- st
for {
select {
case <-stop:
return
case signal := <-c.ch:
rcvr <- NewStatus(signal.Body)
}
}
}
// getStatus gets the current status from update_engine.
-func (c *Client) getStatus() (Status, error) {
- call := c.object.Call(DBusInterface+"."+DBusMethodNameGetStatus, 0)
+func (c *Client) getStatus(ctx context.Context) (Status, error) {
+ call := c.object.CallWithContext(ctx, DBusInterface+"."+DBusMethodNameGetStatus, 0)
if call.Err != nil {
return Status{}, call.Err
}