iDRAC-Telemetry-Reference-Tools
iDRAC-Telemetry-Reference-Tools copied to clipboard
Add support for Redfish Alert Events
I saw that the function `GetEventSSE` already exists in redfish.go, so I started building this out in my own environment, but I'm not too confident in the code. Here are some functions to get you started. I plan on pushing alerts into Moogsoft using their Custom Integration feature.
redfish.go
// ListenForAlerts looks up the EventService property of the Redfish service
// root ("/redfish/v1") and, when that service advertises a ServerSentEventUri,
// hands the stream over to GetEventSSE together with the caller's event channel.
//
// Any failure along the way (service root lookup, EventService lookup, missing
// SSE support, or a non-nil error returned by GetEventSSE) is delivered on
// event as a RedfishEvent whose Err field is set. Nothing is sent by this
// method itself when no error occurred.
func (r *RedfishClient) ListenForAlerts(Ctx context.Context, event chan<- *RedfishEvent) {
	outcome := new(RedfishEvent)
	// Every exit path below funnels through this single "report on failure" step.
	defer func() {
		if outcome.Err != nil {
			event <- outcome
		}
	}()

	root, rootErr := r.GetUri("/redfish/v1")
	if rootErr != nil {
		log.Println("Unable to get service root!", rootErr)
		outcome.Err = rootErr
		return
	}

	svc, svcErr := root.GetPropertyByName("EventService")
	if svcErr != nil {
		outcome.Err = svcErr
		return
	}

	// Only the server-sent-events transport is implemented.
	if svc.Object["ServerSentEventUri"] == nil {
		log.Println("Don't support POST back yet!")
		outcome.Err = errors.New("Don't support POST back yet!")
		return
	}

	outcome.Err = r.GetEventSSE(Ctx, event, svc)
}
redfishread.go
// RestartAlertEventListener starts a new goroutine running
// r.Redfish.ListenForAlerts with the device's context and event channel.
// NOTE(review): this method does not itself stop a previously started
// listener goroutine; confirm that an earlier listener has already exited
// before this is called, otherwise listeners may accumulate.
func (r *RedfishDevice) RestartAlertEventListener() {
go r.Redfish.ListenForAlerts(r.Ctx, r.Events)
}
// StartAlertEventListener receives SSE events for this device and forwards
// alert payloads to parseAlert. It creates r.Events if it is nil, starts a
// listener goroutine via r.Redfish.ListenForAlerts, and then loops forever
// reading from r.Events; it does not return.
//
// Handling per received event:
//   - nil event: logged and ignored.
//   - event with Err set: logged, then the listener is restarted. If the error
//     text contains "connection error" the loop first sleeps five minutes.
//   - event whose payload carries string "@odata.id" and "@odata.type"
//     properties: passed to parseAlert when the type contains "Event",
//     silently skipped otherwise.
//   - anything else (nil payload, missing or non-string "@odata.id" /
//     "@odata.type"): logged as an unknown SSE event instead of panicking on
//     a failed type assertion.
//
// A five-minute timer calls RestartAlertEventListener when no error-free event
// has arrived in that window; it is re-armed on every error-free event.
func (r *RedfishDevice) StartAlertEventListener(dataBusService *databus.DataBusService) {
	if r.Events == nil {
		r.Events = make(chan *redfish.RedfishEvent, 10)
	}
	timer := time.AfterFunc(time.Minute*5, r.RestartAlertEventListener)
	log.Printf("%s: Starting alert event listener...\n", r.SystemID)
	go r.Redfish.ListenForAlerts(r.Ctx, r.Events)
	for {
		event := <-r.Events
		if event == nil {
			log.Printf("%s: Got SSE nil event \n", r.SystemID)
			continue
		}
		if event.Err != nil { // SSE connect failure, retry connection
			log.Printf("%s: Got SSE error %s\n", r.SystemID, event.Err)
			if strings.Contains(event.Err.Error(), "connection error") {
				// Wait for 5 minutes before restarting, so that the iDRAC can be rebooted
				// and SSE connection can be re-established
				log.Printf("Sleep 5 minutes before restarting SSE connection for %s\n", r.SystemID)
				time.Sleep(time.Minute * 5)
			}
			r.RestartAlertEventListener()
			continue
		}

		// An error-free event proves the stream is alive: push the watchdog back.
		timer.Reset(time.Minute * 5)
		r.LastEvent = time.Now()

		if event.Payload == nil {
			log.Printf("%s: Got unknown SSE event \n", r.SystemID)
			continue
		}
		// The payload comes off the wire, so never assume these properties
		// exist or are strings; a bare .(string) would panic the listener.
		odataID, idOK := event.Payload.Object["@odata.id"].(string)
		odataType, typeOK := event.Payload.Object["@odata.type"].(string)
		if !idOK || !typeOK {
			log.Printf("%s: Got unknown SSE event \n", r.SystemID)
			continue
		}
		if !strings.Contains(odataType, "Event") {
			// Not an alert (some other resource type on the same stream).
			continue
		}
		log.Printf("%s: Got new alert for %s\n", r.SystemID, odataID)
		parseAlert(event.Payload, r.SystemID, r.HostTags, r.Redfish.Hostname, dataBusService)
	}
}
// getAlerts marks the device as RUNNING and then runs the alert event listener
// (StartAlertEventListener) on the calling goroutine. That listener loops
// forever in the code shown, so this call is not expected to return.
// The eventService parameter is accepted but not used by this function.
func getAlerts(r *RedfishDevice, eventService *redfish.RedfishPayload, dataBusService *databus.DataBusService) {
r.State = databus.RUNNING
r.StartAlertEventListener(dataBusService)
}
// alertString returns v as a string, or "" when v is nil or holds any other
// type. It lets optional alert properties be absent without panicking.
func alertString(v interface{}) string {
	s, _ := v.(string)
	return s
}

// parseAlertTimestamp parses an EventTimestamp value. It accepts RFC 3339
// (offsets written as "-07:00" or "Z") and falls back to the same layout with
// a colon-less offset ("-0700"), which was the only form accepted previously.
func parseAlertTimestamp(raw string) (time.Time, error) {
	ts, err := time.Parse(time.RFC3339, raw)
	if err == nil {
		return ts, nil
	}
	return time.Parse("2006-01-02T15:04:05-0700", raw)
}

// parseAlert takes an alert payload received over SSE, converts each entry of
// its "Events" array into a databus.DataValue, sends the resulting group on
// the data bus, and records the group in dataGroups under systemid.
//
// alertEvents is the received payload; systemid, hosttags and hostname are
// copied onto the group/values; dataBusService is used to send the group.
//
// Entries without a string "MessageId" are skipped. Other string properties
// (Id, EventId, Message, Severity, EventType) default to "" when absent or not
// a string. A missing or non-string EventTimestamp is replaced by the current
// time; an EventTimestamp string that cannot be parsed causes that entry to be
// logged and skipped. The group is sent even when it ends up with no values.
func parseAlert(alertEvents *redfish.RedfishPayload, systemid string, hosttags string, hostname string, dataBusService *databus.DataBusService) {
	alertValues, err := alertEvents.GetPropertyByName("Events")
	if err != nil {
		log.Printf("%s: Unable to get alert events: %v %v", systemid, err, alertEvents)
		return
	}

	group := new(databus.DataGroup)
	group.ID = alertString(alertEvents.Object["Id"])
	group.Label = "Redfish Alert Event"
	group.HostTags = hosttags
	group.HostName = hostname

	valuesSize := alertValues.GetArraySize()
	for j := 0; j < valuesSize; j++ {
		alert, err := alertValues.GetPropertyByIndex(j)
		if err != nil {
			log.Printf("Unable to get alert event %d: %v", j, err)
			continue
		}
		messageID, ok := alert.Object["MessageId"].(string)
		if !ok {
			continue
		}

		var data databus.DataValue
		data.ID = alertString(alert.Object["EventId"])
		data.Value = messageID
		data.Message = alertString(alert.Object["Message"])
		data.Severity = alertString(alert.Object["Severity"])
		data.MessageId = messageID
		data.EventType = alertString(alert.Object["EventType"])

		if rawTimestamp, ok := alert.Object["EventTimestamp"].(string); ok {
			timestamp, err := parseAlertTimestamp(rawTimestamp)
			if err != nil {
				log.Printf("Unable to parse EventTimestamp %q: %v", rawTimestamp, err)
				continue
			}
			data.Timestamp = timestamp.Format(time.RFC3339)
		} else {
			// No usable timestamp on the event: stamp it with the receive time.
			data.Timestamp = time.Now().Format(time.RFC3339)
		}

		data.System = systemid
		group.Values = append(group.Values, data)
	}

	dataBusService.SendGroup(*group)

	dataGroupsMu.Lock()
	if dataGroups[systemid] == nil {
		dataGroups[systemid] = make(map[string]*databus.DataGroup)
	}
	dataGroups[systemid][group.ID] = group
	dataGroupsMu.Unlock()
}
influxpump.go
// Groups labelled "Redfish Alert Event" are written as points in the "alerts"
// measurement. group, value, timestamp and writeAPI come from the enclosing
// code, which is not shown in this snippet.
if group.Label == "Redfish Alert Event" {
log.Printf("Writing Redfish Alert Event\n")
// System, context, label, event id, event type and severity are added as
// tags; the message id and message text are added as fields.
// NOTE(review): EventId is stored as a tag; if ids are unique per event this
// may create a new series per alert in InfluxDB. Confirm this is intended.
r := write.NewPointWithMeasurement("alerts").
AddTag("RedfishSystem", value.System).
AddTag("RedfishContext", value.Context).
AddTag("RedfishLabel", value.Label).
AddTag("EventId", value.ID).
AddTag("EventType", value.EventType).
AddTag("Severity", value.Severity).
AddField("MessageId", value.MessageId).
AddField("Message", value.Message).
SetTime(timestamp)
// NOTE(review): unconditional debug dump of every point; presumably
// temporary. Confirm before merging.
log.Printf("DEBUG: WritePoint value %#v\n", r)
// automatically batches things behind the scenes
writeAPI.WritePoint(r)
}