feat: Add CpuAttributesSpanAppender
fixes #1142
What is this change?
This PR implements a new SpanProcessor (CpuAttributesSpanAppender) that appends the following attributes to spans:
- `process.cpu.avg_utilization` - The relative, average CPU utilization for the app process during the span
- `process.cpu.elapsed_time_start` - The elapsed CPU time at the start of this span (in millis)
- `process.cpu.elapsed_time_end` - The elapsed CPU time at the end of this span (in millis)
How was this tested?
Tested manually with the demo app. Here is an example of the telemetry generated:
{
"data": [
{
"traceID": "1b720aae66b3959b06eb5335247ada5b",
"spans": [
{
"traceID": "1b720aae66b3959b06eb5335247ada5b",
"spanID": "e53cbd939dce77af",
"operationName": "Created",
"references": [],
"startTime": 1759468632716000,
"duration": 46198,
"tags": [
{
"key": "otel.library.name",
"type": "string",
"value": "io.opentelemetry.lifecycle"
},
{
"key": "activity.name",
"type": "string",
"value": "AstronomyShopActivity"
},
{
"key": "last.screen.name",
"type": "string",
"value": "MainActivity"
},
{
"key": "network.connection.type",
"type": "string",
"value": "wifi"
},
{
"key": "process.cpu.avg_utilization",
"type": "float64",
"value": 37.5
},
{
"key": "process.cpu.elapsed_time_end",
"type": "int64",
"value": 1827
},
{
"key": "process.cpu.elapsed_time_start",
"type": "int64",
"value": 1758
},
{
"key": "screen.name",
"type": "string",
"value": "AstronomyShopActivity"
},
{
"key": "session.id",
"type": "string",
"value": "43e73022b738dfbeb9aaa2674806fb38"
},
{
"key": "toolkit",
"type": "string",
"value": "jetpack compose"
},
{
"key": "span.kind",
"type": "string",
"value": "internal"
},
{
"key": "internal.span.format",
"type": "string",
"value": "otlp"
}
],
"logs": [
{
"timestamp": 1759468632716095,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityPreCreated"
}
]
},
{
"timestamp": 1759468632724703,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityCreated"
}
]
},
{
"timestamp": 1759468632759650,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityPostCreated"
}
]
},
{
"timestamp": 1759468632760196,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityPreStarted"
}
]
},
{
"timestamp": 1759468632760275,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityStarted"
}
]
},
{
"timestamp": 1759468632760738,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityPostStarted"
}
]
},
{
"timestamp": 1759468632760962,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityPreResumed"
}
]
},
{
"timestamp": 1759468632760985,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityResumed"
}
]
},
{
"timestamp": 1759468632762171,
"fields": [
{
"key": "event",
"type": "string",
"value": "activityPostResumed"
}
]
}
],
"processID": "p1",
"warnings": null
}
],
"processes": {
"p1": {
"serviceName": "OpenTelemetryDemoApp",
"tags": [
{
"key": "device.manufacturer",
"type": "string",
"value": "Google"
},
{
"key": "device.model.identifier",
"type": "string",
"value": "sdk_gphone64_arm64"
},
{
"key": "device.model.name",
"type": "string",
"value": "sdk_gphone64_arm64"
},
{
"key": "os.description",
"type": "string",
"value": "Android Version 16 (Build BP22.250221.010 API level 36)"
},
{
"key": "os.name",
"type": "string",
"value": "Android"
},
{
"key": "os.type",
"type": "string",
"value": "linux"
},
{
"key": "os.version",
"type": "string",
"value": "16"
},
{
"key": "rum.sdk.version",
"type": "string",
"value": "0.16.0-alpha-SNAPSHOT"
},
{
"key": "service.version",
"type": "string",
"value": "1.0"
},
{
"key": "telemetry.sdk.language",
"type": "string",
"value": "java"
},
{
"key": "telemetry.sdk.name",
"type": "string",
"value": "opentelemetry"
},
{
"key": "telemetry.sdk.version",
"type": "string",
"value": "1.54.1"
}
]
}
},
"warnings": null
}
],
"total": 0,
"limit": 0,
"offset": 0,
"errors": null
}
Android phones are similar to multi-tenant systems in that the app is not responsible for scheduling, and thus has no control over which cores it gets assigned or whether they are throttled by the OS. Also, the main thread is often blocked by binder calls, which means it's just waiting for the OS to respond, and that would affect average utilization.
Given that, these metrics will be missing key aspects to contextualize why the numbers are the way they are, which would result in pretty noisy data.
BTW, what is the plan for how this data will be consumed, and what problems do you think it'll find?
@Doohl This has been sitting here for a couple months now. Are you able to come back to it? Thanks!