traceId is not processed correctly in the JSON protocol
api: /otel/v1/traces https://docs.guance.com/en/integrations/opentelemetry/
opentelemetry definition of traceId and spanId on json protocol: https://opentelemetry.io/docs/specs/otlp/#json-protobuf-encoding
The traceId and spanId byte arrays are represented as case-insensitive hex-encoded strings; they are not base64-encoded as is defined in the standard Protobuf JSON Mapping. Hex encoding is used for traceId and spanId fields in all OTLP Protobuf messages, e.g., the Span, Link, LogRecord, etc. messages. For example, the traceId field in a Span can be represented like this: { “traceId”: “5B8EFFF798038103D269B633813FC60C”, … }
datakit: protojson.Unmarshal() treats bytes fields in JSON protocol data as base64-encoded, but OpenTelemetry defines traceId and spanId as hex-encoded, so it does not correctly process hex-encoded traceId and spanId: https://github.com/GuanceCloud/datakit/blob/9d19db5b4d9e677fee6889dda327dd1010bf79f7/internal/plugins/inputs/opentelemetry/otel_http.go#L65-L98
// handleOTELTrace serves an OTLP trace export request received over HTTP.
//
// The request is parsed with itrace.ParseTracerRequest and the payload is
// decoded into an ExportTraceServiceRequest according to its media type:
// proto.Unmarshal for "application/x-protobuf" and protojson.Unmarshal for
// "application/json". A parse failure, an unrecognized media type, or a decode
// failure is logged and answered with 400 Bad Request. When an afterGatherRun
// hook is set and the decoded resource spans yield at least one trace, the
// traces are handed to that hook under inputName.
//
// NOTE(review): OTLP/JSON hex-encodes traceId and spanId, whereas the JSON
// branch relies on protojson.Unmarshal; presumably those fields are decoded as
// base64 there — confirm against the protojson documentation.
func (h *httpConfig) handleOTELTrace(resp http.ResponseWriter, req *http.Request) {
	// Every rejection path logs a message and replies 400; this handler
	// writes no response body.
	reject := func(msg string) {
		log.Error(msg)
		resp.WriteHeader(http.StatusBadRequest)
	}

	contentType, _, payload, err := itrace.ParseTracerRequest(req)
	if err != nil {
		reject(err.Error())
		return
	}

	exportReq := new(trace.ExportTraceServiceRequest)
	switch contentType {
	case "application/json":
		err = protojson.Unmarshal(payload, exportReq)
	case "application/x-protobuf":
		err = proto.Unmarshal(payload, exportReq)
	default:
		reject("unrecognized Content-Type")
		return
	}
	if err != nil {
		reject(err.Error())
		return
	}

	// Spans are only converted when there is a hook to receive them.
	if h.afterGatherRun == nil {
		return
	}
	dktraces := h.input.parseResourceSpans(exportReq.ResourceSpans)
	if len(dktraces) == 0 {
		return
	}
	h.afterGatherRun.Run(inputName, dktraces)
}
opentelemetry: the OpenTelemetry Collector implementation uses custom unmarshal functions to correctly process hex-encoded traceId, spanId, etc.
https://github.com/open-telemetry/opentelemetry-collector/blob/37a3ace6274cc0a721d262c659170b207a2fb722/pdata/internal/generated_wrapper_span.go#L197-L249
// UnmarshalJSONOrigSpan fills orig from the JSON object that iter is reading.
//
// Multi-word fields are accepted under both their camelCase and snake_case
// names. Trace and span identifiers go through the dedicated ID helpers rather
// than a plain string read. Field names that match no case are skipped.
func UnmarshalJSONOrigSpan(orig *otlptrace.Span, iter *json.Iterator) {
	for {
		field := iter.ReadObject()
		if field == "" {
			// An empty field name ends the object loop.
			return
		}

		switch field {
		case "traceId", "trace_id":
			UnmarshalJSONOrigTraceID(&orig.TraceId, iter)
		case "spanId", "span_id":
			UnmarshalJSONOrigSpanID(&orig.SpanId, iter)
		case "traceState", "trace_state":
			orig.TraceState = iter.ReadString()
		case "parentSpanId", "parent_span_id":
			UnmarshalJSONOrigSpanID(&orig.ParentSpanId, iter)
		case "flags":
			orig.Flags = iter.ReadUint32()
		case "name":
			orig.Name = iter.ReadString()
		case "kind":
			kind := iter.ReadEnumValue(otlptrace.Span_SpanKind_value)
			orig.Kind = otlptrace.Span_SpanKind(kind)
		case "startTimeUnixNano", "start_time_unix_nano":
			orig.StartTimeUnixNano = iter.ReadUint64()
		case "endTimeUnixNano", "end_time_unix_nano":
			orig.EndTimeUnixNano = iter.ReadUint64()
		case "attributes":
			for iter.ReadArray() {
				// Append an empty slot first, then decode into it in place.
				slot := len(orig.Attributes)
				orig.Attributes = append(orig.Attributes, otlpcommon.KeyValue{})
				UnmarshalJSONOrigKeyValue(&orig.Attributes[slot], iter)
			}
		case "droppedAttributesCount", "dropped_attributes_count":
			orig.DroppedAttributesCount = iter.ReadUint32()
		case "events":
			for iter.ReadArray() {
				event := NewOrigSpan_Event()
				orig.Events = append(orig.Events, event)
				UnmarshalJSONOrigSpan_Event(event, iter)
			}
		case "droppedEventsCount", "dropped_events_count":
			orig.DroppedEventsCount = iter.ReadUint32()
		case "links":
			for iter.ReadArray() {
				link := NewOrigSpan_Link()
				orig.Links = append(orig.Links, link)
				UnmarshalJSONOrigSpan_Link(link, iter)
			}
		case "droppedLinksCount", "dropped_links_count":
			orig.DroppedLinksCount = iter.ReadUint32()
		case "status":
			UnmarshalJSONOrigStatus(&orig.Status, iter)
		default:
			iter.Skip()
		}
	}
}
https://github.com/open-telemetry/opentelemetry-collector/blob/37a3ace6274cc0a721d262c659170b207a2fb722/pdata/internal/data/traceid.go#L75-L78
// UnmarshalJSONIter resets tid to all-zero bytes and then fills it from the
// hex string read from iter.
func (tid *TraceID) UnmarshalJSONIter(iter *json.Iterator) {
	// Clear any previous value first: the decode helper leaves the
	// destination untouched when the input string is empty.
	var zero TraceID
	*tid = zero
	unmarshalJSON(tid[:], iter)
}
https://github.com/open-telemetry/opentelemetry-collector/blob/37a3ace6274cc0a721d262c659170b207a2fb722/pdata/internal/data/bytesid.go#L12-L30
// unmarshalJSON hex-decodes the string read from iter into dst.
// It is called by the Protobuf JSON deserialization of ID types.
//
// An empty string leaves dst untouched. A string whose decoded length differs
// from len(dst), or one that fails hex decoding, is reported on iter via
// ReportError.
func unmarshalJSON(dst []byte, iter *json.Iterator) {
	encoded := iter.ReadStringAsSlice()

	switch {
	case len(encoded) == 0:
		// Nothing to decode; dst keeps whatever the caller put there.
		return
	case hex.DecodedLen(len(encoded)) != len(dst):
		iter.ReportError("ID.UnmarshalJSONIter", "length mismatch")
		return
	}

	if _, err := hex.Decode(dst, encoded); err != nil {
		iter.ReportError("ID.UnmarshalJSONIter", err.Error())
	}
}
more info: https://github.com/open-telemetry/opentelemetry-proto/blob/main/examples/trace.json
从21年开始 java和go的sdk都移除了http/json这个选项,用http/protobuf更好,或者使用 grpc协议。
已经换成 http/protobuf.
但是 node 的 http/json sdk 依然存在,前两个星期还在更新:https://github.com/open-telemetry/opentelemetry-js/tree/main/experimental/packages/exporter-trace-otlp-http
如果 datakit 打算不支持 http/json 了,希望把它彻底关掉,不然用到 http/json 协议,数据写入又不对,排查起来非常棘手,不希望别人再碰到一样的坑。