opentelemetry-js icon indicating copy to clipboard operation
opentelemetry-js copied to clipboard

Nestjs istio context propagation is not working as expected

Open z9fr opened this issue 2 weeks ago • 0 comments

I have been trying to set up OpenTelemetry in a NestJS application. In my setup, Istio runs in the cluster: each request first goes through Istio, which generates the trace headers, and is then forwarded to the NestJS API.

Below is my tracing.ts file which configures otel nodejs lib

import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
import { NodeSDK } from '@opentelemetry/sdk-node';
import  { AsyncHooksContextManager} from '@opentelemetry/context-async-hooks';
import * as process from 'process';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { CompositePropagator } from '@opentelemetry/core';
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
import {
  SemanticResourceAttributes,
  TelemetrySdkLanguageValues,
} from '@opentelemetry/semantic-conventions';
import { Resource } from '@opentelemetry/resources';
import { TraceContextPropagator } from 'src/utils/trace-propagator';
import { UndiciInstrumentation } from '@opentelemetry/instrumentation-undici'

// Value reported as the service name on the SDK resource.
const serviceName = 'slocoach-api';

// OTLP/HTTP trace exporter, currently pointed at a collector on localhost.
const traceExporter = new OTLPTraceExporter({
  //url: `http://otel-collector.tracing.svc:4318/v1/traces`,
  url: 'http://localhost:4318/v1/traces',
});

// Finished spans reach the exporter through a BatchSpanProcessor.
const spanProcessor = new BatchSpanProcessor(traceExporter);

// Resource attributes: the service name and the SDK language.
const resource = new Resource({
  [SemanticResourceAttributes.SERVICE_NAME]: serviceName,
  [SemanticResourceAttributes.TELEMETRY_SDK_LANGUAGE]: TelemetrySdkLanguageValues.NODEJS,
});

// Node auto-instrumentations (http and express explicitly enabled) plus undici.
const instrumentations = [
  getNodeAutoInstrumentations({
    '@opentelemetry/instrumentation-http': { enabled: true },
    '@opentelemetry/instrumentation-express': { enabled: true },
  }),
  new UndiciInstrumentation(),
];

// NOTE(review): no `textMapPropagator` is passed here, so whatever propagators
// the SDK installs by default are the ones that read incoming headers. Confirm
// they match the header format the upstream proxy actually sends.
const otelSDK = new NodeSDK({ resource, spanProcessor, instrumentations });

export default otelSDK;

/**
 * SIGTERM hook: shuts the SDK down before the process exits.
 * The outcome is logged either way, and the process then exits with status 0
 * regardless of whether the shutdown succeeded.
 */
function handleSigterm(): void {
  const onSuccess = (): void => console.log('SDK shut down successfully');
  const onFailure = (err: unknown): void => console.log('Error shutting down SDK', err);
  const exit = (): never => process.exit(0);

  otelSDK.shutdown().then(onSuccess, onFailure).finally(exit);
}

process.on('SIGTERM', handleSigterm);

And in my main.ts I have called this like below

// Replace the global `fetch` with whatever the `node-fetch` package exports.
// NOTE(review): presumably node-fetch goes through Node's http module rather
// than undici — confirm which instrumentation is expected to trace these calls.
global['fetch'] = require('node-fetch');

/**
 * Starts the OpenTelemetry SDK and then creates the Nest application.
 *
 * NOTE(review): this excerpt shows neither the imports, a `listen` call, nor the
 * `bootstrap()` invocation. Instrumentation usually has to be registered before
 * the modules it patches are loaded — confirm the tracing module is imported
 * ahead of the Nest/application imports.
 */
async function bootstrap() {
  // Tracing is started before the Nest application is created.
  await otelSDK.start();

  const app = await NestFactory.create(AppModule, {
    logger,
  });
}

The problem is that I can see the spans created by the NestJS API, but they end up in completely separate traces. If I search for the trace ID generated by Istio, that trace contains only my gateway request; this service creates new traces for the request instead of continuing the trace from the propagated headers as expected.

Is there a configuration issue on my side that causes this? I looked into this online, and this seems to be the usual implementation, so I'm not sure why this happens.

I have also tried implementing a custom propagator, which is shown below.

import {
  Context,
  isSpanContextValid,
  SpanContext,
  TextMapGetter,
  TextMapPropagator,
  TextMapSetter,
  trace,
  TraceFlags,
} from '@opentelemetry/api';
import { TraceState } from './TraceState';
/** Header name under which the trace parent is carried. */
export const TRACE_PARENT_HEADER = 'traceparent';
/** Header name under which the trace state is carried. */
export const TRACE_STATE_HEADER = 'tracestate';

// Version prefix used when a traceparent value is serialized.
const VERSION = '00';

// Pattern fragments for the four dash-separated traceparent fields:
// lowercase hex only, version `ff` and all-zero ids are excluded.
const VERSION_PART = '(?!ff)[\\da-f]{2}';
const TRACE_ID_PART = '(?![0]{32})[\\da-f]{32}';
const PARENT_ID_PART = '(?![0]{16})[\\da-f]{16}';
const FLAGS_PART = '[\\da-f]{2}';

// Each field gets its own capture group; a fifth optional group captures any
// trailing `-...` content, and one whitespace character is tolerated on each side.
const TRACE_PARENT_REGEX = new RegExp(
  '^\\s?' +
    [VERSION_PART, TRACE_ID_PART, PARENT_ID_PART, FLAGS_PART].map((part) => `(${part})`).join('-') +
    '(-.*)?\\s?$'
);

/**
 * Converts a `traceparent` header value into a {@link SpanContext}.
 *
 * The expected layout is `{version}-{traceId}-{spanId}-{flags}`, for example
 * `00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01`, where the flags
 * field is parsed as a hexadecimal number (01 = sampled, 00 = not sampled).
 * See {@link https://www.w3.org/TR/trace-context/}.
 *
 * @param traceParent - Raw header value to parse.
 * @returns The parsed trace id, span id and trace flags, or `null` when the
 *     value does not match the expected layout, or when a version `00` value
 *     carries extra trailing fields.
 */
export function parseTraceParent(traceParent: string): SpanContext | null {
  const parts = TRACE_PARENT_REGEX.exec(traceParent);
  if (parts === null) {
    return null;
  }

  const [, version, traceId, spanId, flags, trailing] = parts;

  // Future versions may append fields, so trailing content is tolerated for
  // them; version 00 is fully specified and must not carry anything extra.
  // See https://www.w3.org/TR/trace-context/#versioning-of-traceparent
  const isVersionZero = version === '00';
  if (isVersionZero && trailing) {
    return null;
  }

  return { traceId, spanId, traceFlags: parseInt(flags, 16) };
}

/**
 * Propagates {@link SpanContext} through Trace Context format propagation.
 *
 * Based on the Trace Context specification:
 * https://www.w3.org/TR/trace-context/
 *
 * The `console.log` calls are debugging output that traces each inject and
 * extract call.
 */
export class TraceContextPropagator implements TextMapPropagator {
  /**
   * Writes the span context found in `context` into `carrier` as a
   * `traceparent` entry, plus a `tracestate` entry when the span context
   * carries trace state. Does nothing when there is no valid span context.
   *
   * @param context - Context to read the active span context from.
   * @param carrier - Object the headers are written to through `setter`.
   * @param setter - Accessor used to set a header on `carrier`.
   */
  inject(context: Context, carrier: unknown, setter: TextMapSetter): void {
    console.log('trace context inject');

    const spanContext = trace.getSpanContext(context);
    if (!spanContext || !isSpanContextValid(spanContext)) return;

    // trace-flags is one byte rendered as exactly two hex digits. Prefixing a
    // literal '0' is only correct for values below 0x10; larger values would
    // give a three-character field that parseTraceParent rejects. Mask to a
    // byte and zero-pad instead.
    const flagsByte = Number(spanContext.traceFlags || TraceFlags.NONE) & 0xff;
    const flagsHex = flagsByte.toString(16).padStart(2, '0');
    const traceParent = `${VERSION}-${spanContext.traceId}-${spanContext.spanId}-${flagsHex}`;

    console.log(`trace parent ${traceParent}`);

    setter.set(carrier, TRACE_PARENT_HEADER, traceParent);
    if (spanContext.traceState) {
      console.log('span context state state avaible');
      setter.set(carrier, TRACE_STATE_HEADER, spanContext.traceState.serialize());
    }
  }

  /**
   * Reads `traceparent` (and, when present, `tracestate`) from `carrier` and
   * returns a context carrying the resulting remote span context.
   *
   * @param context - Context to extend.
   * @param carrier - Object the headers are read from through `getter`.
   * @param getter - Accessor used to read a header from `carrier`.
   * @returns `context` unchanged when the header is missing, is not a string,
   *     or does not parse; otherwise a new context with the span context set.
   */
  extract(context: Context, carrier: unknown, getter: TextMapGetter): Context {
    console.log('trace context extract');
    const traceParentHeader = getter.get(carrier, TRACE_PARENT_HEADER);

    console.log(`trace parent ${traceParentHeader}`);

    if (!traceParentHeader) return context;
    // Only the first value is used when the header is repeated.
    const traceParent = Array.isArray(traceParentHeader) ? traceParentHeader[0] : traceParentHeader;
    if (typeof traceParent !== 'string') return context;
    const spanContext = parseTraceParent(traceParent);
    if (!spanContext) return context;

    // The span context came from the carrier, so it is marked as remote.
    spanContext.isRemote = true;

    const traceStateHeader = getter.get(carrier, TRACE_STATE_HEADER);
    if (traceStateHeader) {
      // If more than one `tracestate` header is found, we merge them into a
      // single header.
      const state = Array.isArray(traceStateHeader) ? traceStateHeader.join(',') : traceStateHeader;
      const traceState = new TraceState(typeof state === 'string' ? state : undefined);

      // Log the serialized form; interpolating the object itself relies on its
      // toString() and would typically print "[object Object]".
      console.log(`trace state ${traceState.serialize()}`);

      spanContext.traceState = traceState;
    }
    return trace.setSpanContext(context, spanContext);
  }

  /**
   * @returns The header names this propagator reads and writes.
   */
  fields(): string[] {
    return [TRACE_PARENT_HEADER, TRACE_STATE_HEADER];
  }
}

But that did not work as well.

image

And below is the service generated trace

image

  • [ ] This only affects the JavaScript OpenTelemetry library
  • [x] This may affect other libraries, but I would like to get opinions here first

z9fr avatar Jun 20 '24 06:06 z9fr