opentelemetry-js icon indicating copy to clipboard operation
opentelemetry-js copied to clipboard

Performance slowdown using http-instrumentation

Open dsope05 opened this issue 1 year ago • 9 comments
trafficstars

What happened?

Steps to Reproduce

create a server running on main thread, offloads requests to node worker threads. implement open telemetry on main thread, requests/sec is reduced by 80% when http-instrumentation is applied to the main thread.

Expected Result

http-instrumentation shouldn't decrease performance by 80% when implemented.

Actual Result

http-instrumentation decreases requests/second performance by 80%

Additional Details

OpenTelemetry Setup Code

const os = require('os');
const { isMainThread } = require('worker_threads');
const { trace, propagation } = require('@opentelemetry/api');
const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node');
const { BatchSpanProcessor } = require('@opentelemetry/sdk-trace-base');
const {
  CollectorTraceExporter,
} = require('@opentelemetry/exporter-collector-proto');
const { registerInstrumentations } = require('@opentelemetry/instrumentation');
const { HttpInstrumentation } = require('@opentelemetry/instrumentation-http');
const {
  FastifyInstrumentation,
} = require('@opentelemetry/instrumentation-fastify');
const {
  SemanticResourceAttributes,
} = require('@opentelemetry/semantic-conventions');
const { Resource } = require('@opentelemetry/resources');
const {
  CompositePropagator,
  W3CTraceContextPropagator,
} = require('@opentelemetry/core');
const {
  B3Propagator,
  B3InjectEncoding,
} = require('@opentelemetry/propagator-b3');

const fastify = require('../../server/fastify');
const { serviceName, version } = require('../server-info');
const { register } = require('../../utils/register-to-close');
const { SemanticELFAttributes } = require('../SemanticELFAttributes');

propagation.setGlobalPropagator(
  new CompositePropagator({
    propagators: [
      new B3Propagator({ injectEncoding: B3InjectEncoding.MULTI_HEADER }),
      new W3CTraceContextPropagator(),
    ],
  })
);

const provider = new NodeTracerProvider({
  resource: new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: serviceName,
    [SemanticResourceAttributes.SERVICE_NAMESPACE]: 'xxxxx',
    [SemanticResourceAttributes.SERVICE_INSTANCE_ID]: os.hostname(),
    [SemanticResourceAttributes.SERVICE_VERSION]: version,
    [SemanticELFAttributes.SERVICE_CAR_ID]: 'xxxxxx',
    [SemanticELFAttributes.DEPLOYMENT_ENVIRONMENT]:
      process.env.EPAAS_ENV
      || (process.env.NODE_ENV === 'development' ? 'E0' : undefined),
  }),
});

provider.addSpanProcessor(
  new BatchSpanProcessor(
    new CollectorTraceExporter({
      // url: is configured via OTEL_EXPORTER_OTLP_TRACES_ENDPOINT env var
      serviceName,
      concurrencyLimit: 10, // pending requests limit, dropped after that
    }),
    {
      // basing numbers off of 700rps
      // past this size spans are dropped
      maxQueueSize: 400,
      // maximum batch size of every export (maxExportBatchSize <= maxQueueSize)
      maxExportBatchSize: 100,
      scheduledDelayMillis: 500, // interval between two consecutive exports
      exportTimeoutMillis: 15e3, // cancelled on failure
    }
  )
);

const tracer = trace.getTracer(serviceName, version);

const httpInstrumentationOptions = {
  ignoreIncomingPaths: ['/ReadMetrics.v1', '/ReadResponseCode.v1', '/favicon.ico'],
};

if (isMainThread) {
  // Initialie instrumentations on worker only
  const { httpRequestHook, buildHTTPResponseHook } = require('../hooks');

  Object.assign(httpInstrumentationOptions, {
    requestHook: httpRequestHook,
    responseHook: buildHTTPResponseHook(tracer),
  });

  registerInstrumentations({
    tracerProvider: provider,
    instrumentations: [
      new HttpInstrumentation(httpInstrumentationOptions),
      new FastifyInstrumentation({
        ignoreUrls: ['/ReadMetrics.v1', '/ReadResponseCode.v1', '/favicon.ico'],
        requestHook: (span, info) => {
          span.setAttribute('http.method', info.request.method);
        },
      }),
    ],
  });
}
  provider.register();
  register(() => provider.shutdown());

  fastify.log.info(`Tracing initialized for ${isMainThread ? 'server thread' : 'worker thread'}`);
module.exports = tracer;

package.json

{
  "name": "xxxx",
  "version": "1.2.7",
  "description": "",
  "main": "server.js",
  "scripts": {
    "test": "npm run test:lint && npm run test:unit",
    "test:unit": "echo test",
    "start": "node src/server",
    "test:lint": "eslint --ignore-path .gitignore --ext js,jsx,snap ."
  },
  "bin": {
    "server": "./bin/server.js"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@babel/polyfill": "^7.12.1",
    "@fastify/cors": "^8.2.1",
    "@fastify/helmet": "^10.1.0",
    "@opentelemetry/api": "^1.4.1",
    "@opentelemetry/core": "^1.5.0",
    "@opentelemetry/exporter-collector-proto": "^0.25.0",
    "@opentelemetry/exporter-prometheus": "^0.36.1",
    "@opentelemetry/instrumentation": "^0.41.2",
    "@opentelemetry/instrumentation-fastify": "^0.31.1",
    "@opentelemetry/instrumentation-http": "^0.45.1",
    "@opentelemetry/metrics": "^0.24.0",
    "@opentelemetry/propagator-b3": "^1.13.0",
    "@opentelemetry/resources": "^1.13.0",
    "@opentelemetry/sdk-metrics": "^1.10.1",
    "@opentelemetry/sdk-trace-base": "^1.10.0",
    "@opentelemetry/sdk-trace-node": "^1.10.1",
    "@opentelemetry/semantic-conventions": "^1.10.1",
    "decompress": "^4.2.1",
    "decompress-targz": "^4.1.1",
    "eslint": "^8.46.0",
    "etcd3": "^1.1.0",
    "fastify": "^4.17.0",
    "helmet": "^5.0.2",
    "ip": "^1.1.8",
    "jest": "^28.1.3",
    "jks-js": "^1.0.2",
    "node-fetch": "^2.6.7",
    "opentelemetry-node-metrics": "^3.0.0",
    "os": "^0.1.2",
    "path": "^0.12.7",
    "pino": "^8.14.1",
    "pino-opentelemetry-transport": "^0.2.0",
    "pino-pretty": "^10.2.0",
    "stream": "0.0.2",
    "util": "^0.12.4",
    "uuid": "^8.3.2"
  },
  "overrides": {
    "core-js": "3.28.0",
    "import-in-the-middle": "1.4.2",
    "protobufjs": "6.8.9"
  },
  "engines": {
    "node": "=18.x.x",
    "npm": "=9.x.x"
  }
}

Relevant log output

No response

dsope05 avatar Dec 15 '23 23:12 dsope05

Hey! I can help with this. May I be assigned?

zzadxz avatar Jan 18 '24 23:01 zzadxz

Hey! I can help with this. May I be assigned?

Great! It's yours, thanks for picking this up :slightly_smiling_face:

pichlermarc avatar Jan 26 '24 06:01 pichlermarc

Hello, any updates on this issue? I've noticed 2x slow down using open telemetry with node and graphql app

olafkrawczyk avatar May 17 '24 12:05 olafkrawczyk