apm-agent-nodejs
apm-agent-nodejs copied to clipboard
could we log.warn if `require.cache` already has modules we typically instrument at `agent.start()` time?
A reasonably common mistake when using the APM agent is to .start() it after having require()d some modules that the agent would have instrumented. We could probably look through require.cache at Agent#start()-time to see if it already had imported modules that the agent would have instrumented (modulo active and disableInstrumentations), and if so, then emit a log.warn('yo, you need to apm.start() earlier'). Then that log message could be in the troubleshooting docs to explain the issue.
This could be interesting in combination with https://github.com/elastic/apm-agent-nodejs/issues/1967
If/when this is done, having a reference to this in https://www.elastic.co/guide/en/apm/agent/nodejs/current/starting-the-agent.html might be worthwhile as well.
OTel does this in @otel/instrumentation:
_warnOnPreloadedModules() {
this._modules.forEach((module) => {
const { name } = module;
try {
const resolvedModule = require.resolve(name);
if (require.cache[resolvedModule]) {
// Module is already cached, which means the instrumentation hook might not work
this._diag.warn(`Module ${name} has been loaded before ${this.instrumentationName} so it might not work, please initialize it before requiring ${name}`);
}
}
catch (_a) {
// Module isn't available, we can simply skip
}
});
}
modules we instrument that we also load in the agent
Here is a quick patch to index.js to list the modules that we instrument that we also load as part of the APM agent.
diff --git a/index.js b/index.js
index 04711888..8899840d 100644
--- a/index.js
+++ b/index.js
@@ -6,6 +6,44 @@
'use strict'
+function intersection (setA, setB) {
+ const _intersection = new Set()
+ for (const elem of setB) {
+ if (setA.has(elem)) {
+ _intersection.add(elem)
+ }
+ }
+ return _intersection
+}
+
+var modPathsBefore = new Set(Object.keys(require.cache))
+
var Agent = require('./lib/agent')
module.exports = new Agent()
+
+var modPathsAfter = new Set(Object.keys(require.cache))
+function difference (setA, setB) {
+ const _difference = new Set(setA)
+ for (const elem of setB) {
+ _difference.delete(elem)
+ }
+ return _difference
+}
+var diff = difference(modPathsAfter, modPathsBefore)
+// console.log('XXX modPaths loaded:', diff)
+
+const moduleDetailsFromPath = require('module-details-from-path')
+var modNames = new Set()
+diff.forEach(p => {
+ var details = moduleDetailsFromPath(p)
+ if (details) {
+ modNames.add(details.name)
+ }
+})
+// console.log('XXX modNames: ', modNames)
+
+var modsWeInstrument = new Set(require('./lib/instrumentation').modules.flat())
+
+const inter = intersection(modNames, modsWeInstrument)
+console.log('XXX modsWeInstrument that we have loaded: ', inter)
Currently that includes:
XXX modsWeInstrument that we have loaded: Set(2) { '@opentelemetry/api', '@opentelemetry/sdk-metrics' }
This is relevant for modNamesWeRequireOurself below.
first stab
diff --git a/lib/instrumentation/index.js b/lib/instrumentation/index.js
index b721b064..b7c06ff7 100644
--- a/lib/instrumentation/index.js
+++ b/lib/instrumentation/index.js
@@ -228,6 +228,28 @@ Instrumentation.prototype.start = function (runContextClass) {
}
}
+ const modNamesWeRequireOurself = new Set(['@opentelemetry/api', '@opentelemetry/sdk-metrics'])
+ const modNamesAlreadyRequired = new Set()
+ this._patches.keys.forEach(modName => {
+ if (modNamesWeRequireOurself.has(modName)) {
+ return
+ }
+ try {
+ const resolvePath = require.resolve(modName)
+ if (require.cache[resolvePath]) {
+ modNamesAlreadyRequired.add(modName)
+ }
+ } catch (_resolveErr) {
+ // skip
+ }
+ })
+ if (modNamesAlreadyRequired.size > 0) {
+ this._log.warn('modules have been `require`d before the APM agent started, ' +
+ 'instrumentation might not work for them: %s ' +
+ '(see https://www.elastic.co/guide/en/apm/agent/nodejs/current/starting-the-agent.html)',
+ Array.from(modNamesAlreadyRequired).join(', '))
+ }
+
this._runCtxMgr.enable()
this._startHook()
This results in a warning like the following in a script that requires "express" before starting the APM agent:
[2023-06-09T17:22:33.240Z] WARN (elastic-apm-node): modules have been `require`d before the APM agent started, instrumentation might not work for them: express, finalhandler (see https://www.elastic.co/guide/en/apm/agent/nodejs/current/starting-the-agent.html)
checklist of suggested improvements on this first take
- [ ] We should really do https://github.com/elastic/apm-agent-nodejs/issues/2992 first so that, for example, if
@node-redis/client/dist/lib/clientis found to be loaded too early that we can warn about the redis module, or next ifnext/dist/server/api-utils/nodeis loaded too early. This could be done later, but if not, we'll have a confusing warning in some cases that won't be that actionable for the user.
nice to have
It would be nice to be able to warn appropriately for @opentelemetry/{api,sdk-metrics} but doing so would require one of:
- Us using a namespaced (i.e. slightly forked?) copy of those modules.
- Us removing those modules from
require.cacheafter we load them. - Instrumenting
requireafter we load the agent, but before we.start().
Of those, option (2) might be easiest. I'm not sure the other two are worth the complexity.