Crash when using the REQ-ROUTER-ROUTER-REQ example from the docs
Describe the bug
Node crashes somewhere in its memory management routines (stack trace below).
worker waiting
client sent
backend
front <Buffer 00 6b 8b 45 67>
backend <Buffer 00 e4 3c 98 69>
worker received: X request=1
worker waiting
backend <Buffer 00 e4 3c 98 69>
client received: X request=1: reply 1
client sent
front <Buffer 00 6b 8b 45 67>
worker received: X request=1: reply 1 request=2
worker waiting
backend <Buffer 00 e4 3c 98 69>
client received: X request=1: reply 1 request=2: reply 2
client sent
front <Buffer 00 6b 8b 45 67>
worker received: X request=1: reply 1 request=2: reply 2 request=3
worker waiting
backend <Buffer 00 e4 3c 98 69>
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3
client sent
front <Buffer 00 6b 8b 45 67>
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4
worker waiting
backend <Buffer 00 e4 3c 98 69>
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4
client sent
front <Buffer 00 6b 8b 45 67>
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5
worker waiting
backend <Buffer 00 e4 3c 98 69>
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5
client sent
front <Buffer 00 6b 8b 45 67>
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5 request=6
worker waiting
backend <Buffer 00 e4 3c 98 69>
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5 request=6: reply 6
client sent
front <Buffer 00 6b 8b 45 67>
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5 request=6: reply 6 request=7
worker waiting
backend <Buffer 00 e4 3c 98 69>
#
# Fatal error in , line 0
# Check failed: result.second.
#
#
#
#FailureMessage Object: 0x7ffea726d000
1: 0xb6f151 [node]
2: 0x1bf56f4 V8_Fatal(char const*, ...) [node]
3: 0xfc3f61 v8::internal::GlobalBackingStoreRegistry::Register(std::shared_ptr<v8::internal::BackingStore>) [node]
4: 0xd151c8 v8::ArrayBuffer::GetBackingStore() [node]
5: 0xab71b0 napi_get_typedarray_info [node]
6: 0x7fe25a5410d7 [/home/kirill/Sandboxes/github/zeromq/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
7: 0x7fe25a542c70 zmq::IncomingMsg::IntoBuffer(Napi::Env const&) [/home/kirill/Sandboxes/github/zeromq/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
8: 0x7fe25a538944 zmq::Socket::Poller::ReadableCallback() [/home/kirill/Sandboxes/github/zeromq/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
9: 0x7fe25a538b75 [/home/kirill/Sandboxes/github/zeromq/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
10: 0x7fe25a5475ef zmq::Poller<zmq::Socket::Poller>::Callback(uv_poll_s*, int, int) [/home/kirill/Sandboxes/github/zeromq/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
11: 0x156c804 [node]
12: 0x155acc8 uv_run [node]
13: 0xa3ffe5 node::SpinEventLoop(node::Environment*) [node]
14: 0xb45056 node::NodeMainInstance::Run(node::EnvSerializeInfo const*) [node]
15: 0xac67e2 node::Start(int, char**) [node]
16: 0x7fe26925db75 __libc_start_main [/lib64/libc.so.6]
17: 0xa3bfbc [node]
[1] 696352 trace trap (core dumped) node test1.js 2>&1 > out
* thread #1, name = 'node', stop reason = signal SIGTRAP
* frame #0: 0x0000000001bff8b2 node`v8::base::OS::Abort() + 18
frame #1: 0x0000000001bf5705 node`V8_Fatal(char const*, ...) + 357
frame #2: 0x0000000000fc3f61 node`v8::internal::GlobalBackingStoreRegistry::Register(std::shared_ptr<v8::internal::BackingStore>) + 657
frame #3: 0x0000000000d151c8 node`v8::ArrayBuffer::GetBackingStore() + 88
frame #4: 0x0000000000ab71b0 node`napi_get_typedarray_info + 256
frame #5: 0x00007ffff4f6a0d7 node.napi.glibc.node`___lldb_unnamed_symbol3871 + 87
frame #6: 0x00007ffff4f6bc70 node.napi.glibc.node`zmq::IncomingMsg::IntoBuffer(Napi::Env const&) + 416
frame #7: 0x00007ffff4f61944 node.napi.glibc.node`zmq::Socket::Poller::ReadableCallback() + 308
frame #8: 0x00007ffff4f61b75 node.napi.glibc.node`___lldb_unnamed_symbol3809 + 101
frame #9: 0x00007ffff4f705ef node.napi.glibc.node`zmq::Poller<zmq::Socket::Poller>::Callback(uv_poll_s*, int, int) + 143
frame #10: 0x000000000156c804 node`uv__io_poll(loop=0x0000000004883c20, timeout=<unavailable>) at epoll.c:374:11
frame #11: 0x000000000155acc8 node`uv_run(loop=0x0000000004883c20, mode=UV_RUN_DEFAULT) at core.c:389:5
frame #12: 0x0000000000a3ffe5 node`node::SpinEventLoop(node::Environment*) + 309
frame #13: 0x0000000000b45056 node`node::NodeMainInstance::Run(node::EnvSerializeInfo const*) + 406
frame #14: 0x0000000000ac67e2 node`node::Start(int, char**) + 546
frame #15: 0x00007ffff7a57b75 libc.so.6`__libc_start_main + 213
frame #16: 0x0000000000a3bfbc node`_start + 41
Reproducing
const zmq = require("zeromq");

async function router() {
  let frontend = new zmq.Router();
  let backend = new zmq.Router();
  await frontend.bind("inproc://front");
  await backend.bind("inproc://back");
  let workers = [];
  // Forward each client request to the next available worker.
  let f3 = (async () => {
    for await (let [identity, dummy, msg] of frontend) {
      console.log("front", identity);
      while (workers.length == 0)
        await new Promise(r => setTimeout(r, 10));
      await backend.send([workers[0], [], identity, [], msg]);
      workers = workers.slice(1);
    }
  })();
  // Collect worker replies; an empty message just signals readiness.
  let f4 = (async () => {
    console.log("backend");
    for await (let [worker, dummy, ...msg] of backend) {
      workers.push(worker);
      console.log("backend", worker);
      if (msg.length == 1 && msg[0].length == 0) continue;
      await frontend.send(msg);
    }
  })();
  await Promise.all([f3, f4]);
}

async function worker() {
  let worker = new zmq.Request();
  worker.connect("inproc://back");
  await worker.send(""); // announce readiness to the broker
  for (let q = 1; ; q++) {
    console.log("worker waiting");
    let [identity, dummy, msg] = await worker.receive();
    console.log(`worker received: ${msg}`);
    await worker.send([identity, dummy, `${msg}: reply ${q}`]);
  }
}

async function client() {
  let client = new zmq.Request();
  await client.connect("inproc://front");
  let msg = "X";
  for (let q = 1; ; q++) {
    await client.send(msg + ` request=${q}`);
    console.log("client sent");
    [msg] = await client.receive();
    msg = msg.toString("utf8");
    console.log(`client received: ${msg}`);
  }
}

(async () => {
  await Promise.all([router(), client(), worker()]);
})();
Expected behavior
Not to crash.
Tested on
- OS: Linux newton 5.13.4-200.fc34.x86_64 #1 SMP Tue Jul 20 20:27:29 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
- ZeroMQ.js version: [email protected]
The only thing I could find was a similar issue in grpc (https://github.com/grpc/grpc-node/issues/1407), which links to a few related reports. It seems Node at some point decided to no longer allow aliasing of ArrayBuffer pointers, but I'm not completely sure because I use Node quite rarely.
I also noticed that the crash happens right around the message size of zero_copy_threshold (https://github.com/zeromq/zeromq.js/blob/e2821f002eebec2428e83614d8b5309388c79342/src/incoming_msg.cc#L29).
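To check that correlation, one could probe the crash point directly with message size. The following is a rough sketch of such a probe (the socket type, endpoint name, and size range are all my own choices for illustration, not from the repro): it sends payloads of doubling size through an inproc Pair and logs how far it gets. If the crash tracks zero_copy_threshold, it should appear once the payload crosses that size.
const zmq = require("zeromq");
(async () => {
  const a = new zmq.Pair();
  const b = new zmq.Pair();
  await a.bind("inproc://probe"); // endpoint name is arbitrary
  b.connect("inproc://probe");
  // Double the payload size each round and log each successful receive;
  // the last logged size before a crash brackets the threshold.
  for (let size = 16; size <= 8192; size *= 2) {
    await a.send(Buffer.alloc(size, "x"));
    const [msg] = await b.receive();
    console.log(`ok at size=${msg.length}`);
  }
  a.close();
  b.close();
})();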
Here is the source for that example in the zmq guide: https://zguide.zeromq.org/docs/chapter3/#A-Load-Balancing-Message-Broker
Node runs out of memory because you are concatenating each reply back into the next request, so the message grows without bound.
await worker.send([identity, dummy, `${msg}: reply ${q}`]);
If you replace that line with:
await worker.send([identity, dummy, `reply ${q}`]);
It runs fine. I don't think this is a zeromq/req/router bug.
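For reference, this is worker() from the repro with that single line changed (everything else as posted above):
async function worker() {
  let worker = new zmq.Request();
  worker.connect("inproc://back");
  await worker.send("");
  for (let q = 1; ; q++) {
    console.log("worker waiting");
    let [identity, dummy, msg] = await worker.receive();
    console.log(`worker received: ${msg}`);
    // Reply with a fixed-size payload instead of echoing the whole request,
    // so the message no longer grows on every round trip.
    await worker.send([identity, dummy, `reply ${q}`]);
  }
}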
You can see the messages explicitly in the output; they don't even reach 1 KB.
The output stream may be lagging far behind (so the string is already a lot bigger than what you see). If you want to know for sure, add a delay so the output is guaranteed to be flushed; see the sketch below. In any case, if your string keeps growing in an unbounded loop you can be sure to get a crash sooner or later.
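A minimal sketch of that suggestion, assuming the delay (100 ms, an arbitrary value) goes at the end of client()'s loop in the repro:
async function client() {
  let client = new zmq.Request();
  await client.connect("inproc://front");
  let msg = "X";
  for (let q = 1; ; q++) {
    await client.send(msg + ` request=${q}`);
    console.log("client sent");
    [msg] = await client.receive();
    msg = msg.toString("utf8");
    console.log(`client received: ${msg}`);
    // Diagnostic pause so stdout flushes before the next (larger) request
    // is built; the logged string then reflects the true current size.
    await new Promise(resolve => setTimeout(resolve, 100));
  }
}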
Here is the output of strace -e trace=memory node test1.js; you can see that no large mmap calls happen around the time it crashes. Furthermore, it only takes 0.04 s to crash, so I find it hard to believe that node+zmq would manage to allocate and write 32 GB in that time.
... truncated, starting around the first line of console.log
worker waiting
client sent
backend
mmap(0x3ceb3a600000, 520192, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x3ceb3a600000
munmap(0x3ceb3a640000, 258048) = 0
mprotect(0x3ceb3a600000, 262144, PROT_READ|PROT_WRITE) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
backend: { workers: [ <Buffer 00 e4 3c 98 69> ] }
1
worker received: X request=1
worker waiting
client received: X request=1: reply 1
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
client sent
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
1
worker received: X request=1: reply 1 request=2
worker waiting
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
client received: X request=1: reply 1 request=2: reply 2
client sent
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
1
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
worker received: X request=1: reply 1 request=2: reply 2 request=3
worker waiting
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3
client sent
1
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4
worker waiting
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4
client sent
1
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
worker waiting
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5
client sent
1
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5 request=6
worker waiting
client received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5 request=6: reply 6
client sent
mmap(0x8e1adc00000, 520192, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x8e1adc00000
munmap(0x8e1adc40000, 258048) = 0
mprotect(0x8e1adc00000, 262144, PROT_READ|PROT_WRITE) = 0
mmap(0x24c2f2f40000, 520192, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x24c2f2f40000
munmap(0x24c2f2f80000, 258048) = 0
mprotect(0x24c2f2f40000, 262144, PROT_READ|PROT_WRITE) = 0
mmap(0x1c5ee7f00000, 520192, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x1c5ee7f00000
munmap(0x1c5ee7f40000, 258048) = 0
mprotect(0x1c5ee7f00000, 262144, PROT_READ|PROT_WRITE) = 0
mmap(0x225b26800000, 520192, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x225b26800000
munmap(0x225b26840000, 258048) = 0
mprotect(0x225b26800000, 262144, PROT_READ|PROT_WRITE) = 0
1
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
mprotect(0x7feae0043000, 245760, PROT_READ|PROT_EXEC) = 0
worker received: X request=1: reply 1 request=2: reply 2 request=3: reply 3 request=4: reply 4 request=5: reply 5 request=6: reply 6 request=7
worker waiting
#
# Fatal error in , line 0
# Check failed: result.second.
#
#
#
#FailureMessage Object: 0x7fff753609e0
1: 0xb6f151 [node]
2: 0x1bf56f4 V8_Fatal(char const*, ...) [node]
3: 0xfc3f61 v8::internal::GlobalBackingStoreRegistry::Register(std::shared_ptr<v8::internal::BackingStore>) [node]
4: 0xd151c8 v8::ArrayBuffer::GetBackingStore() [node]
5: 0xab71b0 napi_get_typedarray_info [node]
6: 0x7feaedb120d7 [/home/kirill/Sandboxes/SportsModelling/crawler/zeromq-testing/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
7: 0x7feaedb13c70 zmq::IncomingMsg::IntoBuffer(Napi::Env const&) [/home/kirill/Sandboxes/SportsModelling/crawler/zeromq-testing/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
8: 0x7feaedb09944 zmq::Socket::Poller::ReadableCallback() [/home/kirill/Sandboxes/SportsModelling/crawler/zeromq-testing/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
9: 0x7feaedb09b75 [/home/kirill/Sandboxes/SportsModelling/crawler/zeromq-testing/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
10: 0x7feaedb185ef zmq::Poller<zmq::Socket::Poller>::Callback(uv_poll_s*, int, int) [/home/kirill/Sandboxes/SportsModelling/crawler/zeromq-testing/node_modules/zeromq/prebuilds/linux-x64/node.napi.glibc.node]
11: 0x156c804 [node]
12: 0x155acc8 uv_run [node]
13: 0xa3ffe5 node::SpinEventLoop(node::Environment*) [node]
14: 0xb45056 node::NodeMainInstance::Run(node::EnvSerializeInfo const*) [node]
15: 0xac67e2 node::Start(int, char**) [node]
16: 0x7feaf05ffb75 __libc_start_main [/lib64/libc.so.6]
17: 0xa3bfbc [node]
--- SIGTRAP {si_signo=SIGTRAP, si_code=SI_KERNEL} ---
+++ killed by SIGTRAP (core dumped) +++