undici icon indicating copy to clipboard operation
undici copied to clipboard

bench: add websockets

Open tsctx opened this issue 9 months ago • 9 comments

Part of #3201

tsctx avatar May 05 '24 10:05 tsctx

Codecov Report

All modified and coverable lines are covered by tests :white_check_mark:

Project coverage is 94.17%. Comparing base (5d54543) to head (53de211). Report is 1 commit behind head on main.

Additional details and impacted files
@@           Coverage Diff           @@
##             main    #3203   +/-   ##
=======================================
  Coverage   94.17%   94.17%           
=======================================
  Files          90       90           
  Lines       24432    24432           
=======================================
  Hits        23009    23009           
  Misses       1423     1423           

:umbrella: View full report in Codecov by Sentry.
:loudspeaker: Have feedback on the report? Share it here.

codecov-commenter avatar May 05 '24 10:05 codecov-commenter

To have reliable benchmarks, you should use two processes, one for the server and one for the client, and ensure that the server is faster than the client; otherwise, you might end up benchmarking the server instead of the client. See the discussion in https://github.com/nodejs/node/pull/50586.

lpinca avatar May 05 '24 11:05 lpinca

Could I help with that PR somehow, @tsctx?

DarkGL avatar May 31 '24 11:05 DarkGL

> $ node ./benchmarks/websocket-benchmark.mjs
(node:9028) [UNDICI-WSS] Warning: WebSocketStream is experimental! Expect it to change at any time.
(Use `node --trace-warnings ...` to show where the warning was created)
undici [binary]: transferred 102.46MiB/s
undici [string]: transferred 99.43MiB/s
undici - stream [binary]: transferred 95.38MiB/s
undici - stream [string]: transferred 86.72MiB/s
ws [binary]: transferred 100.69MiB/s
ws [string]: transferred 95.50MiB/s

tsctx avatar Oct 01 '24 12:10 tsctx

@tsctx I'm a bit skeptical about the results

// server.js
// Echo server: every message received on any route is sent straight back.
const uws = require('uWebSockets.js');
const app = uws.App();

/**
 * Sends a received message back on the same socket, forwarding the
 * `isBinary` flag unchanged.
 */
function echoMessage(socket, payload, isBinary) {
  socket.send(payload, isBinary);
}

/**
 * Listen callback: prints the notice only when a truthy listen socket is
 * handed in; otherwise it stays silent.
 */
function reportListening(listenSocket) {
  if (!listenSocket) {
    return;
  }
  console.log('Server listening to port 8080');
}

const echoBehavior = {
  compression: uws.DISABLED,
  maxPayloadLength: 512 * 1024 * 1024,
  maxBackpressure: 128 * 1024,
  message: echoMessage
};

app.ws('/*', echoBehavior);
app.listen(8080, reportListening);
// ws-client.js
'use strict';

const { WebSocket } = require('ws');

// Usage: node ws-client.js <number of messages> <payload length in bytes>
const [, , messagesArg, payloadLengthArg] = process.argv;
const messages = Number(messagesArg);
const payloadLength = Number(payloadLengthArg);

// Payload of `payloadLength` bytes, filled with '_'.
const payload = Buffer.alloc(payloadLength, '_');

// The same label must be passed to console.time and console.timeEnd.
const timerLabel = `${messages} messages of ${payloadLength} bytes`;

const socket = new WebSocket('ws://127.0.0.1:8080');
socket.binaryType = 'arraybuffer';

let received = 0;

/**
 * Counts one received message. Sends the next payload until `messages`
 * messages have arrived, then stops the timer and closes the socket.
 */
function onMessage() {
  received += 1;

  if (received !== messages) {
    socket.send(payload);
    return;
  }

  console.timeEnd(timerLabel);
  socket.close();
}

// Start the timer and send the first payload once the connection is open.
socket.on('open', () => {
  console.time(timerLabel);
  socket.send(payload);
});

socket.on('message', onMessage);
// undici-client.js
'use strict';

// Usage: node undici-client.js <number of messages> <payload length in bytes>
// No import here: `WebSocket` is taken from the global scope
// (presumably Node's built-in, undici-based implementation; confirm for the
// Node version in use).
const [, , messagesArg, payloadLengthArg] = process.argv;
const messages = Number(messagesArg);
const payloadLength = Number(payloadLengthArg);

// Payload of `payloadLength` bytes, filled with '_'.
const payload = Buffer.alloc(payloadLength, '_');

// The same label must be passed to console.time and console.timeEnd.
const timerLabel = `${messages} messages of ${payloadLength} bytes`;

const socket = new WebSocket('ws://127.0.0.1:8080');
socket.binaryType = 'arraybuffer';

let received = 0;

/**
 * Counts one received message. Sends the next payload until `messages`
 * messages have arrived, then stops the timer and closes the socket.
 */
function onMessage() {
  received += 1;

  if (received !== messages) {
    socket.send(payload);
    return;
  }

  console.timeEnd(timerLabel);
  socket.close();
}

// Start the timer and send the first payload once the connection is open.
socket.addEventListener('open', () => {
  console.time(timerLabel);
  socket.send(payload);
});

socket.addEventListener('message', onMessage);
$ node ws-client.js 100000 125
100000 messages of 125 bytes: 8.972s
$ node undici-client.js 100000 125
100000 messages of 125 bytes: 9.446s

$ node ws-client.js 100000 1024
100000 messages of 1024 bytes: 9.474s
$ node undici-client.js 100000 1024
100000 messages of 1024 bytes: 10.430s

$ node ws-client.js 100000 262144
100000 messages of 262144 bytes: 1:33.381 (m:ss.mmm)
$ node undici-client.js 100000 262144
100000 messages of 262144 bytes: 3:06.370 (m:ss.mmm)

This is without binary addons.

lpinca avatar Oct 01 '24 16:10 lpinca

Same here, but I'm happily surprised undici is that close in both benchmarks (other than very large messages).

KhafraDev avatar Oct 01 '24 16:10 KhafraDev

Actually, masking seems to be the performance bottleneck, which I could improve based on these benchmarks...

Uzlopak avatar Oct 01 '24 17:10 Uzlopak

diff --git a/benchmarks/websocket/generate-mask.mjs b/benchmarks/websocket/generate-mask.mjs
index 032f05d8..c74cab08 100644
--- a/benchmarks/websocket/generate-mask.mjs
+++ b/benchmarks/websocket/generate-mask.mjs
@@ -1,20 +1,8 @@
-import { randomFillSync, randomBytes } from 'node:crypto'
+import { randomBytes } from 'node:crypto'
 import { bench, group, run } from 'mitata'
+import { generateMask } from "../../lib/web/websocket/frame.js"
 
-const BUFFER_SIZE = 16384
-
-const buf = Buffer.allocUnsafe(BUFFER_SIZE)
-let bufIdx = BUFFER_SIZE
-
-function generateMask () {
-  if (bufIdx === BUFFER_SIZE) {
-    bufIdx = 0
-    randomFillSync(buf, 0, BUFFER_SIZE)
-  }
-  return [buf[bufIdx++], buf[bufIdx++], buf[bufIdx++], buf[bufIdx++]]
-}
-
-group('generate', () => {
+group(function ()  {
   bench('generateMask', () => generateMask())
   bench('crypto.randomBytes(4)', () => randomBytes(4))
 })
diff --git a/lib/web/websocket/frame.js b/lib/web/websocket/frame.js
index e773b33e..c0b5d779 100644
--- a/lib/web/websocket/frame.js
+++ b/lib/web/websocket/frame.js
@@ -4,6 +4,8 @@ const { maxUnsigned16Bit, opcodes } = require('./constants')
 
 const BUFFER_SIZE = 8 * 1024
 
+const FIN = /** @type {const} */ (0x80)
+
 /** @type {import('crypto')} */
 let crypto
 let buffer = null
@@ -59,10 +61,7 @@ class WebsocketFrameSend {
 
     const buffer = Buffer.allocUnsafe(bodyLength + offset)
 
-    // Clear first 2 bytes, everything else is overwritten
-    buffer[0] = buffer[1] = 0
-    buffer[0] |= 0x80 // FIN
-    buffer[0] = (buffer[0] & 0xF0) + opcode // opcode
+    buffer[0] = FIN + opcode
 
     /*! ws. MIT License. Einar Otto Stangvik <[email protected]> */
     buffer[offset - 4] = maskKey[0]
@@ -70,21 +69,38 @@ class WebsocketFrameSend {
     buffer[offset - 2] = maskKey[2]
     buffer[offset - 1] = maskKey[3]
 
-    buffer[1] = payloadLength
+    buffer[1] = payloadLength | 0x80
 
-    if (payloadLength === 126) {
-      buffer.writeUInt16BE(bodyLength, 2)
-    } else if (payloadLength === 127) {
-      // Clear extended payload length
-      buffer[2] = buffer[3] = 0
-      buffer.writeUIntBE(bodyLength, 4, 6)
+    if (payloadLength > 125) {
+      if (payloadLength === 126) {
+        buffer.writeUInt16BE(bodyLength, 2)
+      } else if (payloadLength === 127) {
+        // Clear extended payload length
+        buffer[2] = buffer[3] = 0
+        buffer.writeUIntBE(bodyLength, 4, 6)
+      }
     }
 
-    buffer[1] |= 0x80 // MASK
+    const rest = bodyLength & 3
+    const p4 = bodyLength - rest
 
+    let i = 0
     // mask body
-    for (let i = 0; i < bodyLength; ++i) {
-      buffer[offset + i] = frameData[i] ^ maskKey[i & 3]
+    while (i < p4) { // one 4-byte group per pass; i advances exactly once, below
+      buffer[offset + i] = frameData[i] ^ maskKey[0]
+      buffer[offset + i + 1] = frameData[i + 1] ^ maskKey[1]
+      buffer[offset + i + 2] = frameData[i + 2] ^ maskKey[2]
+      buffer[offset + i + 3] = frameData[i + 3] ^ maskKey[3]
+      i += 4
+    }
+
+    switch (rest) {
+      case 3:
+        buffer[offset + i + 2] = frameData[i + 2] ^ maskKey[2]
+      case 2:
+        buffer[offset + i + 1] = frameData[i + 1] ^ maskKey[1]
+      case 1:
+        buffer[offset + i] = frameData[i] ^ maskKey[0]
     }
 
     return buffer
@@ -134,5 +150,6 @@ class WebsocketFrameSend {
 }
 
 module.exports = {
+  generateMask,
   WebsocketFrameSend
 }

Uzlopak avatar Oct 01 '24 17:10 Uzlopak

@lpinca The difference in performance was due to the number of iterations; increasing it to 512 produces more accurate results.

> $ node ./benchmarks/websocket-benchmark.mjs
(node:3264) [UNDICI-WSS] Warning: WebSocketStream is experimental! Expect it to change at any time.
(Use `node --trace-warnings ...` to show where the warning was created)
undici [binary]: transferred 98.04MiB Bytes/s
undici [string]: transferred 99.85MiB Bytes/s
undici - stream [binary]: transferred 100.05MiB Bytes/s
undici - stream [string]: transferred 94.04MiB Bytes/s
ws [binary]: transferred 119.42MiB Bytes/s
ws [string]: transferred 111.87MiB Bytes/s

tsctx avatar Oct 02 '24 10:10 tsctx