node-datachannel icon indicating copy to clipboard operation
node-datachannel copied to clipboard

Incoming DataChannels do not always close which leaks memory

Open achingbrain opened this issue 7 months ago • 7 comments

I've noticed that sometimes incoming DataChannels do not always emit a close event and/or get garbage collected which leaks memory. It happens when closing PeerConnections with open channels, but also with channels that have been closed explicitly.

Here's a reproduction. It creates two RTCPeerConnections and connects them together. One PeerConnection opens a number of channels and sends data over them, the other just echoes any data sent back over any channels that have been opened. When all the data has been sent and received the connections are closed and the event loop should exit.

It doesn't happen every time but if you use until-death to run the script repeatedly it'll fail to exit eventually and why-is-node-running will then print the open handles that are stopping node from exiting.

import why from 'why-is-node-running'
import { RTCPeerConnection } from 'node-datachannel/polyfill'

process.stdout.write('.')

// how many channels to open
const numChannels = 20

// how much data to send on each one
const send = 1024 * 1024

// the chunk size to send - needs to divide `send` with no remainder
const chunkSize = 1024

const peer1 = new RTCPeerConnection()
const peer2 = new RTCPeerConnection()

// track channel status
const channelStatus = {}

// echo any data back to the sender
peer2.addEventListener('datachannel', (evt) => {
  const channel = evt.channel
  const label = channel.label

  channelStatus[`incoming-${label}`] = 'open'

  channel.addEventListener('message', (evt) => {
    channel.send(evt.data)
  })
  channel.addEventListener('close', (evt) => {
    delete channelStatus[`incoming-${label}`]
  })
})

const channels = []

// create channels
for (let i = 0; i < numChannels; i++) {
  channels.push(peer1.createDataChannel(`c-${i}`))
}

// ensure peers are connected
await connectPeers(peer1, peer2)

// send data over each channel in parallel
await Promise.all(
  channels.map(async channel => {
    channel.binaryType = 'arraybuffer'
    const label = channel.label

    await isOpen(channel)

    channelStatus[`outgoing-${label}`] = 'open'

    // send data and wait for it to be echoed back
    return new Promise((resolve, reject) => {
      let received = 0
      let sent = 0

      channel.addEventListener('message', (evt) => {
        received += evt.data.byteLength

        // all data has been echoed back to us
        if (received === send) {
          // this makes no difference
          channel.close()
          resolve()
        }
      })

      channel.addEventListener('close', (evt) => {
        delete channelStatus[`outgoing-${label}`]
      })

      while (sent !== send) {
        channel.send(new Uint8Array(chunkSize))
        sent += chunkSize
      }
    })
  })
)

// close connections
peer1.close()
peer2.close()

// print open handles after 5s - unref so this timeout doesn't keep the event loop running
setTimeout(() => {
  console.info('\n-- channels')
  console.info(JSON.stringify(channelStatus, null, 2))
  console.info('')

  why()
}, 5_000).unref()

export async function connectPeers (peer1, peer2) {
  const peer1Offer = await peer1.createOffer();
  await peer2.setRemoteDescription(peer1Offer);

  const peer2Answer = await peer2.createAnswer();
  await peer1.setRemoteDescription(peer2Answer);

  peer1.addEventListener('icecandidate', (e) => {
    peer2.addIceCandidate(e.candidate);
  });

  peer2.addEventListener('icecandidate', (e) => {
    peer1.addIceCandidate(e.candidate);
  });

  await Promise.all([
    isConnected(peer1),
    isConnected(peer2)
  ])
}

async function isConnected (peer) {
  return new Promise((resolve, reject) => {
    peer.addEventListener('connectionstatechange', () => {
      if (peer.connectionState === 'connected') {
        resolve()
      }

      if (peer.connectionState === 'failed') {
        reject(new Error('Connection failed'))
      }
    })
  })
}

async function isOpen (dc) {
  if (dc.readyState === 'open') {
    return
  }

  return new Promise((resolve, reject) => {
    dc.addEventListener('open', () => {
      resolve()
    })
    dc.addEventListener('error', () => {
      reject(new Error('Channel error'))
    })
  })
}
% npx until-death node index.js
....
-- channels
{
  "incoming-c-0": "open"
}

There are 4 handle(s) keeping the process running.

# TTYWRAP
index.js:4 - process.stdout.write('.')

# ThreadSafeCallback callback
node_modules/node-datachannel/dist/esm/polyfill/RTCDataChannel.mjs:45    - __privateGet(this, _dataChannel).onClosed(() => {
node_modules/node-datachannel/dist/esm/polyfill/RTCPeerConnection.mjs:92 - const dc = new RTCDataChannel(channel);

# TTYWRAP
(unknown stack trace)

# TickObject
(unknown stack trace)

achingbrain avatar Apr 16 '25 06:04 achingbrain

I can confirm this.

mertushka avatar Apr 17 '25 19:04 mertushka

@murat-dogan @paullouisageneau do you have any thoughts on this?

achingbrain avatar May 01 '25 09:05 achingbrain

Hi @achingbrain ,

I will take a look this weekend.

murat-dogan avatar May 02 '25 14:05 murat-dogan

Here are the referenced issues that may be relevant:

https://github.com/mertushka/haxball.js/issues/49 https://github.com/mertushka/haxball.js/issues/53

Also, the <DataChannel>.readyState inconsistency caused by this race condition—which I identified four months ago: https://github.com/murat-dogan/node-datachannel/issues/326

mertushka avatar May 02 '25 15:05 mertushka

Thank you @achingbrain for the example code.

It seems there is a race condition in libdatachannel. I opened a PR.

Could you please also test it?

Maybe we can create a folder in the test part, to save @achingbrain code for future reference and testing.

murat-dogan avatar May 03 '25 16:05 murat-dogan

Looks like @paullouisageneau merged your PR, I'll wait for it to be released before spending more time here.

As a side benefit if he performs a release it will mean we can finally land #256. It would be nice to get that in before the PR celebrates it's first birthday.

achingbrain avatar May 07 '25 08:05 achingbrain

I have published a new version including the latest changes https://github.com/murat-dogan/node-datachannel/releases/tag/v0.28.0

murat-dogan avatar Jun 09 '25 15:06 murat-dogan