Memory leak when calling decode_arithmetic()
Currently using @farcaster/shuttle and experiencing a gnarly memory leak. I loaded up a heapdump and narrowed it down to a dynamically growing array or possibly a few variants.
export function decode_arithmetic(bytes) {
let pos = 0;
function u16() { return (bytes[pos++] << 8) | bytes[pos++]; }
// decode the frequency table
let symbol_count = u16();
let total = 1;
let acc = [0, 1]; // first symbol has frequency 1
for (let i = 1; i < symbol_count; i++) {
acc.push(total += u16()); // Potential memory growth issue
}
// skip the sized-payload that the last 3 symbols index into
let skip = u16();
let pos_payload = pos;
pos += skip;
let read_width = 0;
let read_buffer = 0;
function read_bit() {
if (read_width == 0) {
read_buffer = (read_buffer << 8) | bytes[pos++]; // Potential out-of-bounds read
read_width = 8;
}
return (read_buffer >> --read_width) & 1;
}
const N = 31;
const FULL = 2**N;
const HALF = FULL >>> 1;
const QRTR = HALF >> 1;
const MASK = FULL - 1;
// fill register
let register = 0;
for (let i = 0; i < N; i++) register = (register << 1) | read_bit();
let symbols = []; // Potential memory growth issue
let low = 0;
let range = FULL; // treat like a float
while (true) {
let value = Math.floor((((register - low + 1) * total) - 1) / range);
let start = 0;
let end = symbol_count;
while (end - start > 1) { // binary search loop
let mid = (start + end) >>> 1;
if (value < acc[mid]) {
end = mid;
} else {
start = mid;
}
}
if (start == 0) break; // first symbol is end mark
symbols.push(start); // Potential memory growth issue
let a = low + Math.floor(range * acc[start] / total);
let b = low + Math.floor(range * acc[start+1] / total) - 1
while (((a ^ b) & HALF) == 0) {
register = (register << 1) & MASK | read_bit();
a = (a << 1) & MASK;
b = (b << 1) & MASK | 1;
}
while (a & ~b & QRTR) {
register = (register & HALF) | ((register << 1) & (MASK >>> 1)) | read_bit();
a = (a << 1) ^ HALF;
b = ((b ^ HALF) << 1) | HALF | 1;
}
low = a;
range = 1 + b - a;
}
let offset = symbol_count - 4;
return symbols.map(x => { // Potential out-of-bounds access
switch (x - offset) {
case 3: return offset + 0x10100 + ((bytes[pos_payload++] << 16) | (bytes[pos_payload++] << 8) | bytes[pos_payload++]);
case 2: return offset + 0x100 + ((bytes[pos_payload++] << 8) | bytes[pos_payload++]);
case 1: return offset + bytes[pos_payload++];
default: return x - 1;
}
});
}
Steps to reproduce:
- Run https://github.com/christopherwxyz/enterprise-shuttle in a local environment (happy to share this on a call since setup is non-trivial).
- Observe heapdump after 5 minutes of run.
- Log out to Chrome devtools.
That function shouldn't run more than once per blob, of which there are 2.
Let me see if I can run this.
There's not enough information here for me to run this, can you provide steps?
- Not sure if it's minimally reproducible. It's being called from enterprise-shuttle -> @farcaster/hub-nodejs -> @farcaster/core -> viem -> ens-normalizer
- You'll need to run a shuttle instance and let it expand the dynamic array out to a size large enough to prevent garbage collection.
- I ran this in the dockerfile container locally, then heapdump to a local file to analyze.
I temporarily added an deinit() function and called init(); deinit(); in a loop while observing process.memoryUsage() and see no leak. IMO, it very unlikely that there is a leak at this level, as I'm only building dag-like structures with Set, Map, and primitives. I also took special care to avoid large spreads to avoid JS engine limitations.
However, this library does have a non-zero setup cost to decompress the full spec and build the appropriate structures. Is your project (or some part of it) using a short-lived worker?
Can you provide me a zip or the JS bundle of whatever code is actually being executed? Possibly the inline base64 blob is getting mangled during your build process? decode_arithmetic() will certainty do something silly if the incoming data is non-sense (eg. zip-bomb equivalent expansion)