FastestSmallestTextEncoderDecoder icon indicating copy to clipboard operation
FastestSmallestTextEncoderDecoder copied to clipboard

encodeInto Implementation does not pass web platform tests for encodeInto

Open marcusdarmstrong opened this issue 2 years ago • 3 comments

We noticed encoding issues using this library in production, and after some investigation it turns out there's a correctness issue in the library. I took the official WPT testcases and threw a loose harness around them to make them runnable:

// META: global=window,worker
// META: script=/common/sab.js
delete TextEncoder;
require('fastestsmallesttextencoderdecoder-encodeinto/EncoderDecoderTogether.min');
self = globalThis;

function createBuffer(t, s) {
  return new ArrayBuffer(s);
}

function assert_equals(a, b) {
  if (a !== b) {
    throw new Error(`Assertion failed: ${a} is not equal to ${b}`);
  }
}

function assert_throws_js(e, c) {
  try {
    c();
  } catch (err) {
    if (!(err instanceof e)) {
      throw err;
    }
  }
}

function test(t, d) {
  try {
    t();
  } catch (e) {
    console.log('Test failed with error', e);
  }
}

[
  {
    input: 'Hi',
    read: 0,
    destinationLength: 0,
    written: [],
  },
  {
    input: 'A',
    read: 1,
    destinationLength: 10,
    written: [0x41],
  },
  {
    input: '\u{1D306}', // "\uD834\uDF06"
    read: 2,
    destinationLength: 4,
    written: [0xf0, 0x9d, 0x8c, 0x86],
  },
  {
    input: '\u{1D306}A',
    read: 0,
    destinationLength: 3,
    written: [],
  },
  {
    input: '\uD834A\uDF06A¥Hi',
    read: 5,
    destinationLength: 10,
    written: [0xef, 0xbf, 0xbd, 0x41, 0xef, 0xbf, 0xbd, 0x41, 0xc2, 0xa5],
  },
  {
    input: 'A\uDF06',
    read: 2,
    destinationLength: 4,
    written: [0x41, 0xef, 0xbf, 0xbd],
  },
  {
    input: '¥¥',
    read: 2,
    destinationLength: 4,
    written: [0xc2, 0xa5, 0xc2, 0xa5],
  },
].forEach((testData) => {
  [
    {
      bufferIncrease: 0,
      destinationOffset: 0,
      filler: 0,
    },
    {
      bufferIncrease: 10,
      destinationOffset: 4,
      filler: 0,
    },
    {
      bufferIncrease: 0,
      destinationOffset: 0,
      filler: 0x80,
    },
    {
      bufferIncrease: 10,
      destinationOffset: 4,
      filler: 0x80,
    },
    {
      bufferIncrease: 0,
      destinationOffset: 0,
      filler: 'random',
    },
    {
      bufferIncrease: 10,
      destinationOffset: 4,
      filler: 'random',
    },
  ].forEach((destinationData) => {
    ['ArrayBuffer', 'SharedArrayBuffer'].forEach((arrayBufferOrSharedArrayBuffer) => {
      test(() => {
        // Setup
        const bufferLength = testData.destinationLength + destinationData.bufferIncrease;
        const destinationOffset = destinationData.destinationOffset;
        const destinationLength = testData.destinationLength;
        const destinationFiller = destinationData.filler;
        const encoder = new TextEncoder();
        const buffer = createBuffer(arrayBufferOrSharedArrayBuffer, bufferLength);
        const view = new Uint8Array(buffer, destinationOffset, destinationLength);
        const fullView = new Uint8Array(buffer);
        const control = new Array(bufferLength);
        let byte = destinationFiller;
        for (let i = 0; i < bufferLength; i++) {
          if (destinationFiller === 'random') {
            byte = Math.floor(Math.random() * 256);
          }
          control[i] = byte;
          fullView[i] = byte;
        }

        // It's happening
        const result = encoder.encodeInto(testData.input, view);

        // Basics
        assert_equals(view.byteLength, destinationLength);
        assert_equals(view.length, destinationLength);

        // Remainder
        assert_equals(result.read, testData.read);
        assert_equals(result.written, testData.written.length);
        for (let i = 0; i < bufferLength; i++) {
          if (i < destinationOffset || i >= destinationOffset + testData.written.length) {
            assert_equals(fullView[i], control[i]);
          } else {
            assert_equals(fullView[i], testData.written[i - destinationOffset]);
          }
        }
      }, 'encodeInto() into ' + arrayBufferOrSharedArrayBuffer + ' with ' + testData.input + ' and destination length ' + testData.destinationLength + ', offset ' + destinationData.destinationOffset + ', filler ' + destinationData.filler);
    });
  });
});

[
  'DataView',
  'Int8Array',
  'Int16Array',
  'Int32Array',
  'Uint16Array',
  'Uint32Array',
  'Uint8ClampedArray',
  'BigInt64Array',
  'BigUint64Array',
  'Float32Array',
  'Float64Array',
].forEach((type) => {
  ['ArrayBuffer', 'SharedArrayBuffer'].forEach((arrayBufferOrSharedArrayBuffer) => {
    test(() => {
      const viewInstance = new self[type](createBuffer(arrayBufferOrSharedArrayBuffer, 0));
      assert_throws_js(TypeError, () => new TextEncoder().encodeInto('', viewInstance));
    }, 'Invalid encodeInto() destination: ' + type + ', backed by: ' + arrayBufferOrSharedArrayBuffer);
  });
});

['ArrayBuffer', 'SharedArrayBuffer'].forEach((arrayBufferOrSharedArrayBuffer) => {
  test(() => {
    assert_throws_js(TypeError, () =>
      new TextEncoder().encodeInto('', createBuffer(arrayBufferOrSharedArrayBuffer, 10)),
    );
  }, 'Invalid encodeInto() destination: ' + arrayBufferOrSharedArrayBuffer);
});

test(() => {
  const buffer = new ArrayBuffer(10),
    view = new Uint8Array(buffer);
  let { read, written } = new TextEncoder().encodeInto('', view);
  assert_equals(read, 0);
  assert_equals(written, 0);
  new MessageChannel().port1.postMessage(buffer, [buffer]);
  ({ read, written } = new TextEncoder().encodeInto('', view));
  assert_equals(read, 0);
  assert_equals(written, 0);
  ({ read, written } = new TextEncoder().encodeInto('test', view));
  assert_equals(read, 0);
  assert_equals(written, 0);
}, 'encodeInto() and a detached output buffer');

This then fails with a bunch of failed a assertions: Test failed with error Error: Assertion failed: 1 is not equal to 2. Commenting out the delete TextEncoder line then allows us to verify that the tests pass when executed with the native TextEncoder.

marcusdarmstrong avatar Dec 20 '22 13:12 marcusdarmstrong

It's specifically failing the read count for this case:

    input: '\u{1D306}', // "\uD834\uDF06"
    read: 2,
    destinationLength: 4,
    written: [0xf0, 0x9d, 0x8c, 0x86],
  },```

marcusdarmstrong avatar Dec 20 '22 13:12 marcusdarmstrong

hi @marcusdarmstrong did you fix this issue or en up using another lib?

landabaso avatar Jan 14 '24 07:01 landabaso

In our case we were using encodeInto out of theoretical correctness rather than need; our underlying stream was being buffered anyway so we simply swapped over to encode.

marcusdarmstrong avatar Jan 14 '24 12:01 marcusdarmstrong