FastestSmallestTextEncoderDecoder icon indicating copy to clipboard operation
FastestSmallestTextEncoderDecoder copied to clipboard

encodeInto Implementation does not pass web platform tests for encodeInto

Open marcusdarmstrong opened this issue 1 year ago • 3 comments

We noticed encoding issues using this library in production, and after some investigation it turns out there's a correctness issue in the library. I took the official WPT testcases and threw a loose harness around them to make them runnable:

// META: global=window,worker
// META: script=/common/sab.js
delete TextEncoder;
require('fastestsmallesttextencoderdecoder-encodeinto/EncoderDecoderTogether.min');
self = globalThis;

function createBuffer(t, s) {
  return new ArrayBuffer(s);
}

function assert_equals(a, b) {
  if (a !== b) {
    throw new Error(`Assertion failed: ${a} is not equal to ${b}`);
  }
}

function assert_throws_js(e, c) {
  try {
    c();
  } catch (err) {
    if (!(err instanceof e)) {
      throw err;
    }
  }
}

function test(t, d) {
  try {
    t();
  } catch (e) {
    console.log('Test failed with error', e);
  }
}

[
  {
    input: 'Hi',
    read: 0,
    destinationLength: 0,
    written: [],
  },
  {
    input: 'A',
    read: 1,
    destinationLength: 10,
    written: [0x41],
  },
  {
    input: '\u{1D306}', // "\uD834\uDF06"
    read: 2,
    destinationLength: 4,
    written: [0xf0, 0x9d, 0x8c, 0x86],
  },
  {
    input: '\u{1D306}A',
    read: 0,
    destinationLength: 3,
    written: [],
  },
  {
    input: '\uD834A\uDF06A¥Hi',
    read: 5,
    destinationLength: 10,
    written: [0xef, 0xbf, 0xbd, 0x41, 0xef, 0xbf, 0xbd, 0x41, 0xc2, 0xa5],
  },
  {
    input: 'A\uDF06',
    read: 2,
    destinationLength: 4,
    written: [0x41, 0xef, 0xbf, 0xbd],
  },
  {
    input: '¥¥',
    read: 2,
    destinationLength: 4,
    written: [0xc2, 0xa5, 0xc2, 0xa5],
  },
].forEach((testData) => {
  [
    {
      bufferIncrease: 0,
      destinationOffset: 0,
      filler: 0,
    },
    {
      bufferIncrease: 10,
      destinationOffset: 4,
      filler: 0,
    },
    {
      bufferIncrease: 0,
      destinationOffset: 0,
      filler: 0x80,
    },
    {
      bufferIncrease: 10,
      destinationOffset: 4,
      filler: 0x80,
    },
    {
      bufferIncrease: 0,
      destinationOffset: 0,
      filler: 'random',
    },
    {
      bufferIncrease: 10,
      destinationOffset: 4,
      filler: 'random',
    },
  ].forEach((destinationData) => {
    ['ArrayBuffer', 'SharedArrayBuffer'].forEach((arrayBufferOrSharedArrayBuffer) => {
      test(() => {
        // Setup
        const bufferLength = testData.destinationLength + destinationData.bufferIncrease;
        const destinationOffset = destinationData.destinationOffset;
        const destinationLength = testData.destinationLength;
        const destinationFiller = destinationData.filler;
        const encoder = new TextEncoder();
        const buffer = createBuffer(arrayBufferOrSharedArrayBuffer, bufferLength);
        const view = new Uint8Array(buffer, destinationOffset, destinationLength);
        const fullView = new Uint8Array(buffer);
        const control = new Array(bufferLength);
        let byte = destinationFiller;
        for (let i = 0; i < bufferLength; i++) {
          if (destinationFiller === 'random') {
            byte = Math.floor(Math.random() * 256);
          }
          control[i] = byte;
          fullView[i] = byte;
        }

        // It's happening
        const result = encoder.encodeInto(testData.input, view);

        // Basics
        assert_equals(view.byteLength, destinationLength);
        assert_equals(view.length, destinationLength);

        // Remainder
        assert_equals(result.read, testData.read);
        assert_equals(result.written, testData.written.length);
        for (let i = 0; i < bufferLength; i++) {
          if (i < destinationOffset || i >= destinationOffset + testData.written.length) {
            assert_equals(fullView[i], control[i]);
          } else {
            assert_equals(fullView[i], testData.written[i - destinationOffset]);
          }
        }
      }, 'encodeInto() into ' + arrayBufferOrSharedArrayBuffer + ' with ' + testData.input + ' and destination length ' + testData.destinationLength + ', offset ' + destinationData.destinationOffset + ', filler ' + destinationData.filler);
    });
  });
});

[
  'DataView',
  'Int8Array',
  'Int16Array',
  'Int32Array',
  'Uint16Array',
  'Uint32Array',
  'Uint8ClampedArray',
  'BigInt64Array',
  'BigUint64Array',
  'Float32Array',
  'Float64Array',
].forEach((type) => {
  ['ArrayBuffer', 'SharedArrayBuffer'].forEach((arrayBufferOrSharedArrayBuffer) => {
    test(() => {
      const viewInstance = new self[type](createBuffer(arrayBufferOrSharedArrayBuffer, 0));
      assert_throws_js(TypeError, () => new TextEncoder().encodeInto('', viewInstance));
    }, 'Invalid encodeInto() destination: ' + type + ', backed by: ' + arrayBufferOrSharedArrayBuffer);
  });
});

['ArrayBuffer', 'SharedArrayBuffer'].forEach((arrayBufferOrSharedArrayBuffer) => {
  test(() => {
    assert_throws_js(TypeError, () =>
      new TextEncoder().encodeInto('', createBuffer(arrayBufferOrSharedArrayBuffer, 10)),
    );
  }, 'Invalid encodeInto() destination: ' + arrayBufferOrSharedArrayBuffer);
});

test(() => {
  const buffer = new ArrayBuffer(10),
    view = new Uint8Array(buffer);
  let { read, written } = new TextEncoder().encodeInto('', view);
  assert_equals(read, 0);
  assert_equals(written, 0);
  new MessageChannel().port1.postMessage(buffer, [buffer]);
  ({ read, written } = new TextEncoder().encodeInto('', view));
  assert_equals(read, 0);
  assert_equals(written, 0);
  ({ read, written } = new TextEncoder().encodeInto('test', view));
  assert_equals(read, 0);
  assert_equals(written, 0);
}, 'encodeInto() and a detached output buffer');

This then fails with a bunch of failed a assertions: Test failed with error Error: Assertion failed: 1 is not equal to 2. Commenting out the delete TextEncoder line then allows us to verify that the tests pass when executed with the native TextEncoder.

marcusdarmstrong avatar Dec 20 '22 13:12 marcusdarmstrong