mp4box.js icon indicating copy to clipboard operation
mp4box.js copied to clipboard

tx3g codec for texttrack: sometimes starts with strange characters

Open publicocean0 opened this issue 8 years ago • 3 comments

// Sample entry types "mett", "sbtt", "stxt" and "tx3g" are all handled by Textin4Parser.
if (["mett", "sbtt", "stxt", "tx3g"].indexOf(sample.description.type) !== -1) {
  sampleParser = new Textin4Parser();
  // When a txtC box is present and j is 0, make sure its configuration has been
  // parsed (parsing is skipped if a cached config already exists), then log it
  // and attach it to the text track.
  if (sample.description.txtC && j === 0) {
    if (!sample.description.txtC.config) {
      sample.description.txtC.config = sampleParser.parseConfig(sample.description.txtC.data);
    }
    logger("parser Configuration: ", sample.description.txtC.config);
    texttrack.config = sample.description.txtC.config;
  }
  var textSample = sampleParser.parseSample(sample);
  logger("parsed text sample at time " + Log.getDurationString(sample.dts, sample.timescale) + " :", textSample);
  // Report the cue; start/end times are dts-based values divided by the timescale.
  options.onCue({
    isRap: sample.is_rap,
    trackId: id,
    startTime: sample.dts / sample.timescale,
    endTime: (sample.dts + sample.duration) / sample.timescale,
    text: textSample
  });
}

publicocean0 avatar Jul 20 '16 23:07 publicocean0

i found the error

MP4BoxStream.prototype.readString

This conversion does not decode UTF-8. Are there strings that are not UTF-8 encoded?

In a tx3g sample, the UTF-8 text is preceded by 2 bytes that hold the length of the string.

After that, you can read the byte array and decode it with:

arrayBufferToString=function(array) {
    var out, i, len, c;
    var char2, char3;
    out = "";
    len = array.length;
    i = 0;
    while(i > 4)
    { 
      case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
        // 0xxxxxxx
        out += String.fromCharCode(c);
        break;
      case 12: case 13:
        // 110x xxxx   10xx xxxx
        char2 = array[i++];
        out += String.fromCharCode(((c & 0x1F) 

publicocean0 avatar Jul 21 '16 09:07 publicocean0

I confirmed the encoding is UTF-8; using this function works. The question now is:

"mett" || sample.description.type === "sbtt" || sample.description.type === "stxt" is correct with the old coding?

publicocean0 avatar Jul 21 '16 11:07 publicocean0

Solved by changing the MP4BoxStream method "readString" to this:

/**
 * Reads `length` bytes from the stream, one at a time via readUint8(), and
 * returns them decoded by Utf8ArrayToStr().
 *
 * @param {number} length - number of bytes to consume
 * @returns {string} the decoded string
 * @throws {string} "Not enough bytes in buffer" when the request does not fit
 *                  between the current position and the end of `uint8`
 */
MP4BoxStream.prototype.readString = function(length) {
  // Negated "<=" (rather than ">") so a NaN comparison still takes the throw path.
  var fitsInBuffer = this.position + length <= this.uint8.length;
  if (!fitsInBuffer) {
    throw ("Not enough bytes in buffer");
  }

  var bytes = [];
  while (bytes.length < length) {
    bytes.push(this.readUint8());
  }
  return this.Utf8ArrayToStr(bytes);
};
/**
 * Decodes a sequence of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Well-formed 1- to 4-byte sequences are decoded; code points above U+FFFF
 * are emitted as a surrogate pair. Malformed input never produces garbage
 * characters: a stray continuation byte (0x80-0xBF), an invalid lead byte
 * (0xF8-0xFF), a sequence that is cut short or followed by a non-continuation
 * byte, or a code point above U+10FFFF causes the offending lead byte to be
 * replaced by U+FFFD, and decoding resumes at the next byte.
 *
 * @param {Array<number>|Uint8Array} data - byte values to decode
 * @param {number} [offset=2] - index of the first byte to decode. Defaults to
 *        2 so existing one-argument callers keep skipping the first two bytes
 *        (presumably a 16-bit length prefix, as in tx3g samples - confirm
 *        against callers). Pass 0 to decode the whole array.
 * @returns {string} the decoded text ('' when there is nothing past `offset`)
 */
MP4BoxStream.prototype.Utf8ArrayToStr = function(data, offset) {
  var REPLACEMENT_CHAR = '\uFFFD';
  var str = '';
  var i = (offset == null) ? 2 : offset;

  while (i < data.length) {
    var lead = data[i];
    var extra;      // number of continuation bytes announced by the lead byte
    var codePoint;  // payload bits accumulated so far

    if (lead < 0x80) {
      // 0xxxxxxx: plain ASCII
      str += String.fromCharCode(lead);
      i += 1;
      continue;
    }

    if (lead > 0xBF && lead < 0xE0) {
      // 110xxxxx 10xxxxxx
      extra = 1;
      codePoint = lead & 0x1F;
    } else if (lead > 0xDF && lead < 0xF0) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      extra = 2;
      codePoint = lead & 0x0F;
    } else if (lead > 0xEF && lead < 0xF8) {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      extra = 3;
      codePoint = lead & 0x07;
    } else {
      // Stray continuation byte or a lead byte that UTF-8 does not define.
      extra = 0;
    }

    // The sequence is usable only if every announced continuation byte is
    // present in the array and actually has the 10xxxxxx form.
    var valid = extra > 0 && i + extra < data.length;
    for (var k = 1; valid && k <= extra; k++) {
      var next = data[i + k];
      if ((next & 0xC0) !== 0x80) {
        valid = false;
      } else {
        codePoint = codePoint << 6 | (next & 0x3F);
      }
    }

    if (!valid || codePoint > 0x10FFFF) {
      str += REPLACEMENT_CHAR;
      i += 1;
      continue;
    }

    if (codePoint > 0xFFFF) {
      // Outside the BMP: split into a UTF-16 surrogate pair.
      codePoint -= 0x010000;
      str += String.fromCharCode(codePoint >> 10 | 0xD800, codePoint & 0x03FF | 0xDC00);
    } else {
      str += String.fromCharCode(codePoint);
    }
    i += extra + 1;
  }
  return str;
};

jhintringer avatar Mar 19 '18 13:03 jhintringer