mp4box.js
mp4box.js copied to clipboard
tx3g codec for texttrack: sometimes starts with strange characters
// Text-based sample entries ("mett", "sbtt", "stxt", "tx3g") are parsed with a
// Textin4Parser and delivered to the application through options.onCue.
if (["mett", "sbtt", "stxt", "tx3g"].indexOf(sample.description.type) !== -1) {
  sampleParser = new Textin4Parser();

  // The txtC configuration is only looked at when j is 0. It is parsed at most
  // once: the parsed result is stored back on the sample description and
  // reused whenever it is already present.
  if (j === 0 && sample.description.txtC) {
    if (!sample.description.txtC.config) {
      sample.description.txtC.config = sampleParser.parseConfig(sample.description.txtC.data);
    }
    logger("parser Configuration: ", sample.description.txtC.config);
    texttrack.config = sample.description.txtC.config;
  }

  var textSample = sampleParser.parseSample(sample);
  logger(
    "parsed text sample at time " + Log.getDurationString(sample.dts, sample.timescale) + " :",
    textSample
  );

  // Cue start/end are the sample's dts and dts + duration, each divided by the
  // sample's timescale.
  options.onCue({
    isRap: sample.is_rap,
    trackId: id,
    startTime: sample.dts / sample.timescale,
    endTime: (sample.dts + sample.duration) / sample.timescale,
    text: textSample
  });
}
I found the error in
MP4BoxStream.prototype.readString
This conversion does not decode UTF-8. Are there strings that are not UTF-8 encoded?
In a tx3g text sample, the first 2 bytes are the length of the UTF-8 string.
After those 2 bytes you can read the byte array with:
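arrayBufferToString = function(array) {
  var out, i, len, c;
  var char2, char3;
  out = "";
  len = array.length;
  i = 0;
  while (i < len) {
    c = array[i++];
    switch (c >> 4) {
      case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
        // 0xxxxxxx
        out += String.fromCharCode(c);
        break;
      case 12: case 13:
        // 110x xxxx 10xx xxxx
        char2 = array[i++];
        out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
        break;
      case 14:
        // 1110 xxxx 10xx xxxx 10xx xxxx
        char2 = array[i++];
        char3 = array[i++];
        out += String.fromCharCode(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F));
        break;
    }
  }
  return out;
}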
I confirmed the encoding is UTF-8, and using this function works. The question now is:
are samples with sample.description.type === "mett" || sample.description.type === "sbtt" || sample.description.type === "stxt" decoded correctly with the old conversion?
Solved by changing the MP4BoxStream method "readString" to this:
/**
 * Reads `length` bytes from the stream, one at a time through readUint8(),
 * and returns them decoded by this.Utf8ArrayToStr.
 *
 * Note: Utf8ArrayToStr begins decoding at index 2 of the array it is given,
 * so the first two bytes read here do not appear in the returned string.
 *
 * @param {number} length - number of bytes to consume from the stream.
 * @returns {string} the decoded string.
 * @throws {string} "Not enough bytes in buffer" when position + length
 *   exceeds the length of the underlying uint8 array.
 */
MP4BoxStream.prototype.readString = function(length) {
  var fitsInBuffer = this.position + length <= this.uint8.length;
  if (!fitsInBuffer) {
    throw ("Not enough bytes in buffer");
  }

  var bytes = [];
  for (var count = 0; count < length; count += 1) {
    bytes.push(this.readUint8());
  }
  return this.Utf8ArrayToStr(bytes);
}
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Malformed input never consumes bytes that belong to the next character:
 * an invalid lead byte (a stray continuation byte 0x80-0xBF, or 0xF8-0xFF),
 * a sequence that is truncated or has a non-continuation byte where a
 * continuation is required, and a 4-byte sequence whose value falls outside
 * U+10000..U+10FFFF each produce a single U+FFFD replacement character and
 * decoding resumes at the following byte.
 *
 * @param {Array<number>|Uint8Array} data - byte values to decode.
 * @param {number} [start=2] - index of the first byte to decode. The default
 *   of 2 keeps the historical behaviour of skipping two leading bytes
 *   (presumably the 16-bit length prefix of a tx3g text sample - confirm
 *   against callers). Pass 0 to decode the whole array.
 * @returns {string} the decoded text.
 */
MP4BoxStream.prototype.Utf8ArrayToStr = function(data, start) {
  var REPLACEMENT = '\uFFFD';
  var str = '';
  var i = (start === undefined || start === null) ? 2 : start;

  while (i < data.length) {
    var value = data[i];
    var extra;      // number of continuation bytes the lead byte announces
    var codePoint;  // payload bits accumulated so far

    if (value < 0x80) {
      extra = 0;
      codePoint = value;
    } else if (value > 0xBF && value < 0xE0) {
      extra = 1;
      codePoint = value & 0x1F;
    } else if (value > 0xDF && value < 0xF0) {
      extra = 2;
      codePoint = value & 0x0F;
    } else if (value > 0xEF && value < 0xF8) {
      extra = 3;
      codePoint = value & 0x07;
    } else {
      // Stray continuation byte or a byte that can never start a sequence.
      extra = -1;
    }

    // The whole sequence must be present and every trailing byte must have
    // the 10xxxxxx continuation form.
    var valid = extra >= 0 && i + extra < data.length;
    for (var k = 1; valid && k <= extra; k++) {
      var next = data[i + k];
      if ((next & 0xC0) !== 0x80) {
        valid = false;
      } else {
        codePoint = (codePoint << 6) | (next & 0x3F);
      }
    }

    // A 4-byte sequence must map to a supplementary-plane code point,
    // otherwise the surrogate computation below would produce garbage.
    if (valid && extra === 3 && (codePoint < 0x10000 || codePoint > 0x10FFFF)) {
      valid = false;
    }

    if (!valid) {
      str += REPLACEMENT;
      i += 1;
      continue;
    }

    if (codePoint < 0x10000) {
      str += String.fromCharCode(codePoint);
    } else {
      // Encode as a UTF-16 surrogate pair.
      codePoint -= 0x10000;
      str += String.fromCharCode(0xD800 | (codePoint >> 10), 0xDC00 | (codePoint & 0x03FF));
    }
    i += extra + 1;
  }

  return str;
};