kaitai_struct_webide
kaitai_struct_webide copied to clipboard
Strings in UTF-16 encodings are not decoded correctly
I am developing a KSY for the new Windows event log file format (*.evtx). Part of that KSY is the utf16_str
type, which is intended for use in a switch-on
type:
types:
utf16_str:
-webide-representation: '{value}'
seq:
- id: value
type: str
encoding: UTF-16LE
size-eos: true
That KSY generates the following code:
// Generated parser class for the `utf16_str` type; also exposed as WindowsEvtxLog.Utf16Str.
var Utf16Str = WindowsEvtxLog.Utf16Str = (function() {
// Stores the stream, parent and root references, then parses immediately.
// When no _root is passed, this object becomes its own root.
function Utf16Str(_io, _parent, _root) {
this._io = _io;
this._parent = _parent;
this._root = _root || this;
this._read();
}
// Populates `value` by decoding the bytes returned by readBytesFull() as "UTF-16LE".
// NOTE(review): readBytesFull() presumably returns everything left in this._io
// (matches `size-eos: true` in the KSY) — confirm against the KaitaiStream runtime.
Utf16Str.prototype._read = function() {
this.value = KaitaiStream.bytesToStr(this._io.readBytesFull(), "UTF-16LE");
}
return Utf16Str;
})();
But in the WebIDE (both old and new) the result is a single U+FFFD
character:
@Mingun:
But in the WebIDE (both old and new) the result is a single
U+FFFD
character:
If you look at what bytes the utf16_str
type actually parses by doing:
types:
utf16_str:
-webide-representation: '{value}'
seq:
- id: value
- type: str
- encoding: UTF-16LE
size-eos: true
..., what bytes do you see?
Yes, the problem is that switch-on
does not interact correctly with size-constrained fields. Test case:
meta:
id: bug
seq:
- id: values
size: 4
type:
switch-on: _index
cases:
0x01: container
repeat: expr
repeat-expr: 2
types:
container:
seq:
- id: field
size-eos: true
Generated code:
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
// UMD wrapper: registers the module through AMD `define`, CommonJS `module.exports`,
// or as the global `root.Bug`, depending on which loader is detected.
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define(['kaitai-struct/KaitaiStream'], factory);
} else if (typeof module === 'object' && module.exports) {
module.exports = factory(require('kaitai-struct/KaitaiStream'));
} else {
root.Bug = factory(root.KaitaiStream);
}
}(typeof self !== 'undefined' ? self : this, function (KaitaiStream) {
// Top-level parser class for the `bug` test case.
var Bug = (function() {
// Stores the stream, parent and root references, then parses immediately.
// When no _root is passed, this object becomes its own root.
function Bug(_io, _parent, _root) {
this._io = _io;
this._parent = _parent;
this._root = _root || this;
this._read();
}
// Reads two 4-byte `values` entries, choosing the element type from the loop index.
Bug.prototype._read = function() {
this._raw_values = [];
this.values = [];
for (var i = 0; i < 2; i++) {
switch (i) {
case 1:
// NOTE(review): _raw_values is pushed only in this branch, so when i === 1 it holds
// a single element at index 0. The lookup this._raw_values[i] on the next line is
// therefore undefined, and the Container substream is not built from the 4 bytes
// that were just read.
this._raw_values.push(this._io.readBytes(4));
var _io__raw_values = new KaitaiStream(this._raw_values[i]);
this.values.push(new Container(_io__raw_values, this, this._root));
break;
default:
// No matching case: the 4 raw bytes go straight into `values`; nothing is added
// to _raw_values, which is what lets the two arrays drift out of step.
this.values.push(this._io.readBytes(4));
break;
}
}
}
// Parser class for the `container` type; also exposed as Bug.Container.
var Container = Bug.Container = (function() {
// Same constructor pattern as Bug: store references, then parse.
function Container(_io, _parent, _root) {
this._io = _io;
this._parent = _parent;
this._root = _root || this;
this._read();
}
// Populates `field` with whatever readBytesFull() returns for this object's stream.
Container.prototype._read = function() {
this.field = this._io.readBytesFull();
}
return Container;
})();
return Bug;
})();
return Bug;
}));
Result:
{
"values": [
[69, 108, 102, 70],
{
"field": [0]
}
]
}
this._raw_values
grows only in some iterations, but it is always read at the index of the current iteration, so the element pushed by this._raw_values.push()
is not at index i
Actually, the root of the problem is that size
is incorrectly bound to types instead of to fields, as I already stated in https://github.com/kaitai-io/kaitai_struct/issues/788#issuecomment-666195081.
This KSY should generate something like this:
Bug.prototype._read = function() {
this._raw_values = [];
this.values = [];
for (var i = 0; i < 2; i++) {
var _raw = this._io.readBytes(4);
var _io = new KaitaiStream(_raw);
this._raw_values.push(_raw);
switch (i) {
case 1:
this.values.push(new Container(_io, this, this._root));
break;
default:
this.values.push(_io.readBytes(4));
break;
}
}