carpg
carpg copied to clipboard
UTF-8 support
- loading text files encoded as ASCII or UTF-8 (or UTF-8 only); there should be a check for the BOM to report that something is wrong if another encoding is used
- draw text with utf-8
- input text box support, delete a character with backspace (for characters encoded with multiple bytes)
- currently trim crashes when there are non-ASCII letters (because of isspace)
- force utf8 bom ? (and then check it)
- remove hex encoding in script dialogs \xB3
- locale info in system
- console output
- utf8 changes from lobby api (from next version)
Some text characters: ęĘóÓąĄśŚłŁżŻźŹćĆńŃ à â ç é è ê ë î ï ô û ù ü ÿ
http://utf8everywhere.org/ http://programmers.stackexchange.com/questions/102205/should-utf-16-be-considered-harmful
Some code for tokenizer to check encoding:
// Inspects the first bytes of the loaded text for a byte order mark (BOM).
//
// A UTF-16 or UTF-32 BOM (either endianness) produces a warning that names the
// detected encoding and the file given by `path`. A UTF-8 BOM is accepted
// without a warning. Afterwards `pos` is set to the length of the recognized
// BOM (0 when none was recognized) - presumably so reading starts right after
// the mark; confirm against the rest of Tokenizer.
//
// Text shorter than two bytes is left alone: nothing is reported and `pos` is
// not modified.
void Tokenizer::CheckBom(cstring path)
{
	const auto length = str->length();
	if(length < 2)
		return;

	// Bytes missing from a short buffer are replaced with 1, a value that
	// does not complete any of the signatures tested below.
	byte head[4] = { (byte)str->at(0), (byte)str->at(1), 1, 1 };
	if(length >= 3)
		head[2] = (byte)str->at(2);
	if(length >= 4)
		head[3] = (byte)str->at(3);

	cstring wrong_encoding = nullptr;
	int bom_size = 0;

	// Every signature starts with a different byte, so dispatch on it.
	switch(head[0])
	{
	case 0xFF:
		if(head[1] == 0xFE)
		{
			// FF FE 00 00 is UTF-32 LE; FF FE followed by anything else is UTF-16 LE.
			if(head[2] == 0 && head[3] == 0)
			{
				wrong_encoding = "utf-32 little endian";
				bom_size = 4;
			}
			else
			{
				wrong_encoding = "utf-16 little endian";
				bom_size = 2;
			}
		}
		break;
	case 0xFE:
		if(head[1] == 0xFF)
		{
			wrong_encoding = "utf-16 big endian";
			bom_size = 2;
		}
		break;
	case 0x00:
		if(head[1] == 0 && head[2] == 0xFE && head[3] == 0xFF)
		{
			wrong_encoding = "utf-32 big endian";
			bom_size = 4;
		}
		break;
	case 0xEF:
		// EF BB BF is the UTF-8 signature: the expected encoding, no warning.
		if(head[1] == 0xBB && head[2] == 0xBF)
			bom_size = 3;
		break;
	default:
		// unknown encoding or ascii
		break;
	}

	if(wrong_encoding)
		WARN(Format("Invalid encoding '%s' for file '%s'.", wrong_encoding, path));

	pos = bom_size;
}