deep-learning-toolkit
deep-learning-toolkit copied to clipboard
Wrong conf file encoding on windows
On Windows, the app writes the conf files with UTF-8-BOM encoding. This leads to errors in the `parse` method in `conf_files_splunk_client.py`.
A possible solution would be to detect the encoding before parsing the files:
# from requests.utils (guess_json_utf(data))
def detect_encoding(self, path):
    """Guess the text encoding of the file at *path* from its first bytes.

    The heuristic looks at no more than the first four bytes of the file:

    * a UTF-32, UTF-8 or UTF-16 byte-order mark selects ``"utf-32"``,
      ``"utf-8-sig"`` or ``"utf-16"`` respectively (these codecs consume
      the BOM themselves);
    * without a BOM, the position of null bytes is used to recognise
      UTF-16 / UTF-32 text that starts with an ASCII-range character;
    * anything else, including an empty file, is reported as ``"utf-8"``.

    Args:
        path: Path of the file to inspect.

    Returns:
        The name of a Python codec suitable for ``open(..., encoding=...)``.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    null = b"\x00"
    null2 = null * 2
    null3 = null * 3

    # Only the first four bytes are ever inspected, so read exactly that
    # much instead of a whole (possibly very long) first line.
    with open(path, "rb") as fp:
        sample = fp.read(4)

    # The UTF-32 check must come before UTF-16: the UTF-32-LE BOM starts
    # with the same two bytes as the UTF-16-LE BOM.
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return "utf-32"  # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return "utf-8-sig"  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return "utf-16"  # BOM included

    nullcount = sample.count(null)
    if nullcount == 0:
        return "utf-8"
    if nullcount == 2:
        if sample[::2] == null2:  # 1st and 3rd are null
            return "utf-16-be"
        if sample[1::2] == null2:  # 2nd and 4th are null
            return "utf-16-le"
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == null3:
            return "utf-32-be"
        if sample[1:] == null3:
            return "utf-32-le"
        # Did not detect a valid UTF-32 ascii-range character
    return "utf-8"
def parse(self, path):
    """Parse the conf file at *path* into a ``ConfigParser``.

    The file's encoding is determined with ``self.detect_encoding`` before
    it is opened, so files that start with a byte-order mark (such as the
    UTF-8-BOM files written on Windows) are decoded correctly.  Lines that
    end in a backslash are joined with the following line before parsing.

    Args:
        path: Path of the conf file to read.

    Returns:
        A ``ConfigParser`` populated from the file, or ``None`` when
        *path* does not exist.
    """
    if not os.path.exists(path):
        return None

    parser = ConfigParser(
        # One-element tuple: only "=" separates keys from values, so ":"
        # may appear inside option names.
        delimiters=("=",),
        strict=False,
        default_section="__default__",
    )
    encoding = self.detect_encoding(path)
    with open(path, "r", encoding=encoding) as fp:
        content = fp.read()
    # Remove backslash-newline pairs so continued lines become one line.
    content = content.replace("\\\n", "")
    parser.read_string(content)
    return parser