atomium
atomium copied to clipboard
atomium should not depend on `strptime` to parse dates in PDB files
For Bug Reports
Expected behaviour
PDB files should be read successfully even when a non-English locale is in use.
Actual behaviour
~/.pyenv/versions/3.8.3/envs/partseg3.8/lib/python3.8/site-packages/atomium/utilities.py in open(path, *args, **kwargs)
39 except:
40 with builtins.open(path, "rb") as f: filestring = f.read()
---> 41 return parse_string(filestring, path, *args, **kwargs)
42
43
~/.pyenv/versions/3.8.3/envs/partseg3.8/lib/python3.8/site-packages/atomium/utilities.py in parse_string(filestring, path, file_dict, data_dict)
122 parsed = file_func(filestring)
123 if not file_dict:
--> 124 parsed = data_func(parsed)
125 if not data_dict:
126 filetype = data_func.__name__.split("_")[0].replace("mmc", "c")
~/.pyenv/versions/3.8.3/envs/partseg3.8/lib/python3.8/site-packages/atomium/pdb.py in pdb_dict_to_data_dict(pdb_dict)
80 "geometry": {"assemblies": [], "crystallography": {}}, "models": []
81 }
---> 82 update_description_dict(pdb_dict, data_dict)
83 update_experiment_dict(pdb_dict, data_dict)
84 update_quality_dict(pdb_dict, data_dict)
~/.pyenv/versions/3.8.3/envs/partseg3.8/lib/python3.8/site-packages/atomium/pdb.py in update_description_dict(pdb_dict, data_dict)
95 :param dict data_dict: The data dictionary to update."""
96
---> 97 extract_header(pdb_dict, data_dict["description"])
98 extract_title(pdb_dict, data_dict["description"])
99 extract_keywords(pdb_dict, data_dict["description"])
~/.pyenv/versions/3.8.3/envs/partseg3.8/lib/python3.8/site-packages/atomium/pdb.py in extract_header(pdb_dict, description_dict)
174 line = pdb_dict["HEADER"][0]
175 if line[50:59].strip():
--> 176 description_dict["deposition_date"] = datetime.strptime(
177 line[50:59], "%d-%b-%y"
178 ).date()
~/.pyenv/versions/3.8.3/lib/python3.8/_strptime.py in _strptime_datetime(cls, data_string, format)
566 """Return a class cls instance based on the input string and the
567 format string."""
--> 568 tt, fraction, gmtoff_fraction = _strptime(data_string, format)
569 tzname, gmtoff = tt[-2:]
570 args = tt[:6] + (fraction,)
~/.pyenv/versions/3.8.3/lib/python3.8/_strptime.py in _strptime(data_string, format)
347 found = format_regex.match(data_string)
348 if not found:
--> 349 raise ValueError("time data %r does not match format %r" %
350 (data_string, format))
351 if len(data_string) != found.end():
ValueError: time data '24-JUL-96' does not match format '%d-%b-%y'
In my case, with my locale, abbreviated month names are not the English ones:
In[11]: date(1996, 6, 15).strftime("%d-%b-%y")
Out[11]: '15-cze-96'
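That is, the result is not '15-Jun-96', so the English month abbreviation used in PDB files cannot be matched by `%b`.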
Example code to reproduce
On non-English OS:
import atomium
import locale
locale.setlocale(locale.LC_ALL, "")
atomium.open("/home/czaki/Pobrane/tmp/3mht.pdb")
The Python documentation (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) states explicitly that `strptime` uses the locale's abbreviated month names for `%b`.
Python Version/Operating System
Python 3.8.3, Ubuntu 21.10
Thanks for pointing this out, I hadn't even realised. This will be fixed in atomium v2.0, released in a few weeks.