django-rest-framework-csv
django-rest-framework-csv copied to clipboard
Hierarchical parser
Hi. When rendering a hierarchical structure, the renderer works fine and the column names take the form `level.N.sublevel`. But the parser does not take this structure into account. Instead, it just takes the column names and assigns the values, all in a flat structure. I suggest the following code to recover the hierarchical structure:
class HierarchicalCSVParser(BaseParser):
    """
    Parses CSV serialized data into hierarchical structure.

    The parser assumes the first line contains the column names. A dotted
    column name such as ``level.N.sublevel`` is split on "." and each
    segment becomes one nesting level in the parsed row.
    """
    media_type = 'text/csv'

    def parse(self, stream, media_type=None, parser_context=None):
        """
        Read CSV from ``stream`` and return the rows in hierarchical form.

        ``parser_context`` may supply ``delimiter`` (default ``','``) and
        ``encoding`` (default ``settings.DEFAULT_CHARSET``).

        Any failure while reading or converting is re-raised as
        ``ParseError``.
        """
        parser_context = parser_context or {}
        delimiter = parser_context.get('delimiter', ',')

        try:
            encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
            rows = unicode_csv_reader(
                universal_newlines(stream),
                delimiter=delimiter,
                charset=encoding,
            )
            # The first row is the header; the remaining rows are data.
            data = OrderedRows(next(rows))
            for row in rows:
                row_data = dict(zip(data.header, row))
                data.append(self._csv_convert(row_data))
            return data
        except Exception as exc:
            raise ParseError('CSV parse error - %s' % str(exc))

    def _csv_convert(self, flat_data):
        """
        Rebuild one nesting level from a flat ``{dotted_key: value}`` dict.

        Entries whose value is empty are dropped, so a branch whose cells
        are all empty does not appear in the result. If every first-level
        key is numeric the level is returned as a list ordered by numeric
        index; otherwise it is returned as a dict.

        Raises ``ValueError`` when the same key is used both as a plain
        value and as the parent of deeper keys (e.g. ``a`` and ``a.b``).
        """
        if not flat_data:
            return {}

        # Decide the container type from ALL first-level keys so the
        # result does not depend on dict/set iteration order.
        as_list = all(key.partition(".")[0].isdigit() for key in flat_data)

        leaves = {}    # first-level key -> scalar value
        branches = {}  # first-level key -> {remaining dotted key: value}
        for key, value in flat_data.items():
            if not value:
                continue
            head, separator, rest = key.partition(".")
            if separator:
                branches.setdefault(head, {})[rest] = value
            else:
                leaves[head] = value

        conflicts = sorted(set(leaves).intersection(branches))
        if conflicts:
            raise ValueError(
                'column(s) used both as a value and as a parent: %s'
                % ', '.join(conflicts)
            )

        converted = dict(leaves)
        for head, nested in branches.items():
            converted[head] = self._csv_convert(nested)

        if as_list:
            # Numeric order, not string order: "10" must come after "2".
            return [converted[index] for index in sorted(converted, key=int)]
        return converted
Francesc
Can confirm that this issue is still relevant in 2019.
Here’s another implementation
from rest_framework_csv.parsers import CSVParser
class HierarchicalCSVParser(CSVParser):
    """
    CSV parser that turns dotted column names back into nested dicts.

    The parent ``CSVParser.parse`` result is treated as an iterable of flat
    dicts; every key is split on "." and each segment becomes one level of
    nesting. Numeric segments are kept as ordinary dict keys (no lists are
    rebuilt).
    """

    def parse(self, stream, media_type=None, parser_context=None):
        """Parse with the parent parser, then nest every resulting row."""
        flattened_data = super().parse(stream, media_type, parser_context)
        return self.hierarchify_many(flattened_data)

    @staticmethod
    def hierarchify_many(flattened_list: list) -> list:
        """Return a new list with ``hierarchify_one`` applied to each item."""
        return [
            HierarchicalCSVParser.hierarchify_one(flattened_item)
            for flattened_item in flattened_list
        ]

    @staticmethod
    def hierarchify_one(flattened_dictionary: dict) -> dict:
        """
        Convert one flat ``{"a.b.c": value}`` dict into nested dicts.

        Raises ``KeyError`` when a key is used both as a plain value and as
        the parent of deeper keys (e.g. ``a`` and ``a.b``), regardless of
        the order in which the two keys appear.
        """
        result = {}
        for flat_key, value in flattened_dictionary.items():
            *parents, last = flat_key.split(".")
            pointer = result
            for key in parents:
                if key not in pointer:
                    pointer[key] = {}
                elif not isinstance(pointer[key], dict):
                    raise KeyError(f"{flat_key} {key} is not a dictionary")
                pointer = pointer[key]
            # Without this check, "a.b" followed by "a" would silently
            # replace the nested dict with a scalar and lose "a.b".
            if isinstance(pointer.get(last), dict):
                raise KeyError(f"{flat_key} {last} is already a dictionary")
            pointer[last] = value
        return result