django-rest-framework-csv icon indicating copy to clipboard operation
django-rest-framework-csv copied to clipboard

Hierarchical parser

Open cescp opened this issue 11 years ago • 2 comments

Hi. When rendering a hierarchical structure, the renderer works fine and the column names take the form level.N.sublevel. But the parser does not take this structure into account: it simply takes the column names and stores the values in a flat structure. I suggest the following code to recover the hierarchical structure:

class HierarchicalCSVParser(BaseParser):
    """
    Parses CSV serialized data into hierarchical structure.

    The parser assumes the first line contains the column names. Dotted
    column names such as ``level.N.sublevel`` are expanded into nested
    containers: a level whose keys are all digits becomes a list, any
    other level becomes a dictionary. Cells holding an empty string are
    ignored.
    """

    media_type = 'text/csv'

    def parse(self, stream, media_type=None, parser_context=None):
        """
        Read CSV rows from ``stream`` and return them in nested form.

        ``parser_context`` may supply ``delimiter`` (default ``','``) and
        ``encoding`` (default ``settings.DEFAULT_CHARSET``). The first row
        is handed to ``OrderedRows`` and read back as ``data.header``;
        every following row is zipped with that header, converted with
        ``_csv_convert`` and appended to the result.

        Any exception raised while reading or converting is re-raised as
        ``ParseError``.
        """
        parser_context = parser_context or {}
        delimiter = parser_context.get('delimiter', ',')

        try:
            encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
            rows = unicode_csv_reader(universal_newlines(stream), delimiter=delimiter, charset=encoding)
            data = OrderedRows(next(rows))
            for row in rows:
                row_data = dict(zip(data.header, row))
                data.append(self._csv_convert(row_data))
            return data
        except Exception as exc:
            raise ParseError('CSV parse error - %s' % str(exc))

    def _csv_convert(self, flat_data):
        """
        Rebuild one nesting level from a flat ``{dotted_key: value}`` mapping.

        Keys are split on their first ``"."``. Entries without a dot are
        leaves of this level; the others are grouped by their first
        segment and converted recursively.

        Returns a list when every first-level key is made of digits (items
        ordered by numeric index, indexes with no data are skipped), and a
        dictionary otherwise (keys in the iteration order of
        ``flat_data``). An empty mapping yields an empty dictionary.

        Raises ``ValueError`` when the same key is used both as a value
        and as a prefix of nested keys (e.g. ``a`` and ``a.b``); ``parse``
        reports it as a ``ParseError``.
        """
        first_level_keys = []  # distinct first segments, in first-seen order
        leaves = {}            # first segment -> scalar value
        children = {}          # first segment -> flat mapping one level down

        for key, value in flat_data.items():
            head, separator, rest = key.partition(".")
            # The container type is decided from every column, including
            # the ones that are empty in this particular row.
            if head not in leaves and head not in children and head not in first_level_keys:
                first_level_keys.append(head)
            if len(value) == 0:
                # Empty cells carry no data for this row.
                continue
            if separator:
                children.setdefault(head, {})[rest] = value
            else:
                leaves[head] = value

        conflicting = [key for key in first_level_keys if key in leaves and key in children]
        if conflicting:
            raise ValueError(
                "column %r is used both as a value and as a parent of nested columns"
                % conflicting[0]
            )

        # Look at all keys instead of one arbitrary set element, and treat
        # the empty mapping as a dictionary.
        as_list = bool(first_level_keys) and all(key.isdigit() for key in first_level_keys)

        if as_list:
            items = []
            # Numeric ordering keeps "10" after "2" and makes the result
            # independent of hashing order.
            for key in sorted(first_level_keys, key=int):
                if key in leaves:
                    # Scalar list item, e.g. a column named "tags.0".
                    items.append(leaves[key])
                elif key in children:
                    items.append(self._csv_convert(children[key]))
            return items

        nested = {}
        for key in first_level_keys:
            if key in leaves:
                nested[key] = leaves[key]
            elif key in children:
                nested[key] = self._csv_convert(children[key])
        return nested

Francesc

cescp avatar Nov 20 '14 10:11 cescp

Can confirm that this issue is still relevant in 2019.

FlorianWendelborn avatar Mar 26 '19 14:03 FlorianWendelborn

Here’s another implementation

from rest_framework_csv.parsers import CSVParser


class HierarchicalCSVParser(CSVParser):
    """CSV parser that expands dotted column names into nested dictionaries."""

    def parse(self, stream, media_type=None, parser_context=None):
        """Parse with the parent parser, then nest every resulting row."""
        flat_rows = super().parse(stream, media_type, parser_context)
        return self.hierarchify_many(flat_rows)

    @staticmethod
    def hierarchify_many(flattened_list: list):
        """Return a list with each flat row converted by ``hierarchify_one``."""
        nested_rows = []
        for flat_row in flattened_list:
            nested_rows.append(HierarchicalCSVParser.hierarchify_one(flat_row))
        return nested_rows

    @staticmethod
    def hierarchify_one(flattened_dictionary: dict):
        """
        Turn ``{"a.b.c": value}`` style keys into nested dictionaries.

        Each key is split on ``"."``; all segments but the last name the
        dictionaries to descend into (created on demand) and the last
        segment receives the value. Raises ``KeyError`` when a segment that
        must be descended into already holds a non-dictionary value.
        """
        tree = {}

        for dotted_name, cell in flattened_dictionary.items():
            *parents, leaf = dotted_name.split(".")

            node = tree
            for segment in parents:
                # setdefault creates the missing level and otherwise hands
                # back whatever is already stored under this segment.
                child = node.setdefault(segment, {})
                if not isinstance(child, dict):
                    raise KeyError(f"{dotted_name} {segment} is not a dictionary")
                node = child

            node[leaf] = cell

        return tree

FlorianWendelborn avatar May 10 '19 17:05 FlorianWendelborn