Search code examples
pythonjsonrecursionnestedflatten

flatten a complex dict, while compressing keys using dot as a separator


I'm trying to come up with a Python 3 function (using existing libraries also works, of course) that would flatten this input:

{
  'key1': 1, 
  'key2dict': {'subkey1': 1, 'subkey2': 2},
  'key3listOfDict': [
    {'subkey3': 3, 'subkey4': 4},
    {'subkey5': 5, 'subkey6': 6}
  ],
  'key4nestedListOfDict': [
    {
      'subkey7': 7, 
      'subkeyNested': [
        {'subkey8': 8}, 
        {'subkey9': 9}
      ]
    }
  ]
}

into this:

[
  {
    'key1': 1, 
    'key2dict.subkey1': 1,
    'key2dict.subkey2': 2,
    'key3listOfDict.subkey3': 3,
    'key3listOfDict.subkey4': 4,
    'key4nestedListOfDict.subkey7': 7,
    'key4nestedListOfDict.subkeyNested.subkey8': 8,
  },
  {
    'key1': 1, 
    'key2dict.subkey1': 1,
    'key2dict.subkey2': 2,
    'key3listOfDict.subkey3': 3,
    'key3listOfDict.subkey4': 4,
    'key4nestedListOfDict.subkey7': 7,
    'key4nestedListOfDict.subkeyNested.subkey9': 9,
  },
  {
    'key1': 1, 
    'key2dict.subkey1': 1,
    'key2dict.subkey2': 2,
    'key3listOfDict.subkey5': 5,
    'key3listOfDict.subkey6': 6,
    'key4nestedListOfDict.subkey7': 7,
    'key4nestedListOfDict.subkeyNested.subkey8': 8,
  },
  {
    'key1': 1, 
    'key2dict.subkey1': 1,
    'key2dict.subkey2': 2,
    'key3listOfDict.subkey5': 5,
    'key3listOfDict.subkey6': 6,
    'key4nestedListOfDict.subkey7': 7,
    'key4nestedListOfDict.subkeyNested.subkey9': 9,
  }
]

The main challenge I'm having is properly handling lists of objects and nested lists of objects. I researched this and tried a few approaches myself, but they didn't work as expected.

Any help would be much appreciated!

For the record, this is what I have so far (it does not do the job properly):

from collections.abc import MutableMapping

def flatten(dictionary, parent_key='', separator='.'):
    """Collapse nested mappings into a single-level dict with joined keys.

    Args:
        dictionary: Mapping to flatten; read through ``.items()``.
        parent_key: Prefix placed in front of every key of ``dictionary``.
            When it is falsy, keys are used unprefixed.
        separator: String inserted between the prefix and each key.

    Returns:
        A new ``dict`` whose keys are the joined key paths. Only values that
        are ``MutableMapping`` instances are descended into; every other
        value (lists included) is stored unchanged. A nested mapping that is
        empty contributes no entries.
    """
    flat = {}
    for key, value in dictionary.items():
        if parent_key:
            full_key = parent_key + separator + key
        else:
            full_key = key
        if not isinstance(value, MutableMapping):
            flat[full_key] = value
            continue
        # Recurse with the extended prefix and merge the child's entries.
        flat.update(flatten(value, full_key, separator=separator))
    return flat

def flatten_handle_lists(row):
    """Expand each list-valued key of ``row`` into one output row per element.

    For every key whose value is a ``list``, and for every element of that
    list, a copy of ``row`` is made with that key removed and the element's
    flattened entries (prefixed with the key, joined by '.') merged in.

    Note that each list-valued key is expanded independently: other
    list-valued keys are left in the copy as they are, so the result is not
    the combination of all lists. If ``row`` contains no list values the
    result is an empty list.

    Args:
        row: Mapping whose list values hold mappings accepted by ``flatten``.

    Returns:
        A list of dicts, one per element of each list value, in iteration
        order.
    """
    rows = []
    for key, value in row.items():
        if not isinstance(value, list):
            continue
        # Everything except the list being expanded is shared by its rows.
        remainder = dict(row)
        del remainder[key]
        rows.extend(
            {**remainder, **flatten(element, key, '.')} for element in value
        )
    return rows

Solution

  • It's much simpler to split this into two steps: handle the lists first, then do the dictionary flattening afterward.

    import itertools
    from collections.abc import Mapping
    
    
    def _explode_value(value):
        """Return the list of alternatives that a single value stands for."""
        if isinstance(value, list):
            # Every element is an alternative. Elements may be mappings,
            # further lists, or plain scalars, so classify each one again
            # instead of assuming it is a mapping.
            return list(
                itertools.chain.from_iterable(map(_explode_value, value))
            )
        if isinstance(value, Mapping):
            return explode_nested_lists(value)
        return [value]


    def explode_nested_lists(dictionary):
        """
        Turn a nested dictionary with lists representing different possible subtrees
        into a list of dictionaries for each combination of the subtrees.

        A list value is treated as a set of alternatives for its key: mapping
        elements are exploded recursively, nested lists are merged into the
        same set of alternatives, and any other element is used as-is. A
        mapping value is exploded recursively; any other value is kept as the
        single alternative for its key.

        Args:
            dictionary: Mapping to explode; read through ``.items()``.

        Returns:
            A list of list-free dicts, one per element of the Cartesian
            product of the alternatives of every key. An empty list anywhere
            contributes no alternatives, so the result is then empty; an
            empty ``dictionary`` yields ``[{}]``.
        """
        options = {
            key: _explode_value(value) for key, value in dictionary.items()
        }
        keys = list(options)
        return [
            dict(zip(keys, combination))
            for combination in itertools.product(*options.values())
        ]
    
    
    def flatten_keys(dictionary, separator="."):
        """Return a single-level dict whose keys are separator-joined key paths.

        Values that are ``Mapping`` instances are flattened recursively and
        their keys prefixed with the parent key and ``separator``; all other
        values are copied under their original key. A nested mapping that is
        empty contributes no entries.
        """
        flat = {}
        for outer_key, outer_value in dictionary.items():
            if not isinstance(outer_value, Mapping):
                flat[outer_key] = outer_value
                continue
            # Flatten the child first, then hoist its entries under our key.
            nested = flatten_keys(outer_value, separator)
            for inner_key, inner_value in nested.items():
                flat[outer_key + separator + inner_key] = inner_value
        return flat
    
    
    def flatten(dictionary, separator="."):
        """Flatten ``dictionary`` into a list of single-level dicts.

        The input is first expanded with ``explode_nested_lists`` into one
        dict per combination of list alternatives; each of those is then
        passed through ``flatten_keys`` so nested keys are joined with
        ``separator``.
        """
        flattened_rows = []
        for combination in explode_nested_lists(dictionary):
            flattened_rows.append(flatten_keys(combination, separator))
        return flattened_rows