Search code examples
pythondictionaryreducefold

Python fold/reduce composition of multiple dictionaries


I want to achieve the following. It's essentially the composition or merging of an arbitrary number of dictionaries, with reference to a 'seed' or root dictionary, accumulating all unchanged and updated values in the final result.

seed = {
    'update': False,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00'
        },
        'field3': 2,
        'field4': None
    },
    'data_updates': {},
    'subdata_updates': {},
    'diffs': {}
}

update_1 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00'
        },
        'field3': 2,
        'field4': None
    },
    'data_updates': {},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field1',
            'before': 5,
            'after': 6
        }
    }
}

update_2 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {},
    'diffs': {
        'field4': {
            'field': 'field4',
            'before': None,
            'after': 1
        }
    }
}

# I want to be able to pass in an arbitrary number of updates.
assert reduce_maps(seed, *[update_1, update_2]) == {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field1',
            'before': 5,
            'after': 6
        },
        'field4': {
            'field': 'field4',
            'before': None,
            'after': 1
        }
    }
}

You can assume the data will always be in this shape, you can also assume that each payload only ever updates one field and that no two updates will ever update the same field.

I can dimly perceive an analogue of fold lurking in the background here building up the data in passes around seed.


Solution

  • import copy
    from functools import partial, reduce
    
    def traverse(seed, update, sentinel):
        for key, value in update.items():
            if isinstance(value, dict):
                try:
                    traverse(seed[key], update[key], sentinel)
                except KeyError:
                    seed[key] = value
            else:
                if key not in seed or value != seed[key] \
                        and key not in sentinel:
                    seed[key] = value
                    sentinel.add(key)
        return seed
    
    
    def reduce_maps(seed, *updates):
        seed = copy.deepcopy(seed)
        return reduce(
            partial(traverse, sentinel=set()), [seed, *updates]
        )