Search code examples
Tags: python, list, dictionary, avro

Merge lists of complex dicts with arbitrary keys


I have this code:

# Dotted paths to expand; each one becomes its own nested entry.
dotteds = ["apple.orange.banana", "a.b.c", "a.b.d"]

# Keys used in every generated field dictionary.
name = "name"
avtype = "type"
fields = "fields"

# One nested entry per dotted path, in input order.
# Paths that share a prefix are NOT merged here: each path gets its own tree.
main_dictionary_list = []

for x in dotteds:
    split_name = x.split('.')

    # The final path component is always a plain string leaf.
    node = {name: split_name[-1], avtype: 'string'}

    # Wrap the leaf from the inside out: every earlier component becomes a
    # record whose single field is the node built so far. A path without
    # dots has no earlier components, so the leaf is used as-is.
    for part in reversed(split_name[:-1]):
        node = {name: part, avtype: {name: part, avtype: "record", fields: [node]}}

    main_dictionary_list.append(node)

print(main_dictionary_list)

Which gives me an output like this:

[{
        'name': 'apple',
        'type': {
            'name': 'apple',
            'type': 'record',
            'fields': [{
                    'name': 'orange',
                    'type': {
                        'name': 'orange',
                        'type': 'record',
                        'fields': [{
                                'name': 'banana',
                                'type': 'string'
                            }
                        ]
                    }
                }
            ]
        }
    }, {
        'name': 'a',
        'type': {
            'name': 'a',
            'type': 'record',
            'fields': [{
                    'name': 'b',
                    'type': {
                        'name': 'b',
                        'type': 'record',
                        'fields': [{
                                'name': 'c',
                                'type': 'string'
                            }
                        ]
                    }
                }
            ]
        }
    }, {
        'name': 'a',
        'type': {
            'name': 'a',
            'type': 'record',
            'fields': [{
                    'name': 'b',
                    'type': {
                        'name': 'b',
                        'type': 'record',
                        'fields': [{
                                'name': 'd',
                                'type': 'string'
                            }
                        ]
                    }
                }
            ]
        }
    }
]

Ideally I need:

[{
        'name': 'apple',
        'type': {
            'name': 'apple',
            'type': 'record',
            'fields': [{
                    'name': 'orange',
                    'type': {
                        'name': 'orange',
                        'type': 'record',
                        'fields': [{
                                'name': 'banana',
                                'type': 'string'
                            }
                        ]
                    }
                }
            ]
        }
    }, {
        'name': 'a',
        'type': {
            'name': 'a',
            'type': 'record',
            'fields': [{
                    'name': 'b',
                    'type': {
                        'name': 'b',
                        'type': 'record',
                        'fields': [{
                                'name': 'c',
                                'type': 'string'
                            }, 
                            {
                                'name': 'd',
                                'type': 'string'
                            }
                        ]
                    }
                }
            ]
        }
    }
]

I need to be able to do this with any number of combinations:

apple.orange.banana, a.b.c, a.b.d, a.b.q.e.a.s.d, etc.

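I cannot figure out how to merge the entries that share the same 'name' at each level, so that paths with a common prefix (such as a.b.c and a.b.d) end up under a single record. The result is intended to be in Avro format.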

I have also tried making the dotted values into a dictionary which is a bit of trouble on its own.


Solution

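  • You can use recursion with collections.defaultdict. Note that, as the outputs below show, each top-level 'type' is returned as a one-element list wrapping the record rather than as the record itself: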

    from collections import defaultdict
    def group(vals, last=None):
        """Group split dotted paths into nested record dictionaries.

        ``vals`` is a list of paths, each a list of components (for example
        ``['a', 'b', 'c']``). ``last`` is the name of the parent component and
        is ``None`` on the outermost call.

        Returns a list of dictionaries:

        * If any path in ``vals`` has exactly one component, the result is a
          one-element list holding a record named ``last``. Its ``fields``
          contain a string field for every single-component path and, for
          every longer path, the first entry produced by grouping that path
          on its own under its first component.
        * Otherwise the paths are bucketed by first component. On the
          outermost call each bucket yields ``{'name': head, 'type': [...]}``,
          where the ``'type'`` value is the list returned by the recursive
          call. On nested calls each bucket yields a record named ``last``
          whose ``fields`` is that recursive result.
        """
        contains_leaf = 1 in (len(path) for path in vals)
        if contains_leaf:
            members = []
            for path in vals:
                if len(path) == 1:
                    members.append({'name': path[0], 'type': 'string'})
                else:
                    # A longer sibling is grouped alone; it produces exactly
                    # one entry, which is taken as the field.
                    members.append(group([path], path[0])[0])
            record = {'name': last, 'type': 'record', 'fields': members}
            return [{'name': last, 'type': record}]

        # Bucket the remaining components of each path by its first component.
        tails_by_head = defaultdict(list)
        for path in vals:
            tails_by_head[path[0]].append(path[1:])

        grouped = []
        for head, tails in tails_by_head.items():
            nested = group(tails, last=head)
            if last is None:
                grouped.append({'name': head, 'type': nested})
            else:
                grouped.append({'name': last, 'type': 'record', 'fields': nested})
        return grouped
    

    import json
    # Split every dotted path into its components, group them, and
    # pretty-print the resulting structure as JSON.
    vals = ['apple.orange.banana', 'a.b.c', 'a.b.d']
    split_paths = [dotted.split('.') for dotted in vals]
    print(json.dumps(group(split_paths), indent=4))
    

    Output:

    [
      {
        "name": "apple",
        "type": [
            {
                "name": "apple",
                "type": "record",
                "fields": [
                    {
                        "name": "orange",
                        "type": {
                            "name": "orange",
                            "type": "record",
                            "fields": [
                                {
                                    "name": "banana",
                                    "type": "string"
                                }
                            ]
                        }
                    }
                ]
            }
        ]
    },
    {
        "name": "a",
        "type": [
            {
                "name": "a",
                "type": "record",
                "fields": [
                    {
                        "name": "b",
                        "type": {
                            "name": "b",
                            "type": "record",
                            "fields": [
                                {
                                    "name": "c",
                                    "type": "string"
                                },
                                {
                                    "name": "d",
                                    "type": "string"
                                }
                            ]
                        }
                    }
                ]
            }
          ]
       }
    ]
    

    # Same call with paths of differing depth under one shared root.
    vals = ['asd.2', 'asd.3', 'asd.5.3.4']
    split_paths = [dotted.split('.') for dotted in vals]
    print(json.dumps(group(split_paths), indent=4))
    

    Output:

    [
      {
        "name": "asd",
        "type": [
            {
                "name": "asd",
                "type": {
                    "name": "asd",
                    "type": "record",
                    "fields": [
                        {
                            "name": "2",
                            "type": "string"
                        },
                        {
                            "name": "3",
                            "type": "string"
                        },
                        {
                            "name": "5",
                            "type": "record",
                            "fields": [
                                {
                                    "name": "5",
                                    "type": "record",
                                    "fields": [
                                        {
                                            "name": "3",
                                            "type": {
                                                "name": "3",
                                                "type": "record",
                                                "fields": [
                                                    {
                                                        "name": "4",
                                                        "type": "string"
                                                    }
                                                ]
                                            }
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
                }
             }
          ]
       }
    ]