python algorithm tree graph-theory pseudocode

Find maximum recurring subtree

I have a Python dictionary like this:

dictionary = {
    'r1': {
        'val': 'r',
        'sub': {
            'a1': {
                'val': 'a',
                'sub': {
                    'b1': {
                        'val': 'b',
                        'sub': {}
                    }
                }
            },
            'b1': {
                'val': 'b',
                'sub': {
                    'c1': {
                        'val': 'c',
                        'sub': {}
                    },
                    'a2': {
                        'val': 'a',
                        'sub': {
                            'c2': {
                                'val': 'c',
                                'sub': {}
                            },
                        }
                    },
                    'a3': {
                        'val': 'a',
                        'sub': {}
                    },
                    'b2': {
                        'val': 'b',
                        'sub': {}
                    },
                    'a4': {
                        'val': 'a',
                        'sub': {
                            'c3': {
                                'val': 'c',
                                'sub': {}
                            },
                            'd1': {
                                'val': 'd',
                                'sub': {}
                            }
                        }
                    }
                }
            }
        }
    }
}

Interpreting the dictionary as a tree, what I need to do is to find the maximum recurrent subtree.

For maximum recurrent subtree I mean for example:

subtree = {
    'structure': {
        'r': {
            'sub': {
                'b': {
                    'sub': {
                        'a': {
                            'sub': {
                                'c': {
                                    'sub': {}
                                },
                            }
                        },
                    }
                }
            }
        }
    },
    'counter': 2
}

Basically I am trying to implement a recursive function like:

def find_max_subtree(dict: tree, dict: output) -> dict:
    for key in tree:
        ...
        output[key] = {
            'sub' = {}
        }
        output[key]['sub'] = find_max_subtree(key['sub'], output['sub'])
    return output

I do not know how to solve it. Even a pseudocode strategy would be enough for me.

Solution

You can keep a depth counter as you recurse, and at each level, return the subdictionary that that has the largest associated counter:

def max_subtree(d, c = 0):
  a, c_d, b = max([(a, c, b) if not b else (a, *max_subtree(b, c+1)) 
                       for a, b in d.items()], key=lambda x:x[1])
  return (c_d, {a:b})

_, sub_dict = max_subtree(dictionary)

Output:

{'r': {'sub': {'b': {'sub': {'a': {'sub': {'c': {'sub': {}}}}}}}}}

from collections import Counter
def build_dict(d):
   return {} if not d else {d[0]:{'sub':build_dict(d[1:])}}

def get_paths(d, c = []):
   if 'sub' not in d:
      yield from [j for i in d.values() for j in get_paths(i,c)]
   elif not d['sub']:
      yield tuple(c+[d['val']])
   else:
      for _, b in d['sub'].items():
        yield from get_paths(b, c+[d['val']])

result = Counter(get_paths(dictionary))
c = max(result, key=lambda x:[result[x] > 1, result[x], len(x)])
print(build_dict(c), result[c])

Output:

{'r': {'sub': {'b': {'sub': {'a': {'sub': {'c': {'sub': {}}}}}}}}} 2