Search code examples
Tags: python, python-itertools, itertools-groupby

How do I group by with name, preserve keys and names using itertools?


I am working with a data set returned by a simple SQL query that fetches the desired rows.

[(2, 5, 'JOHN K', 'YAHOO'), (2, 6, 'AARON M', 'YAHOO'), (2, 7, 'NICK C', 'YAHOO'), (1, 2, 'CELESTE G', 'GOOGLE'), (1, 3, 'RICH M', 'GOOGLE'), (1, 4, 'SANDEEP C', 'GOOGLE')]

What I have so far yields the grouping, but without the keys:

import itertools
import operator


def accumulate(rows):
    """Group the names in *rows* by company name.

    Each row is indexed positionally: index 3 is used as the grouping key
    (the company name) and index 2 is the value collected for that key.

    Args:
        rows: Iterable of indexable rows with at least four fields.

    Returns:
        A dict mapping each company name to its names joined with ';',
        in the order the rows were seen. Keys keep first-seen order.
    """
    names_by_company = {}
    # groupby only merges *adjacent* rows that share a key, so the same
    # company can come back more than once when the input is not sorted.
    # Extending a per-key list (instead of assigning) keeps earlier runs
    # from being overwritten by later ones.
    for company, group in itertools.groupby(rows, operator.itemgetter(3)):
        names_by_company.setdefault(company, []).extend(
            row[2] for row in group
        )
    return {
        company: ';'.join(names)
        for company, names in names_by_company.items()
    }


if __name__ == '__main__':
    # Sample rows in the same shape as the query result shown above.
    rows = [
        (2, 5, 'JOHN K', 'YAHOO'),
        (2, 6, 'AARON M', 'YAHOO'),
        (2, 7, 'NICK C', 'YAHOO'),
        (1, 2, 'CELESTE G', 'GOOGLE'),
        (1, 3, 'RICH M', 'GOOGLE'),
        (1, 4, 'SANDEEP C', 'GOOGLE'),
    ]

    # Group the rows and show the resulting mapping.
    groupedby = accumulate(rows)
    print(groupedby)

Output -

{'YAHOO': 'JOHN K;AARON M;NICK C', 'GOOGLE': 'CELESTE G;RICH M;SANDEEP C'}

Desired output, which preserves the keys and still does the grouping:

{('YAHOO,2'): '(JOHN K,5);(AARON M,6);(NICK C,7)', ('GOOGLE,1'): '(CELESTE G,2);(RICH M,3);(SANDEEP C,4)'}

I am open to some other data structure that is not comma-separated, such as one using pipes or maybe a tuple.

for company, members in it:
    # The trailing comma in the subscript makes the dict key a 1-tuple.
    joined_names = ';'.join(row[2] for row in members)
    k[(company,)] = joined_names

Any help is appreciated!


Solution

  • # 1
    # Data: defined before the snippets so the listing runs top to bottom
    # without a NameError on `arr`.
    arr = [(2, 5, 'JOHN K', 'YAHOO'),
           (2, 6, 'AARON M', 'YAHOO'),
           (2, 7, 'NICK C', 'YAHOO'),
           (1, 2, 'CELESTE G', 'GOOGLE'),
           (1, 3, 'RICH M', 'GOOGLE'),
           (1, 4, 'SANDEEP C', 'GOOGLE')]

    # 1: string keys and string values, collected with setdefault.
    ans = {}
    for a, b, c, d in arr:
        key = "({},{})".format(d, a)
        ans.setdefault(key, []).append("({},{})".format(c, b))
    # Bare expression: its value is only displayed in an interactive session.
    {k: ";".join(v) for k, v in ans.items()}
    # {'(YAHOO,2)': '(JOHN K,5);(AARON M,6);(NICK C,7)',
    #  '(GOOGLE,1)': '(CELESTE G,2);(RICH M,3);(SANDEEP C,4)'}

    # 2: same result, with an explicit membership check instead of setdefault.
    ans = {}
    for a, b, c, d in arr:
        key = "({},{})".format(d, a)
        val = "({},{})".format(c, b)
        if key not in ans:
            ans[key] = [val]
        else:
            ans[key].append(val)

    # Replace each list of entries with its ";"-joined string.
    ans = {k: ";".join(v) for k, v in ans.items()}

    ans
    # {'(YAHOO,2)': '(JOHN K,5);(AARON M,6);(NICK C,7)',
    #   '(GOOGLE,1)': '(CELESTE G,2);(RICH M,3);(SANDEEP C,4)'}

    # I would just do this: keep tuples as keys and values instead of
    # formatting everything into strings.
    ans = {}
    for a, b, c, d in arr:
        ans.setdefault((d, a), []).append((c, b))
    ans
    # {('YAHOO', 2): [('JOHN K', 5), ('AARON M', 6), ('NICK C', 7)],
    #  ('GOOGLE', 1): [('CELESTE G', 2), ('RICH M', 3), ('SANDEEP C', 4)]}