Search code examples
pythoncsvpython-3.xdictionarydefaultdict

Converting CSV data to list in dictionary


I have a CSV file in the following form:

Name_1,2,K,14
Name_1,3,T,14
Name_1,4,T,18
Name_2,2,G,12
Name_2,4,T,14
Name_2,6,K,15
Name_3,2,K,12
Name_3,3,T,15
Name_3,4,G,18

And I want to convert it into a dictionary where Name_x is the key and corresponding data is the value in list form. Something like this:

{'Name_1': [[2, 'K', 14], [3, 'T', 14], [4, 'T', 18]],
 'Name_2': [[2, 'G', 12], [4, 'T', 14], [6, 'K', 15]],
...}

So far, I think I have to use defaultdict:

from collections import defaultdict
d = defaultdict(list)

But how do I append the data to d? I know defaultdict does not have an append method.


Solution

  • You need to use the name as the key and append the rest of the row (a slice) as the value. Note that key order is not guaranteed with a normal dict or a defaultdict (before Python 3.7):

    import csv
    from collections import defaultdict
    
    # Group the CSV rows by their first column (the name).
    # newline='' is what the csv module documentation recommends when opening
    # a file for csv.reader, so newlines inside quoted fields parse correctly.
    with open('in.csv', newline='') as f:
        r = csv.reader(f)
        # Missing keys start out as an empty list, so append works directly.
        d = defaultdict(list)
        for row in r:
            # row[0] is the name; row[1:] holds the remaining fields (strings).
            d[row[0]].append(row[1:])
    print(d)
    

    If you want to maintain order you will need an OrderedDict:

    import csv
    from collections import OrderedDict
    
    # Same grouping as before, but keys keep the order in which each name
    # first appears in the file.
    # newline='' is what the csv module documentation recommends when opening
    # a file for csv.reader.
    with open('in.csv', newline='') as f:
        r = csv.reader(f)
        od = OrderedDict()
        for row in r:
            # get key / first element in row
            key = row[0]
            # create the key/list pairing if it does not exist, else just
            # append the value
            od.setdefault(key, []).append(row[1:])
    print(od)
    

    Output:

    OrderedDict([('Name_1', [['2', 'K', '14'], ['3', 'T', '14'], ['4', 'T', '18']]), ('Name_2', [['2', 'G', '12'], ['4', 'T', '14'], ['6', 'K', '15']]), ('Name_3', [['2', 'K', '12'], ['3', 'T', '15'], ['4', 'G', '18']])])
    

    If rows with the same name are adjacent in the file, you could also use groupby, which groups consecutive elements based on the first item (the name) in each row:

    import csv
    from collections import OrderedDict
    from itertools import groupby
    from operator import itemgetter
    
    # newline='' is what the csv module documentation recommends when opening
    # a file for csv.reader.
    with open('in.csv', newline='') as f:
        r = csv.reader(f)
        od = OrderedDict()
        # groupby only merges *consecutive* rows sharing the same first field;
        # if a name reappears later in the file, its earlier entry is replaced.
        for k, v in groupby(r, key=itemgetter(0)):
            # Drop the name column from every row in the group.
            od[k] = [sub[1:] for sub in v]
    

    If you are using Python 3, you can unpack using *:

    import csv
    from collections import OrderedDict
    
    # newline='' is what the csv module documentation recommends when opening
    # a file for csv.reader.
    with open("in.csv", newline='') as f:
        r = csv.reader(f)
        od = OrderedDict()
        for row in r:
            # Extended unpacking (Python 3 only): the first field is the key,
            # the remaining fields are collected into a list.
            key, *rest = row
            od.setdefault(key, []).append(rest)
    
    
    import csv
    from collections import OrderedDict
    from itertools import groupby
    from operator import itemgetter
    
    # newline='' is what the csv module documentation recommends when opening
    # a file for csv.reader.
    with open('in.csv', newline='') as f:
        r = csv.reader(f)
        od = OrderedDict()
        # groupby only merges *consecutive* rows sharing the same first field.
        for k, v in groupby(r, key=itemgetter(0)):
            # Unpack each row in the loop target: discard the name, keep the rest.
            od[k] = [sub for _, *sub in v]
    print(od)