Search code examples
pythondictionarylist-comprehensiondelta

Python: Subtracting between lists of dictionaries


I have two lists containing dictionaries, as follows:

# Each entry maps a unit name to a single {test name: value} pair.
listone = [{'unit1': {'test1': 10}},
           {'unit1': {'test2': 45}},
           {'unit2': {'test1': 78}},
           {'unit2': {'test2': 2}}]

listtwo = [{'unit1': {'test1': 56}},
           {'unit1': {'test2': 34}},
           {'unit2': {'test1': 23}},
           {'unit2': {'test2': 5}}]

I also have all the unit names and test names in separate lists:

# Names used to look up entries in the two lists of readings.
units = ['unit1', 'unit2']
testnames = ['test1', 'test2']

How could I find the delta for each test value, i.e. the value in listtwo minus the corresponding value in listone for the same unit and test, so that I could finally arrange the data as follows:

unit1, test1, delta
unit1, test2, delta
unit2, test1, delta
unit2, test2, delta

So far, I have these:

# Attempt to print, for every unit/test pair, the value from array2 minus the
# value from array1 (see the final list comprehension).  Both arguments are
# lists of {unit: {test: value}} dicts.  Reads the module-level lists `units`
# and `testnames`; calls `floater`, which is not defined in this snippet.
# As posted, this function raises KeyError -- the notes below mark why.
def delta(array1, array2):
        temp = []
        temp2 = []
        tmp = []
        tmp2 = []
        # NOTE(review): this local shadows the function's own name inside the body.
        delta = []
        for unit in units:
            # First pass: collect the per-test dicts of `unit` found in array1.
            for mkey in array1:
                for skey in mkey:
                    if skey == unit:
                        temp.append(mkey[skey])
                        floater(temp) #floats all the values
                        for i in testnames:
                            for u in temp:
                                # With the sample data each `u` holds only ONE test
                                # name, so indexing it with the other name from
                                # `testnames` raises KeyError here.
                                tmp.append(u[i])
                                # NOTE(review): rebinds `tmp` from `tmp2`, discarding
                                # what was just appended -- looks like a typo for `tmp`.
                                tmp = filter(None, tmp2)

            # Second pass: same idea for array2.
            for mkey in array2:
                for skey in mkey:
                    if skey == unit:
                        # NOTE(review): appends to `temp`, not `temp2`; `temp2` is
                        # never filled, so the loop over `temp2` below never runs.
                        temp.append(mkey[skey])
                        floater(temp2)
                        for i in testnames:
                            for u in temp2:
                                tmp2.append(u[i])
                                tmp2 = filter(None, tmp2)

        # Pairwise difference: array2 value minus array1 value.
        delta = [tmp2 - tmp for tmp2, tmp in zip(tmp2, tmp)] 
        print delta

delta(listone,listtwo)

Unfortunately, the code raises a KeyError. :( Help, please. Thanks.


Solution

  • Similar but a bit more encapsulated:

    from collections import defaultdict
    
    # Sample readings: every list entry is {unit name: {test name: value}}.
    # `listone` is the baseline, `listtwo` the values it is compared against.
    listone = [{'unit1': {'test1': 10}}, {'unit1': {'test2': 45}},
               {'unit2': {'test1': 78}}, {'unit2': {'test2': 2}}]

    listtwo = [{'unit1': {'test1': 56}}, {'unit1': {'test2': 34}},
               {'unit2': {'test1': 23}}, {'unit2': {'test2': 5}}]
    
    def dictify(lst):
        """Merge a list of ``{unit: {test: value}}`` entries into one mapping.

        Args:
            lst: iterable of dicts, each mapping a unit name to a dict of
                ``{test name: value}``.

        Returns:
            A nested ``defaultdict`` such that ``result[unit][test] == value``.
            Pairs that never appeared read as ``0`` (and, as with any
            ``defaultdict``, are created on first access).  When the same
            unit/test pair occurs more than once, the last value wins.
        """
        res = defaultdict(lambda: defaultdict(int))
        for entry in lst:
            # .items() rather than .iteritems(): the latter exists only on
            # Python 2, while .items() works on both major versions.
            for unit, testentry in entry.items():
                for test, val in testentry.items():
                    res[unit][test] = val
        return res
    
    def genDeltas(dictA, dictB):
        """Yield ``(unit, test, delta)`` tuples comparing two nested mappings.

        ``delta`` is ``dictB[unit][test] - dictA[unit][test]``.

        Args:
            dictA: mapping of unit -> mapping of test -> value (the baseline).
            dictB: mapping with the same shape whose values are compared
                against ``dictA``.

        Yields:
            One tuple per unit/test pair.  Units come from ``dictA`` in sorted
            order; the test names are those of the first sorted unit in
            ``dictA`` (also sorted) and are applied to every unit.  Nothing is
            yielded when ``dictA`` is empty.
        """
        # sorted() works on both Python 2 lists and Python 3 key views, unlike
        # calling .sort() on the result of .keys().
        units = sorted(dictA)
        if not units:
            # No units means no test names to take from units[0].
            return
        tests = sorted(dictA[units[0]])
        for unit in units:
            tests_a = dictA[unit]
            tests_b = dictB[unit]
            for test in tests:
                yield unit, test, tests_b[test] - tests_a[test]
    
    # Print one "unit, test, delta" line per pair.  print(...) with a single
    # argument behaves the same on Python 2 and Python 3.
    for unit, test, delta in genDeltas(dictify(listone), dictify(listtwo)):
        print("{0}, {1}, {2}".format(unit, test, delta))
    

    Edit: to find field-averages:

    class Avg(object):
        """Running arithmetic mean built from a total and a count."""

        def __init__(self, total=0.0, num=0):
            """Start from an optional pre-accumulated ``total`` over ``num`` values."""
            super(Avg, self).__init__()
            self.total = total  # sum of the values added so far
            self.num = num      # number of values added so far

        def add(self, value):
            """Fold one more value into the running total."""
            self.total += value
            self.num += 1

        def value(self):
            """Return the mean of the values added so far.

            Raises:
                ZeroDivisionError: if no values have been added.
            """
            # Imported here so the class does not rely on a file-level import.
            from operator import truediv

            if not self.num:
                raise ZeroDivisionError("average of no values")
            # truediv gives true division on Python 2 as well, where a plain
            # `/` would floor if a caller seeded `total` with an int.
            return truediv(self.total, self.num)
    
    def avgBy(data, field=0):
        """Average the values of a ``{unit: {test: value}}`` mapping by one key.

        Args:
            data: mapping of unit -> mapping of test -> numeric value.
            field: index into the ``(unit, test)`` pair that selects the
                grouping key: ``0`` groups by unit (the default), ``1`` by test.

        Returns:
            dict mapping each group key to the mean of the values in that group.
        """
        res = defaultdict(Avg)
        # .items() rather than .iteritems(): the latter exists only on
        # Python 2, while .items() works on both major versions.
        for unit, testdict in data.items():
            for test, val in testdict.items():
                res[(unit, test)[field]].add(val)
        return {key: avg.value() for key, avg in res.items()}
    
    # Flatten the first sample list once; both averages are computed from it.
    dictone = dictify(listone)

    # Mean value per unit (field 0 of the (unit, test) pair).
    avg_by_unit = avgBy(dictone, field=0)
    print(avg_by_unit)

    # Mean value per test (field 1 of the (unit, test) pair).
    avg_by_test = avgBy(dictone, field=1)
    print(avg_by_test)