Search code examples
pythongroup-bypython-itertools

groupby() giving an empty list


I've executed the following script:

from itertools import groupby
from pprint import pprint as prnt
# Sample data: (code, value) tuples; rows with the same code are adjacent.
dt = [('23271800', 0.00066790780636275307),
 ('23271812', 0.0010018617095441298),
 ('26112103', 0.00066790780636275307),
 ('27111616', 0.0056772163540834012),
 # ... many lines deleted ...
 ('40161500', 0.00040074468381765189)
]

# groupby lazily yields (key, group) pairs; per the itertools documentation,
# each group is an iterator that shares the underlying iterable with agg.
agg = groupby(dt, lambda x: x[0])
# list(agg) advances groupby through every key before any group is read, so
# the earlier group iterators are no longer valid once this line finishes.
lst = list(agg)
# The groups are only materialized here, after groupby has already moved on;
# this is why the printed output shows empty lists for all but the last key.
lst1 = map(lambda x: (x[0], list(x[1])), lst)
prnt(lst1)

For the key '23271800' I expected the corresponding group to be [('23271800', 0.00066790780636275307)]. However, every group except the last one comes back as an empty list:

[('23271800', []),
 ('23271812', []),
 ('26112103', []),
 ('27111616', []),
 # ... many lines deleted ...
 ('40161500', [('40161500', 0.00040074468381765189)])]

I need help understanding whether I am doing something incorrectly here.

PS: Code paste: http://codepad.org/cCd8DfoT


Solution

  • d = [(key, list(group)) for key, group in groupby(dt, lambda x: x[0])]
    prnt(d)
    

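    groupby returns a key and an iterator over the group, for each group it finds. Each group iterator shares the underlying iterable with groupby itself, so a group must be consumed (here with list(group)) before groupby advances to the next key. Calling list() on the whole groupby object first, as in the question, advances past every group and leaves the earlier ones empty.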

    Output

    [('23271800', [('23271800', 0.0006679078063627531)]),
     ('23271812', [('23271812', 0.0010018617095441298)]),
     ('26112103', [('26112103', 0.0006679078063627531)]),
     ('27111616', [('27111616', 0.005677216354083401)]),
     ('30101600',
      [('30101600', 1.3909064158636346e-05), ('30101600', 0.002002905238843634)]),
     ('30102200', [('30102200', 0.00013358156127255062)]),
     ('31100000', [('31100000', 2.1849453575689805e-05)]),
     ('31161500', [('31161500', 0.0005180729752775727)]),
     ('31161501', [('31161501', 0.00012902764441098641)]),
     ('31161505', [('31161505', 0.013866049271881438)]),
     ('31161513', [('31161513', 0.021559049445886335)]),
     ('31161518', [('31161518', 0.0011596016382808651)]),
     ('31161520', [('31161520', 0.022263593545425106)]),
     ('31161600', [('31161600', 0.003930380552826971)]),
     ('31161618', [('31161618', 0.0016029787352706075)]),
     ('31161620', [('31161620', 0.0008462931211056002)]),
     ('31161700', [('31161700', 0.0008833842874611101)]),
     ('31161716', [('31161716', 7.067074299688881e-05)]),
     ('31161717', [('31161717', 0.0014193040885208503)]),
     ('31161727', [('31161727', 0.01364664212812536)]),
     ('31161801', [('31161801', 0.000179280516444739)]),
     ('31161900',
      [('31161900', 1.6624352427769844e-05), ('31161900', 0.0001496191718499286)]),
     ('31161904', [('31161904', 6.666007460763289e-05)]),
     ('31162409', [('31162409', 0.007129527514430318)]),
     ('31162800',
      [('31162800', 0.0002625302360269781),
       ('31162800', 0.359403893120933),
       ('31162800', 0.2207879284986886),
       ('31162800', 0.0002625302360269781)]),
     ('31163200',
      [('31163200', 0.00037295581888139136),
       ('31163200', 4.1439535431265705e-05)]),
     ('31163201', [('31163201', 0.011292216638533014)]),
     ('31163202',
      [('31163202', 4.5417730832667214e-05),
       ('31163202', 4.5417730832667214e-05)]),
     ('31163203', [('31163203', 0.003471418917146539)]),
     ('31163204', [('31163204', 0.0002962025923869601)]),
     ('31163214', [('31163214', 0.0014119501813264418)]),
     ('31163215', [('31163215', 0.017772155543217604)]),
     ('31171504', [('31171504', 0.05423235622453355)]),
     ('31181600', [('31181600', 5.262772981769086e-05)]),
     ('31181602', [('31181602', 0.00019920057382748777)]),
     ('31191518', [('31191518', 0.0014972878296483697)]),
     ('39121719', [('39121719', 0.0022708865416333607)]),
     ('40141600', [('40141600', 5.0614113112184855e-05)]),
     ('40141607', [('40141607', 0.0958030259751574)]),
     ('40141616',
      [('40141616', 0.005499007768977646), ('40141616', 0.00015275021580493458)]),
     ('40141636', [('40141636', 0.0007247510239255406)]),
     ('40141680', [('40141680', 0.12267031972561518)]),
     ('40142000', [('40142000', 0.0002962025923869601)]),
     ('40142100', [('40142100', 8.188292818389522e-05)]),
     ('40142315', [('40142315', 0.00034758467473980007)]),
     ('40142323', [('40142323', 0.0006308018171203779)]),
     ('40161500', [('40161500', 0.0004007446838176519)])]