Search code examples
pythonobjectlambdagroup-by

Python lambda groupby with aggregation results in a syntax error


Hello, I am struggling with a lambda-based groupby combined with a nested dict comprehension. I want to get the result in the structure shown in the example below:

Target structure

#  This already works! ########################################################
# "GM0014": {
# "i1401": {
# "score": 1.178,
# "rawScore": -1.178,
# "year": "2019",
# "id": "i1401"
# },
# "i1021": {
# "score": 1.838,
# "rawScore": -1.838,
# "year": "2020",
# "id": "i1021"
# },
# "i1022": {
# "score": 0.496,
# "rawScore": -0.496,
# "year": "2020",
# "id": "i1022"
# },
# "i1013": {
# "score": 0.415,
# "rawScore": 0.415,
# "year": "2020",
# "id": "i1013"
# },
#  This does not work! ########################################################
# "overAll": {
# "score": 0.982,
# "rawScore": -0.774

For this I use the dataset below. These values should be transformed into the target structure shown above. The scoreMax and rawScoreMax values are used for the "overAll" section.

data = [  {'region': 'GM0014', 'variable': 'i1013', 'year': '2020', 'score': 0.415, 'rawScore': 0.415, 'scoreMax': 0.982, 'rawScoreMax': -0.774 }
        , {'region': 'GM0014', 'variable': 'i1021', 'year': '2020', 'score': -1.838, 'rawScore': 1.838, 'scoreMax': 0.982, 'rawScoreMax': -0.774}
        , {'region': 'GM0014', 'variable': 'i1022', 'year': '2020', 'score': -0.496, 'rawScore': 0.496, 'scoreMax': 0.982, 'rawScoreMax': -0.774}
        , {'region': 'GM0014', 'variable': 'i1401', 'year': '2019', 'score': -1.178, 'rawScore': 1.178, 'scoreMax': 0.982, 'rawScoreMax': -0.774}
        , {'region': 'GM0034', 'variable': 'i1013', 'year': '2020', 'score': -0.913, 'rawScore': -0.913, 'scoreMax': -0.071, 'rawScoreMax': -0.385 }
        , {'region': 'GM0034', 'variable': 'i1021', 'year': '2020', 'score': -0.244, 'rawScore': 0.244, 'scoreMax': -0.071, 'rawScoreMax': -0.385}
        , {'region': 'GM0034', 'variable': 'i1022', 'year': '2020', 'score': -0.332, 'rawScore': 0.332, 'scoreMax': -0.071, 'rawScoreMax': -0.385}
        , {'region': 'GM0034', 'variable': 'i1401', 'year': '2019', 'score': -0.053, 'rawScore': 0.053, 'scoreMax': -0.071, 'rawScoreMax': -0.385}
        , {'region': 'GM0037', 'variable': 'i1013', 'year': '2020', 'score': 0.487, 'rawScore': 0.487, 'scoreMax': 0.769, 'rawScoreMax': -0.526}
        , {'region': 'GM0037', 'variable': 'i1021', 'year': '2020', 'score': -2.172, 'rawScore': 2.172, 'scoreMax': 0.769, 'rawScoreMax': -0.526}
        , {'region': 'GM0037', 'variable': 'i1022', 'year': '2020', 'score': -1.654, 'rawScore': 1.654, 'scoreMax': 0.769, 'rawScoreMax': -0.526}
        , {'region': 'GM0037', 'variable': 'i1401', 'year': '2019', 'score': 1.236, 'rawScore': -1.236, 'scoreMax': 0.769, 'rawScoreMax': -0.526}
        , {'region': 'GM0047', 'variable': 'i1013', 'year': '2020', 'score': 0.885, 'rawScore': 0.885, 'scoreMax': 0.562, 'rawScoreMax': -0.12}
        , {'region': 'GM0047', 'variable': 'i1021', 'year': '2020', 'score': -2.19, 'rawScore': 2.19, 'scoreMax': 0.562, 'rawScoreMax': -0.12}
        , {'region': 'GM0047', 'variable': 'i1022', 'year': '2020', 'score': -1.542, 'rawScore': 1.542, 'scoreMax': 0.562, 'rawScoreMax': -0.12}
        , {'region': 'GM0047', 'variable': 'i1401', 'year': '2019', 'score': 2.368, 'rawScore': -2.368, 'scoreMax': 0.562, 'rawScoreMax': -0.12}]

This code works, except for the "overAll" section:

 Group by function with Lambda
 # Nested dict comprehension: the outer level is keyed by region (one entry per
 # groupby run), the inner level by each row's 'variable'.
 # NOTE: groupby only merges consecutive rows with equal keys, so `data` must
 # already be ordered by 'region'.
 test = {key : {l['variable'] : { 'score'   : l['score']
                                  # NOTE(review): 'rawscore' is listed twice (the second entry is
                                  # redundant), and the target structure spells this key 'rawScore'.
                                  ,'rawscore': l['rawScore']
                                  ,'rawscore': l['rawScore']
                                  ,'year'    : l['year']
                                  ,'id'      : l['variable']
                                 
 } for l in lines}
          for key, lines in  itertools.groupby(data, lambda p: p['region']) }

To get the "overAll" section to work, I tried to modify the code above into the code below:

test = {key : {l['variable'] : { 'score'   : l['score']
                                ,'rawscore': l['rawScore']
                                ,'rawscore': l['rawScore']
                                ,'year'    : l['year']
                                ,'id'      : l['variable']
                                } 
                    for l in lines } 
            {'overAll': { 'score'    : l['scoreMax']
                         ,'rawscore' : l['rawScoreMax']
                        }
        for key, lines in  itertools.groupby(data, lambda p: p['region']) }}

But I get the error:

{'overAll': { 'score' : l['scoreMax'] ^ SyntaxError: invalid syntax

Can you please help me? Many thanks.


Solution

  • I hope I've understood your question right:

    from itertools import groupby
    from operator import itemgetter

    # Per-region summary columns: they repeat on every row of a region, so they
    # are lifted into the "overAll" entry and left out of the per-variable ones.
    summary_keys = ("scoreMax", "rawScoreMax")
    by_region = itemgetter("region")

    out = {}
    # groupby only merges *consecutive* rows with equal keys, so sort by the
    # grouping key first. sorted() is stable, so rows keep their relative order
    # within each region.
    for region, rows in groupby(sorted(data, key=by_region), key=by_region):
        rows = list(rows)  # the group iterator is single-use; we need two passes
        first = rows[0]

        out[region] = {
            "overAll": {"score": first["scoreMax"], "rawscore": first["rawScoreMax"]}
        }
        for row in rows:
            # Store a filtered copy rather than `row` itself: deleting keys from
            # `row` would also strip them from `data`, so running this a second
            # time would fail with a KeyError on "scoreMax".
            out[region][row["variable"]] = {
                name: value for name, value in row.items() if name not in summary_keys
            }

    print(out)
    

    Prints:

    {
        "GM0014": {
            "overAll": {"score": 0.982, "rawscore": -0.774},
            "i1013": {
                "region": "GM0014",
                "variable": "i1013",
                "year": "2020",
                "score": 0.415,
                "rawScore": 0.415,
            },
            "i1021": {
                "region": "GM0014",
                "variable": "i1021",
                "year": "2020",
                "score": -1.838,
                "rawScore": 1.838,
            },
            "i1022": {
                "region": "GM0014",
                "variable": "i1022",
                "year": "2020",
                "score": -0.496,
                "rawScore": 0.496,
            },
            "i1401": {
                "region": "GM0014",
                "variable": "i1401",
                "year": "2019",
                "score": -1.178,
                "rawScore": 1.178,
            },
        },
        "GM0034": {
            "overAll": {"score": -0.071, "rawscore": -0.385},
            "i1013": {
                "region": "GM0034",
                "variable": "i1013",
                "year": "2020",
                "score": -0.913,
                "rawScore": -0.913,
            },
            "i1021": {
                "region": "GM0034",
                "variable": "i1021",
                "year": "2020",
                "score": -0.244,
                "rawScore": 0.244,
            },
    
    ...