Search code examples
pythonpandasjson-normalize

Remove integer list keys from column headers


I have a Python script, which uses a function from a previous Stack Overflow solution.

from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, parent_key=False, separator='.'):
    """Flatten a nested mapping into a single-level dict with joined keys.

    Nested mappings contribute their keys to the path, and list elements
    contribute their positional index (as a string). Empty mappings and
    empty lists produce no entries.

    Args:
        dictionary: Mapping to flatten.
        parent_key: Key path accumulated so far. Any falsy value (the
            default is ``False``) means there is no prefix yet.
        separator: String placed between path components.

    Returns:
        A new dict mapping each joined key path to its leaf value.
    """
    items = []
    for key, value in dictionary.items():
        # The f-string also copes with non-string keys once a prefix exists.
        new_key = f"{parent_key}{separator}{key}" if parent_key else key
        if isinstance(value, mm):
            items.extend(flatten(value, new_key, separator).items())
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # Wrap each element in a one-item dict keyed by its index so
                # the same recursion handles it. The separator is forwarded
                # so paths below a list keep the caller's separator instead
                # of silently reverting to the default '.'.
                items.extend(
                    flatten({str(index): element}, new_key, separator).items()
                )
        else:
            items.append((new_key, value))
    return dict(items)

# Sample record mixing scalars, a list of scalars and nested lists of dicts.
d = {
    "_id": 1,
    "labelId": [6422],
    "levels": [
        {
            "active": "true",
            "level": 3,
            "actions": [{"isActive": "true"}],
        },
    ],
}

# Flatten the record first, then hand the single-level dict to json_normalize.
x = json_normalize(flatten(d))
print(x)

Current Output:

   _id  labelId.0 levels.0.active  levels.0.level levels.0.actions.0.isActive 
0    1       6422            true               3                        true                           

The issue I am having is that the numeric list indices get included in the column names. Is there a way I can amend my code to achieve my desired output?

Desired Output:

   _id    labelId   levels.active    levels.level     levels.actions.isActive 
0    1       6422            true               3                        true                           


Solution

  • First of all, using parent_key as a bool and then assigning it a value of another type is not best practice. It works, but it can become messy. I modified the code a bit, adding a separate argument to track the parent_key status as a bool, and p_key, which carries the string you wanted. Here is the snippet:

    from pandas import json_normalize
    from collections.abc import MutableMapping as mm
    def flatten(dictionary, p_key=None, parent_key=False, separator='.'):
        """Flatten a nested mapping, leaving list indices out of the keys.

        Keys of nested mappings are joined with ``separator``. Elements of
        a list all inherit the key of the list itself, so when a list holds
        several elements that flatten to the same key, the last one wins.

        Args:
            dictionary: Mapping to flatten.
            p_key: Key path inherited from the enclosing level, or ``None``
                when there is none.
            parent_key: When true, ``p_key`` is prepended to every key of
                ``dictionary``. When false and ``p_key`` is given, ``p_key``
                replaces the key outright (used for list elements).
            separator: String placed between path components.

        Returns:
            A new dict mapping each flattened key to its leaf value.
        """
        items = []
        for key, value in dictionary.items():
            if parent_key:
                new_key = f"{p_key}{separator}{key}"
            elif p_key is not None:
                # Compare against None rather than truthiness, so a falsy
                # parent key such as "" or 0 is still used and the
                # synthetic list index does not leak into the name.
                new_key = p_key
            else:
                new_key = key
            if isinstance(value, mm):
                items.extend(flatten(
                    dictionary=value,
                    p_key=new_key,
                    parent_key=True,
                    separator=separator).items())
            elif isinstance(value, list):
                for index, element in enumerate(value):
                    # parent_key=False makes the recursive call discard the
                    # index key and reuse the list's own key instead.
                    items.extend(flatten(
                        dictionary={str(index): element},
                        p_key=new_key,
                        parent_key=False,
                        separator=separator).items())
            else:
                items.append((new_key, value))
        return dict(items)
    
    # Same sample record as in the question.
    d = {
        "_id": 1,
        "labelId": [6422],
        "levels": [
            {
                "active": "true",
                "level": 3,
                "actions": [{"isActive": "true"}],
            },
        ],
    }

    # Flatten the record, build the frame from the flat dict, and print it.
    x = json_normalize(flatten(d))
    print(x)