I have a Python script, which uses a function from a previous Stack Overflow solution.
from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, parent_key=False, separator='.'):
    """Flatten nested mappings and lists into a single-level dict.

    Each resulting key is the path to a leaf value, with the path
    components joined by ``separator``.  List elements contribute their
    positional index (as a string) as one path component, so
    ``{"a": [{"b": 1}]}`` becomes ``{"a.0.b": 1}``.

    Args:
        dictionary: Mapping to flatten.
        parent_key: Prefix prepended to every key.  Any falsy value
            (the default ``False``, but also ``""`` or ``0``) means
            "no prefix".
        separator: String placed between path components.

    Returns:
        A new ``dict`` mapping flattened keys to leaf values.  Empty
        mappings and empty lists contribute no entries.
    """
    items = []
    for key, value in dictionary.items():
        # The f-string stringifies every component, so non-string nested
        # keys (e.g. ints) no longer raise TypeError on concatenation.
        new_key = f"{parent_key}{separator}{key}" if parent_key else key
        if isinstance(value, mm):
            items.extend(flatten(value, new_key, separator).items())
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # Wrap the element in a one-item dict keyed by its index so
                # the recursion handles scalars, dicts and nested lists
                # alike.  The separator must be forwarded here, otherwise
                # everything below a list falls back to the default '.'.
                items.extend(
                    flatten({str(index): element}, new_key, separator).items()
                )
        else:
            items.append((new_key, value))
    return dict(items)
# Sample document: a scalar field, a list of scalars, and a list of
# nested objects that itself contains another list of objects.
d = {
    "_id": 1,
    "labelId": [6422],
    "levels": [
        {
            "active": "true",
            "level": 3,
            "actions": [{"isActive": "true"}],
        },
    ],
}

# Flatten the document to a single-level dict, then load it into a
# DataFrame and show the resulting columns.
x = json_normalize(flatten(d))
print(x)
Current Output:
_id labelId.0 levels.0.active levels.0.level levels.0.actions.0.isActive
0 1 6422 true 3 true
The issue I am having is the numeric keys being included in the column names. Is there a way I can amend my code in order to achieve my desired output?
Desired Output:
_id labelId levels.active levels.level levels.actions.isActive
0 1 6422 true 3 true
First of all, using parent_key as a bool and then assigning it a value of another type is not best practice. It works, but it can become messy. I modified the code a bit, adding a separate argument: parent_key is now a plain bool flag that says whether the current key should be appended to the parent key, and p_key carries the parent key string you wanted. Here is the snippet:
from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, p_key=None, parent_key=False, separator='.'):
    """Flatten nested mappings and lists without list indices in the keys.

    Nested mapping keys are joined with ``separator``.  List elements do
    not add a path component: each element is stored under the key of the
    list itself, so ``{"a": [{"b": 1}]}`` becomes ``{"a.b": 1}``.

    Because indices are dropped, several leaves can map to the same
    flattened key (for example every element of a multi-element list).
    Those values are collected, in encounter order, into a list instead of
    silently overwriting one another; a key seen only once keeps its bare
    value.

    Args:
        dictionary: Mapping to flatten.
        p_key: Key of the enclosing container, if any.
        parent_key: When true, each key is emitted as
            ``p_key + separator + key``.  When false, a truthy ``p_key``
            is used as the key itself, otherwise the key is used as-is.
        separator: String placed between path components.

    Returns:
        A new ``dict`` mapping flattened keys to leaf values, or to lists
        of leaf values for keys that occur more than once.  Empty mappings
        and empty lists contribute no entries.
    """
    collected = {}
    for flat_key, leaf in _iter_leaves(dictionary, p_key, parent_key, separator):
        collected.setdefault(flat_key, []).append(leaf)
    # Unwrap single occurrences so the common case is unchanged.
    return {
        flat_key: leaves[0] if len(leaves) == 1 else leaves
        for flat_key, leaves in collected.items()
    }


def _iter_leaves(dictionary, p_key, parent_key, separator):
    """Yield ``(flattened_key, leaf_value)`` pairs for ``flatten``."""
    for key, value in dictionary.items():
        if parent_key:
            new_key = f"{p_key}{separator}{key}"
        else:
            new_key = p_key if p_key else key
        if isinstance(value, mm):
            yield from _iter_leaves(value, new_key, True, separator)
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # Wrap the element so scalars, dicts and nested lists all go
                # through the same recursion.  parent_key=False makes the
                # wrapper's index key be replaced by the list's own key.
                yield from _iter_leaves(
                    {str(index): element}, new_key, False, separator
                )
        else:
            yield new_key, value
# Sample document: a scalar field, a list of scalars, and a list of
# nested objects that itself contains another list of objects.
d = {
    "_id": 1,
    "labelId": [6422],
    "levels": [
        {
            "active": "true",
            "level": 3,
            "actions": [{"isActive": "true"}],
        },
    ],
}

# Flatten the document to a single-level dict, then load it into a
# DataFrame and show the resulting columns.
x = json_normalize(flatten(d))
print(x)