What are the possible ways to unnest a list of nested json objects inside a dataframe column and join again with the same dataframe.
e.g.:
data = {
'tax_info': [
'[]',
"[{'price': '1.26', 'rate': 0.06, 'title': 'Idaho State Tax', 'price_set': {'shop_money': {'amount': '1.26', 'currency_code': 'USD'}, 'presentment_money': {'amount': '1.26', 'currency_code': 'USD'}}, 'channel_liable': False}]",
"[{'price': '1.98', 'rate': 0.0625, 'title': 'Illinois State Tax', 'price_set': {'shop_money': {'amount': '1.98', 'currency_code': 'USD'}, 'presentment_money': {'amount': '1.98', 'currency_code': 'USD'}}, 'channel_liable': False}, {'price': '0.39', 'rate': 0.0125, 'title': 'Chicago City Tax', 'price_set': {'shop_money': {'amount': '0.39', 'currency_code': 'USD'}, 'presentment_money': {'amount': '0.39', 'currency_code': 'USD'}}, 'channel_liable': False}, {'price': '0.87', 'rate': 0.0275, 'title': 'Cook County Tax', 'price_set': {'shop_money': {'amount': '0.87', 'currency_code': 'USD'}, 'presentment_money': {'amount': '0.87', 'currency_code': 'USD'}}, 'channel_liable': False}]"
]
}
I don't know if pandas has anything for parsing stringified data like this, and json.loads
would not work since that's not valid JSON, but you could use ast.literal_eval
and pd.json_normalize
as below:
# import ast
data = {'tax_info': [[], [{'price': '1.26', 'rate': 0.06, 'title': 'Idaho State Tax', 'price_set': {'shop_money': {'amount': '1.26', 'currency_code': 'USD'}, 'presentment_money': {'amount': '1.26', 'currency_code': 'USD'}}, 'channel_liable': False}], [{'price': '1.98', 'rate': 0.0625, 'title': 'Illinois State Tax', 'price_set': {'shop_money': {'amount': '1.98', 'currency_code': 'USD'}, 'presentment_money': {'amount': '1.98', 'currency_code': 'USD'}}, 'channel_liable': False}, {'price': '0.39', 'rate': 0.0125, 'title': 'Chicago City Tax', 'price_set': {'shop_money': {'amount': '0.39', 'currency_code': 'USD'}, 'presentment_money': {'amount': '0.39', 'currency_code': 'USD'}}, 'channel_liable': False}, {'price': '0.87', 'rate': 0.0275, 'title': 'Cook County Tax', 'price_set': {'shop_money': {'amount': '0.87', 'currency_code': 'USD'}, 'presentment_money': {'amount': '0.87', 'currency_code': 'USD'}}, 'channel_liable': False}]]}
data['tax_info'] = [ast.literal_eval(s) for s in data['tax_info']]
df = pd.json_normalize(data, ['tax_info', []])
# in one statement, and without altering data:
# pd.json_normalize({**data, 'tax_info': [ast.literal_eval(s) for s in data['tax_info']]}, ['tax_info', []])