I have a dataframe like this:
from to
B C
A B
D E
D F
B D
I want to create a nested dictionary that contains the `from` and `to` values. The final output will look like this:
{ "A": {
    "B": {
      "C": {},
      "D": {
        "E": {},
        "F": {},
      }
    }
  }
}
My code is:
# Attempt from the question: build the nested dict in a single pass over the
# (parent, child) pairs while tracking a "current" position in `curr`.
# Indentation restored; the logic itself is unchanged and still yields the
# incomplete tree the question describes.
d = dict()
curr = d
for p, c in [("B", "C"), ("A", "B"), ("D", "E"), ("D", "F"), ("B", "D")]:
    # First time
    if not d:
        d.setdefault(p, {}).setdefault(c, {})
    # Check if p is a parent
    if p in d:
        # p is a top-level key: add c below it, starting from `curr`.
        curr = curr.setdefault(p, {}).setdefault(c, {})
    else:
        if c in d:
            # c is currently a top-level key: re-attach its subtree under p
            # and drop the top-level entry.
            curr = d
            curr.setdefault(p, {}).setdefault(c, d.get(c))
            del d[c]
        else:
            curr = d
            # Only the direct values of the top-level keys are searched for
            # p; deeper levels are never visited.
            for k, v in curr.items():
                if p in v:
                    curr = v
                    curr.setdefault(p, {}).setdefault(c, {})
            else:
                # for/else: the loop has no `break`, so this always runs once
                # the scan finishes. If p was not found, `curr` is still `d`
                # and the pair is added at the top level.
                curr.setdefault(p, {}).setdefault(c, {})
The result is:
{'A': {'B': {'C': {}}},
'B': {'D': {'E': {},
'F': {}}}}
I don't know how to check whether a key is already a parent or a child, and how to substitute it if it is already a child/parent.
How about this
pairs = [("B", "C"), ("A", "B"), ("D", "E"), ("D", "F"), ("B", "D")]

# `d` maps every node name to its own child dict. The same dict object is
# stored both at the top level of `d` and inside its parent's dict, so
# children added to a node later also appear wherever that node is nested.
d = {}
for a, b in pairs:
    # Fetch (or create) b's dict and store that same object under a's dict.
    d.setdefault(a, {})[b] = d.setdefault(b, {})

# Keep only the tree rooted at "A"; the other top-level entries of `d` are
# the shared sub-dicts already reachable from it.
ok = {'A': d['A']}
print(ok)
prints
{'A': {'B': {'C': {}, 'D': {'E': {}, 'F': {}}}}}
Here is a version that is a bit more dynamic and supports multiple top-level keys.
import json
pairs = [
    ("B", "C"),
    ("A", "B"),
    ("D", "E"),
    ("D", "F"),
    ("B", "D"),
    ("z", "x")
]

# `d` maps every node name to its own child dict; the same dict object is
# also stored inside its parent's dict, so later additions are visible
# wherever the node is nested.
d = {}
# Every name that appears as a child ("to") of some pair.
something_points_to = set()
for a, b in pairs:
    something_points_to.add(b)
    d.setdefault(a, {})[b] = d.setdefault(b, {})

# Roots are the nodes that never appear as a child of another node.
root_keys = [key for key in d if key not in something_points_to]
final_result = {key: d[key] for key in root_keys}
print(json.dumps(final_result, indent=4))
That should give you:
{
"A": {
"B": {
"C": {},
"D": {
"E": {},
"F": {}
}
}
},
"z": {
"x": {}
}
}