I have the following list of objects that I want to sort into order based on dependencies. firstly the objects without dependencies would be first added to the list, then the batch that has dependencies on the first lot that was added, and so on and so on until all the items are removed from the list.
pp = [
{"name": 'pipeline13', "deps": 'pipeline11' },
{"name": 'pipeline1', "deps": 'pipeline4' },
{"name": 'pipeline4'},
{"name": 'pipeline2', "deps": 'pipeline4'},
{"name": 'pipeline3'},
{"name": 'pipeline5'},
{"name": 'pipeline6', "deps": 'pipeline2'},
{"name": 'pipeline7'},
{"name": 'pipeline8', "deps": 'pipeline2'},
{"name": 'pipeline9', "deps": 'pipeline3'},
{"name": 'pipeline10', "deps": 'pipeline1' },
{"name": 'pipeline11', "deps": 'pipeline10' }
]
Currently, I have the below code which works but it is not scalable nor very pythonic.
output = []
output_stage_1 = []
output_stage_2 = []
output_stage_3 = []
output_stage_4 = []
output_stage_5 = []
while pp:
for p in pp:
if not p.get('deps'):
output.append(p)
pp.remove(p)
if p.get('deps') in [i.get('name') for i in output]:
output_stage_1.append(p)
pp.remove(p)
if p.get('deps') in [i.get('name') for i in output_stage_1]:
output_stage_2.append(p)
pp.remove(p)
if p.get('deps') in [i.get('name') for i in output_stage_2]:
output_stage_3.append(p)
pp.remove(p)
if p.get('deps') in [i.get('name') for i in output_stage_3]:
output_stage_4.append(p)
pp.remove(p)
if p.get('deps') in [i.get('name') for i in output_stage_4]:
output_stage_5.append(p)
pp.remove(p)
print(output + output_stage_1 + output_stage_2 + output_stage_3 + output_stage_4 + output_stage_5)
Your could do it like this:
ordered = [ item["name"] for item in pp if "deps" not in item ]
while len(ordered) < len(pp):
for item in pp:
if "deps" not in item : continue
if item["name"] not in ordered and item["deps"] in ordered:
ordered.append(item["name"])
Note that I didn't optimize it so it could be a bit slow on large data sets.
[EDIT] here's an optimized version:
ordered = [ item for item in pp if "deps" not in item ]
dependents = [ (item["name"],item["deps"],item) for item in pp if "deps" in item]
included = set([item["name"] for item in ordered])
remaining = len(dependents)
while remaining > 0:
nextGroup = []
circularCount = remaining
for name,deps,item in dependents:
if deps in included:
ordered.append(item)
included.add(name)
remaining -= 1
else:
nextGroup.append((name,deps,item))
dependents = nextGroup
if remaining == circularCount : break
if remaining > 0 :
# There was a circular dependency error