I am working on a Product Bundle creation and recommendation project. The bundling and recommendation have to happen in real-time based on user input.
The conditions are that 1.The product bundle should be such that it covers the user inputs as much as possible. 2. And there should be less duplication of user inputs across the recommends items.
user_input=['a','b','c']
d1=['a','c','d']
d2=['a','b','e','f']
d3=['a','c','b','f']
d4=['b']
d5=['g','e','a']
d6=['g']
expected output - d1 + d4, d3, d1+d2,d5+d4+d1
Following is my code, it gives results but it shows duplicate results and also doesn't show all the combinations. Any help is appreciated.
dlist=[d1,d2,d3,d4,d5,d6]
diff_match=[]
# find match n diff of each product based on user input
for i in range(len(dlist)):
match=set(user_input).intersection(set(dlist[i]))
#print("match is",match)
diff=set(user_input).difference(set(dlist[i]))
#print("diff is",diff)
temp={'match':match,'diff':diff}
diff_match.append(temp)
for i in range(len(diff_match)):
# if match is found, recommend the product alone
diff_match_temp=diff_match[i]['match']
print("diff match temp is",diff_match_temp)
if diff_match_temp==user_input:
print ("absolute match")
#scenario where the user input is subset of product features, seperate from partial match
elif (all(x in list(diff_match_temp) for x in list(user_input))):
print("User input subset of product features")
print("The parent list is",diff_match[i]['match'])
print("the product is", dlist[i])
else:
'''else check if the difference between user input and the current product is fulfilled by other product,
if yes, these products are bundled together'''
for j in range(len(diff_match)):
temp_diff=diff_match[i]['diff']
print("temp_diff is",temp_diff)
# empty set should be explicitly checked to avoid wrong match
if (temp_diff.intersection(diff_match[j]['match'])==temp_diff and len(temp_diff)!=0 and list(temp_diff) != user_input) :
#if temp_diff==diff_match[j]['match'] and len(temp_diff)!=0 and list(temp_diff) != user_input :
print("match found with another product")
print("parent is",dlist[i])
print("the combination is",dlist[j] )
To do it recursively, you could create a function that returns the product with the largest component coverage and recurses to complete the bundles with the remaining components:
def getBundles(C,P):
cSet = set(C) # use set operations, largest to smallest coverage
coverage = sorted(P.items(),key=lambda pc:-len(cSet.intersection(pc[1])))
for i,(p,cs) in enumerate(coverage):
if cSet.isdisjoint(cs):continue # no coverage
if cSet.issubset(cs): yield [p];continue # complete (stop recursion)
remaining = cSet.difference(cs) # remaining components to bundle
unused = dict(coverage[i+1:]) # products not already bundled
yield from ([p]+rest for rest in getBundles(remaining,unused))
output:
prods = {"d1":['a','c','d'],
"d2":['a','b','e','f'],
"d3":['a','c','b','f'],
"d4":['b'],
"d5":['g','e','a'],
"d6":['g']}
user_input=['a','b','c']
for bundle in getBundles(user_input,prods):
print(bundle)
['d3']
['d1', 'd2']
['d1', 'd4']
Note that redundant combinations such as ['d5','d1','d2'] are excluded because d1+d2 covers everything that d5 covers and more, so d5 is redundant. Permutations of the same products are also excluded (e.g. d1+d2 is the same as d2+d1)
[EDIT]
If you need to offer redundant combinations (maybe as an extended selection option), you can write a slightly different recursive function that won't exclude them. You should also sort the result in order of closest fit when presenting them:
def getBundles(C,P,remain=None,bundle=None):
cSet = set(C) # use set operations
if remain is None: remain,bundle = cSet,[] # track coverage & budle
prods = list(P.items()) # remaining products
for i,(p,cs) in enumerate(prods):
if cSet.isdisjoint(cs):continue # no coverage
newBundle = bundle+[p] # add product to bundle
if remain.issubset(cs): yield newBundle # full coverage bundle
toCover = remain.difference(cs) # not yet covered
unused = dict(prods[i+1:]) # remainin products
yield from getBundles(C,unused,toCover,newBundle) # recurse for rest
output:
prods = {"d1":['a','c','d'],
"d2":['a','b','e','f'],
"d3":['a','c','b','f'],
"d4":['b'],
"d5":['g','e','a'],
"d6":['g']}
user_input=['a','b','c']
for bundle in sorted(getBundles(user_input,prods),
key=lambda b:sum(map(len,(prods[p] for p in b)))):
print(bundle)
['d1', 'd4']
['d3']
['d3', 'd4']
['d1', 'd2']
['d1', 'd3']
['d1', 'd4', 'd5']
['d3', 'd5']
['d1', 'd2', 'd4']
['d1', 'd3', 'd4']
['d2', 'd3']
['d3', 'd4', 'd5']
['d2', 'd3', 'd4']
['d1', 'd2', 'd5']
['d1', 'd3', 'd5']
['d1', 'd2', 'd3']
['d1', 'd2', 'd4', 'd5']
['d1', 'd3', 'd4', 'd5']
['d2', 'd3', 'd5']
['d1', 'd2', 'd3', 'd4']
['d2', 'd3', 'd4', 'd5']
['d1', 'd2', 'd3', 'd5']
['d1', 'd2', 'd3', 'd4', 'd5']