I have a DataFrame like this:
Dt A B C D
11-apr 1 1 1 1
10-apr 2 3 1 2
How do I get a new DataFrame that has one column for every possible combination of the columns A, B, C and D,
where each new column holds the sum of the columns in that combination?
Possible combinations: A+B, A+C, A+D, B+C, B+D, C+D, A+B+C, B+C+D, A+B+D, A+C+D, A+B+C+D
I am able to generate all the combinations, but when I try to create these columns I get the following error: File "pandas/_libs/src/inference.pyx", line 1472, in pandas._libs.lib.map_infer TypeError: sequence item 2: expected string, float found
from itertools import chain, combinations
# Collect every subset produced by all_subsets(df) as a tuple.
# NOTE(review): all_subsets is not defined in this snippet; presumably it is an itertools-based helper -- confirm.
ss_list1=[]
ss_list2=[]
for subset in all_subsets(df):
ss_list=list(subset)
# print ss_list
ss_list_t=tuple(ss_list)
# print ss_list_t
ss_list1.insert(1,ss_list_t)
# Keep only the combinations that contain more than one item.
for c in ss_list1:
if len(c)>1:
# print c
ss_list2.insert(1, c)
print ss_list2
# NOTE(review): only c[0] and c[1] are added here, so combinations of three or more items are not fully summed.
df = pd.concat([df[c[1]].add(df[c[0]]) for c in ss_list2], axis=1, keys=ss_list2)
# NOTE(review): this join appears to be what raises the reported TypeError; presumably the keys of differing
# length produce column labels padded with NaN (a float) -- confirm.
df.columns = df.columns.map(','.join)
File "pandas/_libs/src/inference.pyx", line 1472, in pandas._libs.lib.map_infer TypeError: sequence item 2: expected string, float found
Use:
#create the index from the Dt column; skipped when Dt is not a column
#(e.g. it is already the index), because set_index would raise KeyError
if 'Dt' in df.columns:
    df = df.set_index('Dt')
#https://stackoverflow.com/a/5898031
from itertools import chain, combinations
def all_subsets(ss):
    """Return every combination of two or more items drawn from ``ss``.

    Combinations are produced lazily and grouped by size: all pairs first,
    then all triples, and so on up to the combination of every item. Within
    each size the order is the one ``itertools.combinations`` yields.

    Args:
        ss: Any iterable of items, e.g. the column labels of a DataFrame.

    Returns:
        An ``itertools.chain`` iterator of tuples. It is empty when ``ss``
        holds fewer than two items.
    """
    # Materialise the input once so one-shot iterables (generators) work and
    # the size comes from the items actually iterated rather than len(ss).
    pool = tuple(ss)
    sizes = range(2, len(pool) + 1)
    return chain.from_iterable(combinations(pool, size) for size in sizes)
#get all combinations of 2 to N column labels
tups = list(all_subsets(df.columns))
#for each combination sum the selected columns row-wise; the tuple is turned
#into a list so it is always read as a list of labels, never as a single key
df1 = pd.concat([df[list(c)].sum(axis=1) for c in tups], axis=1)
#name each new column by joining its labels with '+'; str() keeps the join
#from raising TypeError when a label is not a string
df1.columns = ['+'.join(map(str, x)) for x in tups]
print (df1)
A+B A+C A+D B+C B+D C+D A+B+C A+B+D A+C+D B+C+D A+B+C+D
Dt
11-apr 2 2 2 2 2 2 3 3 3 3 4
10-apr 5 3 4 4 5 3 6 7 5 6 8