Tags: python, pandas, collections, combinations, python-itertools

# Iterate through rows in data frame with n rows and n columns and count the frequency of combinations 1:n

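I want to iterate through the rows of a data frame with `n` rows and 6 columns (the sample below uses 5 columns), build the combinations of sizes `1:n` of the values within each row, and count how often each combination occurs.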

Non-working template code:

``````python
import pandas as pd
import itertools
from collections import Counter

# create sample data
df = pd.DataFrame([
[2, 10, 18, 31, 41],
[12, 27, 28, 39, 42]
])

def get_combinations(row)
all_combinations[]
for i in range(1, len(df)+1):
result = list(itertools.combinations(df, i))
return all_combinations

# get all possible combinations of values in a row
all_rows = df.apply(get_combinations, 1).values
all_rows_flatten = list(itertools.chain.from_iterable(all_rows))

# use Counter to count how many there are of each combination
count_combinations = Counter(all_rows_flatten)
print(all_combinations["count_combinations"])
``````

Solution

• If I understand correctly, you can try:

``````python
import itertools
from collections import Counter

import pandas as pd

# Sample data: three rows of five integers each.
df = pd.DataFrame(
    [
        [2, 10, 18, 31, 41],
        [12, 27, 28, 39, 42],
        [12, 4, 18, 6, 41],
    ]
)

def get_combinations(row):
    """Return every non-empty combination of the values in ``row``.

    Combinations are produced by ``itertools.combinations`` for each size
    from 1 up to ``len(row)``, smallest size first. Within a combination the
    values keep the order they have in ``row``.

    Args:
        row: A sized iterable of values, e.g. one DataFrame row as passed in
            by ``df.apply(get_combinations, axis=1)``.

    Returns:
        A list of tuples, one per combination; empty when ``row`` is empty.
    """
    return [
        combo
        for size in range(1, len(row) + 1)
        for combo in itertools.combinations(row, size)
    ]

# Store each row's combinations in a new column (one list of tuples per row).
df["all_combs"] = df.apply(get_combinations, axis=1)
print(df)

# Flatten the per-row lists and tally how often each combination occurs.
cnt = Counter(itertools.chain.from_iterable(df["all_combs"]))
print(cnt)
``````

Prints:

``````    0   1   2   3   4                                                                                                                                                                                                                                                                                                                                                                                            all_combs
0   2  10  18  31  41                  [(2,), (10,), (18,), (31,), (41,), (2, 10), (2, 18), (2, 31), (2, 41), (10, 18), (10, 31), (10, 41), (18, 31), (18, 41), (31, 41), (2, 10, 18), (2, 10, 31), (2, 10, 41), (2, 18, 31), (2, 18, 41), (2, 31, 41), (10, 18, 31), (10, 18, 41), (10, 31, 41), (18, 31, 41), (2, 10, 18, 31), (2, 10, 18, 41), (2, 10, 31, 41), (2, 18, 31, 41), (10, 18, 31, 41), (2, 10, 18, 31, 41)]
1  12  27  28  39  42  [(12,), (27,), (28,), (39,), (42,), (12, 27), (12, 28), (12, 39), (12, 42), (27, 28), (27, 39), (27, 42), (28, 39), (28, 42), (39, 42), (12, 27, 28), (12, 27, 39), (12, 27, 42), (12, 28, 39), (12, 28, 42), (12, 39, 42), (27, 28, 39), (27, 28, 42), (27, 39, 42), (28, 39, 42), (12, 27, 28, 39), (12, 27, 28, 42), (12, 27, 39, 42), (12, 28, 39, 42), (27, 28, 39, 42), (12, 27, 28, 39, 42)]
2  12   4  18   6  41                                  [(12,), (4,), (18,), (6,), (41,), (12, 4), (12, 18), (12, 6), (12, 41), (4, 18), (4, 6), (4, 41), (18, 6), (18, 41), (6, 41), (12, 4, 18), (12, 4, 6), (12, 4, 41), (12, 18, 6), (12, 18, 41), (12, 6, 41), (4, 18, 6), (4, 18, 41), (4, 6, 41), (18, 6, 41), (12, 4, 18, 6), (12, 4, 18, 41), (12, 4, 6, 41), (12, 18, 6, 41), (4, 18, 6, 41), (12, 4, 18, 6, 41)]

Counter(
{
(18,): 2,
(41,): 2,
(18, 41): 2,
(12,): 2,
(2,): 1,
(10,): 1,
(31,): 1,
(2, 10): 1,
(2, 18): 1,
(2, 31): 1,
(2, 41): 1,
(10, 18): 1,

...
``````

To convert the `cnt` counter (a `dict` subclass) to a DataFrame:

``````python
df_cnt = pd.DataFrame({"comb": cnt.keys(), "counts": cnt.values()})
``````