python pandas collections combinations python-itertools

Iterate through rows in data frame with n rows and n columns and count the frequency of combinations 1:n

Iterate through rows in data frame with n rows and 6 columns and count the frequency of combinations 1:n within each row.

Non-working template code:

import pandas as pd
import itertools
from collections import Counter

# create sample data
df = pd.DataFrame([
    [2, 10, 18, 31, 41],
   [12, 27, 28, 39, 42]
])

def get_combinations(row)
  all_combinations[]
  for i in range(1, len(df)+1):
    result = list(itertools.combinations(df, i))
    return all_combinations

# get all possible combinations of values in a row
all_rows = df.apply(get_combinations, 1).values
all_rows_flatten = list(itertools.chain.from_iterable(all_rows))

# use Counter to count how many there are of each combination
count_combinations = Counter(all_rows_flatten)
print(all_combinations["count_combinations"])

Solution

IIUC, you can try:

import itertools
from collections import Counter

import pandas as pd

# Sample data: three rows of five integers each.
df = pd.DataFrame(
    [
        [2, 10, 18, 31, 41],
        [12, 27, 28, 39, 42],
        [12, 4, 18, 6, 41],
    ]
)


def get_combinations(row):
    """Return every non-empty combination of the values in ``row``.

    Combinations are emitted by increasing size, from 1 up to ``len(row)``.
    Within one size they appear in the order ``itertools.combinations``
    yields them, i.e. following the order of the values in ``row``.
    Each combination is a tuple; the result is a flat list of those tuples.
    """
    subsets = []
    for size in range(1, len(row) + 1):
        # extend() consumes the combinations iterator for this size in order.
        subsets.extend(itertools.combinations(row, size))
    return subsets


# Store each row's list of combinations in a new column.
df["all_combs"] = df.apply(get_combinations, axis=1)

print(df)

# Flatten the per-row lists into one stream of tuples and tally how often
# each combination occurs across all rows.
cnt = Counter(itertools.chain.from_iterable(df["all_combs"]))
print(cnt)

Prints:

    0   1   2   3   4                                                                                                                                                                                                                                                                                                                                                                                            all_combs
0   2  10  18  31  41                  [(2,), (10,), (18,), (31,), (41,), (2, 10), (2, 18), (2, 31), (2, 41), (10, 18), (10, 31), (10, 41), (18, 31), (18, 41), (31, 41), (2, 10, 18), (2, 10, 31), (2, 10, 41), (2, 18, 31), (2, 18, 41), (2, 31, 41), (10, 18, 31), (10, 18, 41), (10, 31, 41), (18, 31, 41), (2, 10, 18, 31), (2, 10, 18, 41), (2, 10, 31, 41), (2, 18, 31, 41), (10, 18, 31, 41), (2, 10, 18, 31, 41)]
1  12  27  28  39  42  [(12,), (27,), (28,), (39,), (42,), (12, 27), (12, 28), (12, 39), (12, 42), (27, 28), (27, 39), (27, 42), (28, 39), (28, 42), (39, 42), (12, 27, 28), (12, 27, 39), (12, 27, 42), (12, 28, 39), (12, 28, 42), (12, 39, 42), (27, 28, 39), (27, 28, 42), (27, 39, 42), (28, 39, 42), (12, 27, 28, 39), (12, 27, 28, 42), (12, 27, 39, 42), (12, 28, 39, 42), (27, 28, 39, 42), (12, 27, 28, 39, 42)]
2  12   4  18   6  41                                  [(12,), (4,), (18,), (6,), (41,), (12, 4), (12, 18), (12, 6), (12, 41), (4, 18), (4, 6), (4, 41), (18, 6), (18, 41), (6, 41), (12, 4, 18), (12, 4, 6), (12, 4, 41), (12, 18, 6), (12, 18, 41), (12, 6, 41), (4, 18, 6), (4, 18, 41), (4, 6, 41), (18, 6, 41), (12, 4, 18, 6), (12, 4, 18, 41), (12, 4, 6, 41), (12, 18, 6, 41), (4, 18, 6, 41), (12, 4, 18, 6, 41)]

Counter(
    {
        (18,): 2,
        (41,): 2,
        (18, 41): 2,
        (12,): 2,
        (2,): 1,
        (10,): 1,
        (31,): 1,
        (2, 10): 1,
        (2, 18): 1,
        (2, 31): 1,
        (2, 41): 1,
        (10, 18): 1,

...

To convert the `cnt` Counter to a DataFrame:

# One row per distinct combination, alongside the number of times it occurred.
df_cnt = pd.DataFrame({"comb": list(cnt), "counts": list(cnt.values())})