Tags: python, pandas, collections, combinations, python-itertools

Iterate through rows in data frame with n rows and n columns and count the frequency of combinations 1:n


Iterate through rows in data frame with n rows and 6 columns and count the frequency of combinations 1:n within each row.

Non-working template code:

import pandas as pd
import itertools
from collections import Counter

# Sample data: two rows, five integer values per row.
df = pd.DataFrame(
    [
        [2, 10, 18, 31, 41],
        [12, 27, 28, 39, 42],
    ]
)

# Meant to collect the combinations of every size (1..n) of the values in
# `row`. Posted as non-working code and kept verbatim below.
# NOTE(review): the `def` line has no trailing colon and `all_combinations[]`
# is not a valid statement, so this does not parse.
def get_combinations(row)
  all_combinations[]
  # NOTE(review): the loop reads the global `df`, not the `row` argument.
  for i in range(1, len(df)+1):
    # NOTE(review): `result` is never added to `all_combinations`.
    result = list(itertools.combinations(df, i))
    # NOTE(review): this return is inside the loop body, so it would exit
    # during the first iteration.
    return all_combinations

# get all possible combinations of values in a row
# (the positional 1 is the `axis` argument: apply the function to each row)
all_rows = df.apply(get_combinations, 1).values
# flatten the per-row results into a single list
all_rows_flatten = list(itertools.chain.from_iterable(all_rows))

# use Counter to count how many there are of each combination
count_combinations = Counter(all_rows_flatten)
# NOTE(review): `all_combinations` is not defined at this scope in the code
# shown; presumably `count_combinations` was meant to be printed -- confirm.
print(all_combinations["count_combinations"])

Solution

  • IIUC, you can try:

    import itertools
    from collections import Counter
    
    import pandas as pd
    
    # Three sample rows of five integers; 12, 18 and 41 each occur in two rows.
    df = pd.DataFrame(
        [
            [2, 10, 18, 31, 41],
            [12, 27, 28, 39, 42],
            [12, 4, 18, 6, 41],
        ]
    )
    
    
    def get_combinations(row):
        """Return every combination of the values in *row*, sizes 1 to len(row).

        The result is a list of tuples ordered by size (all 1-tuples first,
        then 2-tuples, and so on); within a size the order is the one
        produced by ``itertools.combinations``.
        """
        sizes = range(1, len(row) + 1)
        per_size = (itertools.combinations(row, size) for size in sizes)
        return list(itertools.chain.from_iterable(per_size))
    
    
    # Store each row's list of combinations in a new "all_combs" column.
    df["all_combs"] = df.apply(get_combinations, axis=1)

    print(df)

    # Count the occurrences of each combination tuple across all rows.
    cnt = Counter(itertools.chain.from_iterable(df["all_combs"]))
    print(cnt)
    

    Prints:

        0   1   2   3   4                                                                                                                                                                                                                                                                                                                                                                                            all_combs
    0   2  10  18  31  41                  [(2,), (10,), (18,), (31,), (41,), (2, 10), (2, 18), (2, 31), (2, 41), (10, 18), (10, 31), (10, 41), (18, 31), (18, 41), (31, 41), (2, 10, 18), (2, 10, 31), (2, 10, 41), (2, 18, 31), (2, 18, 41), (2, 31, 41), (10, 18, 31), (10, 18, 41), (10, 31, 41), (18, 31, 41), (2, 10, 18, 31), (2, 10, 18, 41), (2, 10, 31, 41), (2, 18, 31, 41), (10, 18, 31, 41), (2, 10, 18, 31, 41)]
    1  12  27  28  39  42  [(12,), (27,), (28,), (39,), (42,), (12, 27), (12, 28), (12, 39), (12, 42), (27, 28), (27, 39), (27, 42), (28, 39), (28, 42), (39, 42), (12, 27, 28), (12, 27, 39), (12, 27, 42), (12, 28, 39), (12, 28, 42), (12, 39, 42), (27, 28, 39), (27, 28, 42), (27, 39, 42), (28, 39, 42), (12, 27, 28, 39), (12, 27, 28, 42), (12, 27, 39, 42), (12, 28, 39, 42), (27, 28, 39, 42), (12, 27, 28, 39, 42)]
    2  12   4  18   6  41                                  [(12,), (4,), (18,), (6,), (41,), (12, 4), (12, 18), (12, 6), (12, 41), (4, 18), (4, 6), (4, 41), (18, 6), (18, 41), (6, 41), (12, 4, 18), (12, 4, 6), (12, 4, 41), (12, 18, 6), (12, 18, 41), (12, 6, 41), (4, 18, 6), (4, 18, 41), (4, 6, 41), (18, 6, 41), (12, 4, 18, 6), (12, 4, 18, 41), (12, 4, 6, 41), (12, 18, 6, 41), (4, 18, 6, 41), (12, 4, 18, 6, 41)]
    
    Counter(
        {
            (18,): 2,
            (41,): 2,
            (18, 41): 2,
            (12,): 2,
            (2,): 1,
            (10,): 1,
            (31,): 1,
            (2, 10): 1,
            (2, 18): 1,
            (2, 31): 1,
            (2, 41): 1,
            (10, 18): 1,
    
    ...
    

    To convert the `cnt` Counter to a DataFrame:

    # One row per distinct combination: the tuple itself and its count.
    combos = list(cnt.keys())
    tallies = list(cnt.values())
    df_cnt = pd.DataFrame({"comb": combos, "counts": tallies})