Search code examples
python, dataframe, rpy2, arules

Package Arules in Python


I am using arules in Python. I executed the code below to generate all associations. I want to know how I can convert the output of arules to some data structure in Python. The output is of type 'rpy2.robjects.methods.RS4'. Below is the code:

# Mine association rules with R's arules package through rpy2 and show the
# Python-side type of the object that apriori() hands back.
import collections
from collections import OrderedDict

import numpy as np
import pandas as pd
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import ListVector

# Switch on rpy2's pandas conversion so the DataFrame can be passed to R.
pandas2ri.activate()
arules = importr("arules")

# apriori() parameters, kept in insertion order and wrapped as an R list.
od = OrderedDict(
    [
        ("supp", 0.0005),
        ("conf", 0.7),
        ("target", "rules"),
    ]
)
result = ListVector(od)

# Ten rows over the columns A, B and C, encoded as the strings '1' / '0'.
df = pd.DataFrame(
    [
        ["1", "1", "1"],
        ["1", "0", "0"],
        ["1", "1", "1"],
        ["1", "0", "0"],
        ["1", "1", "1"],
        ["1", "0", "1"],
        ["1", "1", "1"],
        ["0", "0", "1"],
        ["0", "1", "1"],
        ["1", "0", "1"],
    ],
    columns=["A", "B", "C"],
)

# Cast every column to pandas' categorical dtype, one column at a time.
for column_name in ("A", "B", "C"):
    df[column_name] = df[column_name].astype("category")

my_rules = arules.apriori(df, parameter=result)

# Debug output: a marker line, the type of the result, and a trailing label.
print("herererererere", type(my_rules), "rules", sep="\n")

Solution

  • Here is a minimalist example of how to do this:

    # Minimal example: run arules' apriori from Python via rpy2 and read the
    # resulting itemsets back as a data frame, a quality table and a matrix.

    # prepare the data as a dataframe with boolean values
    import pandas as pd
    
    df = pd.DataFrame(
        [
            [True, True, True],
            [True, False, False],
            [True, True, True],
            [True, False, False],
            [True, True, True],
            [True, False, True],
            [True, True, True],
            [False, False, True],
            [False, True, True],
            [True, False, True],
        ],
        columns=list("ABC"),
    )
    
    # set up rpy2
    from rpy2.robjects import pandas2ri
    # Enable the pandas conversion so `df` can be passed straight to R below.
    # NOTE(review): newer rpy2 releases appear to favour a local conversion
    # context over this global switch - confirm against the installed version.
    pandas2ri.activate()
    import rpy2.robjects as ro
    from rpy2.robjects.packages import importr
    arules = importr("arules")
    
    # run apriori
    itsets = arules.apriori(
        df,
        parameter=ro.ListVector({"supp": 0.1, "target": "frequent itemsets"}),
    )
    
    # get itemsets as a dataframe
    print(arules.DATAFRAME(itsets))
    
    # get quality as a dataframe
    print(itsets.slots["quality"])
    
    # get itemsets as a matrix
    # The conversion is done by a small R function defined inline.
    itemset_as_matrix = ro.r('function(x) as(items(x), "matrix")')
    # Print the matrix so it is visible when this runs as a script, matching
    # the two outputs above (a bare expression only displays in a REPL).
    print(itemset_as_matrix(itsets))