Search code examples
Tags: python, r, rpy2

How to predict using rpy2?


I have some R code that predicts when a hard disk will be full.

# Estimate the date on which the disk fills up: regress the sample date on the
# space used, then extrapolate the fit to the disk's total capacity.

# duinfo.dat is read as two columns: a Date (`day`) and a number (`usd`).
duinfo <- read.table(
  "duinfo.dat",
  colClasses = c("Date", "numeric"),
  col.names = c("day", "usd")
)

# Disk capacity; assumed to be in the same units as the `usd` column.
totalspace <- 500000

# Hand the data frame to lm() explicitly instead of attach()-ing it, so the
# columns cannot be masked by (or mask) other objects on the search path.
model <- lm(day ~ usd, data = duinfo)

# The prediction is a plain number of days since 1970-01-01; convert it back
# to a Date.
as.Date(
  predict(model, newdata = data.frame(usd = totalspace)),
  origin = "1970-01-01"
)

So far I have the following Python code, which is meant to get the predicted date on which the disk will be full:

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
utils = importr('utils')

# Load the disk-usage samples into an R data.frame with columns `day` and `usd`.
data = robjects.r(
    'read.table(file = "duinfo.dat", colClasses=c("Date","numeric"), col.names=c("day","usd"))')
totalspace = 500000
# Fit day ~ usd. The columns are looked up in `data` itself, so the frame does
# not need to be attach()-ed and the columns do not need to be pulled out by
# index on the Python side first.
fmla = robjects.Formula('day ~ usd')
stats = importr('stats')
model = stats.lm(fmla, data=data)

How do I convert the last R statement (the `as.Date(predict(...))` call) to Python code using rpy2?


Solution

  • I was able to convert the code to Python:

    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    utils = importr('utils')
    stats = importr('stats')
    base = importr("base")
    
    # Read the samples with explicit column names so the model formula can
    # refer to them directly.
    data = robjects.r(
        'read.table(file = "duinfo.dat", colClasses=c("Date","numeric"), col.names=c("days","used"))')
    totalspace = 500000
    # Regress the sample date on the space used. Passing data= lets lm() find
    # the columns in the frame, so nothing has to be attach()-ed or copied
    # into R's global environment.
    model = stats.lm(robjects.Formula('days ~ used'), data=data)
    # One-row data frame holding the usage level to extrapolate to.
    dataf = robjects.DataFrame({'used': totalspace})
    # Use the predict function to extrapolate the model; the result is a
    # number of days since 1970-01-01, which as.Date turns back into a date.
    end_date = base.as_Date(robjects.r.predict(model, newdata=dataf), origin="1970-01-01")
    rStr = base.format(end_date, format="%d/%m/%Y")
    print(rStr[0])