Search code examples
c++rfor-loopif-statementrcpp

How to convert R to C++ for Rcpp


The following R code chunk is very inefficient in terms of speed and I need it to be significantly faster as in the original problem the length(dt) is quite large.

Can you help me convert the following R code chunk to C++ in order to utilize RCPP function in R? My knowledge of C++ is very close to 0 and can't figure out how to do the conversion.

inity <- 0
cumy <- rep(0,length = length(dt))
dec.index <- 1
startingPoint <- 2
temp <- numeric()
repFlag <- F

for (i in 2:length(dt)){
  cumy[i] <- inity + rgamma(n = 1, shape = a*timedt, scale = b)
  inity <- cumy[i]
  
  if (dt[i] %in% decPoints){
    if (dt[i] %in% LTset){
      repFlag <- ifelse(cumy[i] >= LP, T, F)
    } else if (dt[i] %in% MainMDset && repFlag == T){
      genRanProb <- rbinom(1,1,(1-p1))
      cumy[i] <- inity*genRanProb
      inity <- cumy[i]
    } else if (dt[i] %in% ProbMDset && repFlag == T){
      genRanProb <- rbinom(1,1,pA)
      cumy[i] <- inity*genRanProb
      inity <- cumy[i]
    }
  }
}

If you want to run the code, you can use following values:

a <- 0.2
b <- 1
ph <- 1000
timedt <- 1
oppInt <- 90
dt <- seq(0,ph,timedt) 
LT <- 30
MainMDset <- seq(oppInt, ph, oppInt)
ProbMDset <- c(0,seq((oppInt + oppInt/2), ph, oppInt)) 
LTset <- sort(c(ProbMDset, MainMDset))
LTset <- LTset - LT
decPoints <- sort(c(LTset, ProbMDset, MainMDset))
decPoints <- decPoints[-which(decPoints < 0)]
decPoints[1] <- 1

p1 <- 0
pA <- 0.5
LP <- 40

Code for follow up question:

Rcpp::cppFunction("
 NumericVector cumyRes(double a, double b, double timedt, NumericVector dt, 
                       NumericVector ProbMDset, NumericVector MainMDset, 
                       NumericVector decPoints, double LP, double LT,
                       double p1, double pA, int ii, double x1, double x2){
  bool repFlag = false;
  int n = dt.size();
  double inity = 0;
  NumericVector out(n);
  std::unordered_set<double> sampleSetMd(MainMDset.begin(), MainMDset.end());
  std::unordered_set<double> sampleSetProb(ProbMDset.begin(), ProbMDset.end());
  std::unordered_set<double> sampleSetDec(decPoints.begin(), decPoints.end());
  
  for (int i = 1; i < n; ++i){
    ii = ii + 1;
    double d = dt[ii];
    out[ii] = inity + rgamma(1, a * timedt, b)[0];
    inity = out[ii];
    
    if (sampleSetDec.find(d) != sampleSetDec.end()) {
        if (sampleSetProb.find(d + LT) != sampleSetProb.end() ||
            sampleSetMd.find(d + LT) != sampleSetMd.end()) {
          repFlag = inity >= LP;
        } else if (sampleSetMd.find(d) != sampleSetMd.end() && repFlag) {
          double genRanProb = rbinom(1, 1, (1 - p1))[0];
          for (int j = ii; ii < (ii+10); ++j){
            out[j] = inity * genRanProb;
          }
          inity = inity * genRanProb;
          ii = ii + x1 - 1;
        } else if (sampleSetProb.find(d) != sampleSetProb.end() && repFlag) {
          double genRanProb = rbinom(1, 1, pA)[0];
          for (int j = ii; ii < (ii+5); ++j){
            out[j] = inity * genRanProb;
          }
          inity = inity * genRanProb;
          ii = ii + x2 - 1;
        }}}
  return out;
}")

Solution

  • There are several errors in your C++ code, probably too many to list in a concise answer. However, the following corrections seem to follow your logic, and compiles to give similar answers to your R loop in less time:

    Rcpp::cppFunction("
     NumericVector cumyRes(double a, double b, double timedt, NumericVector dt, 
                           NumericVector ProbMDset, NumericVector MainMDset, 
                           NumericVector decPoints, double LP, double LT,
                           double p1, double pA){
      bool repFlag = false;
      int n = dt.size();
      double inity = 0;
      NumericVector out(n);
      std::unordered_set<double> sampleSetMd(MainMDset.begin(), MainMDset.end());
      std::unordered_set<double> sampleSetProb(ProbMDset.begin(), ProbMDset.end());
      std::unordered_set<double> sampleSetDec(decPoints.begin(), decPoints.end());
      
      for (int i = 1; i < n; ++i){
        double d = dt[i];
        out[i] = inity + rgamma(1, a * timedt, b)[0];
        inity = out[i];
        
        if (sampleSetDec.find(d) != sampleSetDec.end()) {
            if (sampleSetProb.find(d + LT) != sampleSetProb.end() ||
                sampleSetMd.find(d + LT) != sampleSetMd.end()) {
              repFlag = inity >= LP;
            } else if (sampleSetMd.find(d) != sampleSetMd.end() && repFlag) {
              double genRanProb = rbinom(1, 1, (1 - p1))[0];
              out[i] = inity * genRanProb;
              inity = out[i];
            } else if (sampleSetProb.find(d) != sampleSetProb.end() && repFlag) {
              double genRanProb = rbinom(1, 1, pA)[0];
              out[i] = inity * genRanProb;
              inity = out[i];
            }}}
      return out;
    }")
    

    You could test it with:

    cumyRes(a, b, timedt, dt, ProbMDset, MainMDset, decPoints, LP, LT, p1, pA)