Search code examples
r  parallel.foreach  doparallel

%dopar% in R does not work properly when trying to run code in parallel


%dopar% in R does not work properly when trying to run code in parallel

First, I'm sorry that my English is not good. I found similar questions on Stack Overflow, but I can't understand how to apply the answers to my case :(

Here is my original code:

library(cramer)
# Monte Carlo comparison of two-sample tests. For every value in `theta`,
# draw `n_rep` pairs of samples (X ~ U(0, 1), Y ~ U(0, 1 / (1 + theta / sqrt(10))))
# and count how often each test rejects at the 5% level.
n <- 50                    # observations per sample
n_rep <- 5                 # replications per theta value
theta <- seq(-2, 2, 0.2)

# One rejection counter per theta value. The counters are sized from `theta`
# so the grid can be changed in one place.
ks <- numeric(length(theta))
cvm <- numeric(length(theta))
ks2 <- numeric(length(theta))   # second, separately computed tally of ks.test
cvm2 <- numeric(length(theta))  # second, separately computed tally of cramer.test

for (k in seq_along(theta)) {
  for (i in seq_len(n_rep)) {
    X <- runif(n)
    Y <- runif(n, min = 0, max = 1 / (1 + theta[k] / sqrt(10)))

    # A logical added to a number counts as 0/1, so each line increments the
    # counter only when the test rejects.
    ks[k] <- ks[k] + (ks.test(X, Y)$p.value <= 0.05)
    cvm[k] <- cvm[k] + (cramer.test(X, Y)$p.value <= 0.05)
    ks2[k] <- ks2[k] + (ks.test(X, Y)$p.value <= 0.05)
    cvm2[k] <- cvm2[k] + (cramer.test(X, Y)$p.value <= 0.05)
  }
}

When it finishes, we can see that the values have changed like this:

> ks
 [1] 5 5 5 5 5 4 4 2 0 0 0 0 0 1 3 4 5 5 5 5 5
> cvm
 [1] 5 5 5 5 5 4 5 1 2 1 0 1 1 1 2 5 5 5 5 5 5
> ks2
 [1] 5 5 5 5 5 4 4 2 0 0 0 0 0 1 3 4 5 5 5 5 5
> cvm2
 [1] 5 5 5 5 5 4 5 1 2 1 0 1 1 1 2 5 5 5 5 5 5

To speed up the calculation, I tried to use foreach. The new code I tried is:

library("cramer")
library("foreach")
library("doParallel")

# Parallel attempt at the simulation: one foreach iteration per theta value,
# with per-iteration debug output written to "debug_file_<k>.txt".
n<-50
theta<-seq(-2,2,0.2)
# Rejection counters, created in the main session and initialised to zero.
ks<-rep(0,21)
cvm<-rep(0,21)
ks2<-rep(0,21)
cvm2<-rep(0,21)

# Start a 5-worker cluster (created with outfile="debug.txt") and register it
# as the %dopar% backend.
cl<-parallel::makeCluster(5,outfile="debug.txt")
doParallel::registerDoParallel(cl)

# NOTE(review): the assignments to ks[k], cvm[k], ks2[k] and cvm2[k] below do
# not modify the vectors defined above; that is why the vectors printed after
# the loop are still all zeros. The value of each iteration is the value of the
# inner `for` loop (NULL in R), and the foreach result itself is never assigned.
foreach (k = 1:21,.combine =list,.multicombine = TRUE,.inorder=TRUE,.packages = "cramer") %dopar% {
   # Create/overwrite this iteration's debug file (no append here).
   cat("start to calculate \n", file = paste0("debug_file_", k, ".txt"))

 # NOTE(review): 6 replications here, whereas the serial version uses 1:5.
 for (i in 1:6)  {
    cat("It's the ",k,"progress","now the runtimes is ",i,"\n", file = paste0("debug_file_", k, ".txt"),append=T)
    X<-runif(n)
    # NOTE(review): the label says "X is" but the value written is ks[k].
    cat("X is",ks[k],"\n", file = paste0("debug_file_", k, ".txt"),append=T)
    Y<-runif(n,min=0,max=1/(1+theta[k]/sqrt(10)))
    # Add 1 to the counter when the test rejects at the 5% level.
    ks[k]<-ks[k]+(ks.test(X,Y)$p.value<=0.05)
    cat("ks- ",k,"is",ks[k],"\n", file = paste0("debug_file_", k, ".txt"),append=T)
    cvm[k]<-cvm[k]+(cramer.test(X,Y)$p.value<=0.05)
    ks2[k]<-ks2[k]+(ks.test(X,Y)$p.value<=0.05)
    cvm2[k]<-cvm2[k]+(cramer.test(X,Y)$p.value<=0.05)
    cat("cvm2- ",k,"is",cvm2[k],"\n", file = paste0("debug_file_", k, ".txt"),append=T)
  }
  }

# Print the counters held by the main session.
ks
cvm
ks2
cvm2

# Shut the worker processes down.
parallel::stopCluster(cl)

Judging from the CPU usage and the debug files, I think the parallel calculation itself is working, but the values are not written back correctly.

> ks
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
> cvm
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
> ks2
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
> cvm2
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

I have found a lot of information and guess this happens because the parallel backend copies the data to each worker in the cluster, but I don't know how to fix my code.


My question is long; thanks for your time.


Solution

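  • ks[k] <- ... won't actually modify the ks variable outside of the foreach: each worker evaluates the loop body on its own copy of the variables, so the assignments never reach the main session. Modify the foreach to return the values from each iteration and combine them into a data frame.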

    library("cramer")
    library("foreach")
    library("doParallel")
    
    n <- 50
    theta <- seq(-2, 2, 0.2)

    # Start 5 workers and register them as the %dopar% backend.
    cl <- parallel::makeCluster(5, outfile = "debug.txt")
    doParallel::registerDoParallel(cl)

    # Each iteration handles one theta value and *returns* its rejection counts
    # as a one-row data frame; `.combine = rbind` stacks the rows in iteration
    # order. The result is captured in `res` so the counts can be used after
    # the loop (e.g. res$ks, res$cvm) instead of only being printed.
    res <- foreach(
      k = seq_along(theta),
      .combine = rbind,
      .multicombine = TRUE,
      .inorder = TRUE,
      .packages = "cramer"
    ) %dopar% {
      # Counters local to this iteration.
      ks <- 0
      cvm <- 0
      ks2 <- 0
      cvm2 <- 0

      for (i in seq_len(5)) {
        X <- runif(n)
        Y <- runif(n, min = 0, max = 1 / (1 + theta[k] / sqrt(10)))
        # Add 1 when the test rejects at the 5% level.
        ks <- ks + (ks.test(X, Y)$p.value <= 0.05)
        cvm <- cvm + (cramer.test(X, Y)$p.value <= 0.05)
        ks2 <- ks2 + (ks.test(X, Y)$p.value <= 0.05)
        cvm2 <- cvm2 + (cramer.test(X, Y)$p.value <= 0.05)
      }

      # The last expression is the value of the iteration.
      data.frame(
        ks = ks,
        cvm = cvm,
        ks2 = ks2,
        cvm2 = cvm2
      )
    }
    res
    #>    ks cvm ks2 cvm2
    #> 1   5   5   5    5
    #> 2   5   5   5    5
    #> 3   5   5   5    5
    #> 4   5   5   5    5
    #> 5   5   5   5    5
    #> 6   5   5   5    5
    #> 7   3   3   3    3
    #> 8   1   1   1    1
    #> 9   1   1   1    1
    #> 10  0   1   0    1
    #> 11  2   0   2    0
    #> 12  1   3   1    3
    #> 13  0   0   0    0
    #> 14  1   1   1    1
    #> 15  2   3   2    3
    #> 16  3   3   3    3
    #> 17  4   4   4    4
    #> 18  3   3   3    3
    #> 19  4   4   4    4
    #> 20  5   5   5    5
    #> 21  5   5   5    5

    # Shut the worker processes down.
    parallel::stopCluster(cl)