Search code examples
rsub-array

A way to extract the name of subarray that was sampled in R


I have a simulation that I have constructed and need to refer to a given subarray that was randomly sampled from in order to create some plots using a for loop. I am not quite certain how to go about accomplishing this...

Here is my code:

K <- 2       # number of subarrays
N <- 100 
Hstar <- 10

perms <- 10000

p <- 0.95

specs <- 1:N 

pop <- array(dim = c(c(perms, N), K))

haps <- as.character(1:Hstar)

probs <- rep(1/Hstar, Hstar) 

for(j in 1:perms){
    for(i in 1:K){ 
        if(i == 1){
            pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
    }
        else{
            K1 <- c(1:5)
            K2 <- c(6:10)
            pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
            pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2]) 
        }
    }
}

HAC.mat <- array(dim = c(c(perms, N), K))

for(k in specs){
    for(j in 1:perms){
        for(i in 1:K){ 
            ind.index <- sample(specs, size = k, replace = FALSE)
            hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE),     ind.index, sample(1:K, size = 1, replace = TRUE)]
            HAC.mat[j, k, i] <- length(unique(hap.plot))
    }
  }
}

means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))

par(mfrow = c(1, 2))

for(i in 1:K){
    if(i == 1){
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)

}
    else{
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(means)))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(max(HAC.mat)*probs[K1], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = min(means):ceiling(max(means)))
    }
}

The issue is in the last else statement where I indicate

max(HAC.mat)*probs[K1]

Above, I just put K1 as an illustration. However, in reality, I don't know which row of K1 or K2 was sampled. Is there a way to specify this generically? Something like K[i]?

Thanks! Please let me know if more clarification is needed.


Solution

  • You can use get and paste0. I made your example have less permutations so it finishes faster (since it's just for testing this) and I made the probs different so that you can see that it works.

    K <- 2       # number of subarrays
    N <- 10 
    Hstar <- 10
    
    perms <- 1000
    
    p <- 0.95
    
    specs <- 1:N 
    
    pop <- array(dim = c(c(perms, N), K))
    
    haps <- as.character(1:Hstar)
    
    probs <- seq(1/Hstar, 1,.1) 
    
    for(j in 1:perms){
      for(i in 1:K){ 
        if(i == 1){
          pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
        }
        else{
          K1 <- c(1:5)
          K2 <- c(6:10)
          pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
          pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2]) 
          print(paste("j is: ", j))
        }
      }
    }
    
    HAC.mat <- array(dim = c(c(perms, N), K))
    
    for(k in specs){
      for(j in 1:perms){
        for(i in 1:K){ 
          ind.index <- sample(specs, size = k, replace = FALSE)
          hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE),     ind.index, sample(1:K, size = 1, replace = TRUE)]
          HAC.mat[j, k, i] <- length(unique(hap.plot))
        }
      }
    }
    
    means <- apply(HAC.mat, MARGIN = 2, mean)
    lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
    upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))
    
    par(mfrow = c(1, 2))
    
    for(i in 1:K){
      print(paste("i is:",i))
      print(paste("probs[...] is:", probs[get(paste0("K",i))]))
      if(i == 1){
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)
    
      }
      else{
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(means)))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(max(HAC.mat)*probs[get(paste0("K",i))], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = min(means):ceiling(max(means)))
      }
    }
    

    I added the print statements so that we can see that it works:

    [1] "i is: 1"
    [1] "probs[...] is: 0.1" "probs[...] is: 0.2" "probs[...] is: 0.3" "probs[...] is: 0.4" "probs[...] is: 0.5"
    [1] "i is: 2"
    [1] "probs[...] is: 0.6" "probs[...] is: 0.7" "probs[...] is: 0.8" "probs[...] is: 0.9" "probs[...] is: 1"