Search code examples
r string dataframe matrix vector

Specifying the variable names when converting a vector to a matrix in R


I have a named numeric vector called Rs. The vector has 28 elements (one per pair among 8 variables), looking like: c(L2DA.L2DF = 0.637, L2DA.L2G = 0.553, ...).

I can convert this named numeric vector to an 8x8 correlation matrix using metafor::vec2mat(Rs) (see below).

Question: But I wonder how to assign the rownames and colnames for that correlation matrix so that these names represent the names in my original named numeric vector?

For example, in the matrix, element [2,1] = 0.637 comes from the first element of my vector, so it should have the colname L2DA and rowname L2DF, and so on.

# metafor provides rma() and vec2mat().
library(metafor)

dat <- read.csv("https://raw.githubusercontent.com/ilzl/i/master/j.csv")

# Label every row with its variable pair, e.g. "L2DA.L2DF". paste() is
# vectorised, so there is no need to apply() over the rows of the data frame
# (which would first coerce it to a character matrix).
dat$var1.var2 <- paste(dat$var1, dat$var2, sep = ".")

# One coefficient per pair; `+ 0` removes the intercept.
# NOTE(review): the second positional argument (1) is presumably the sampling
# variance `vi` -- confirm against the rma() documentation.
res <- rma(ri ~ var1.var2 + 0, 1, data = dat)

# Strip the "var1.var2" prefix from the coefficient names so that only the
# pair label remains. fixed = TRUE matches the "." literally instead of as a
# regex wildcard. The outer parentheses print the assigned value.
(Rs <- setNames(
  coef(res),
  sub("var1.var2", "", names(coef(res)), fixed = TRUE)
))

(R_matrix <- vec2mat(Rs))

          [,1]  [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]
[1,] 1.0000000 0.637 0.5533333 0.4180000 0.5550000 0.5678947 0.4781481 0.3675000
[2,] 0.6370000 1.000 0.2440000 0.2900000 0.4840000 0.3500000 0.4750000 0.5700000
[3,] 0.5533333 0.244 1.0000000 0.2933333 0.5100000 0.3300000 0.4775000 0.5765714
[4,] 0.4180000 0.290 0.2933333 1.0000000 0.4627778 0.5212121 0.5569565 0.4928571
[5,] 0.5550000 0.484 0.5100000 0.4627778 1.0000000 0.4695652 0.5140625 0.5313793
[6,] 0.5678947 0.350 0.3300000 0.5212121 0.4695652 1.0000000 0.5194118 0.5258333
[7,] 0.4781481 0.475 0.4775000 0.5569565 0.5140625 0.5194118 1.0000000 0.4240000
[8,] 0.3675000 0.570 0.5765714 0.4928571 0.5313793 0.5258333 0.4240000 1.0000000

Solution

  • First, vec2mat is masking a problem:

    # Print the named coefficient vector: 28 values, one per variable pair.
    Rs
    # L2DA.L2DF  L2DA.L2G  L2DA.L2L  L2DA.L2M  L2DA.L2P  L2DA.L2V  L2DF.L2G  L2DF.L2L  L2DF.L2M  L2DF.L2P  L2DF.L2V   L2G.L2L   L2G.L2M   L2G.L2P   L2M.L2L 
    # 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.4781481 0.3675000 0.2440000 0.2900000 0.4840000 0.3500000 0.4750000 0.5700000 0.2933333 0.5100000 
    #   L2P.L2L   L2P.L2M  L2R.L2DA  L2R.L2DF   L2R.L2G   L2R.L2L   L2R.L2M   L2R.L2P   L2R.L2V   L2V.L2G   L2V.L2L   L2V.L2M   L2V.L2P 
    # 0.3300000 0.4775000 0.5765714 0.4627778 0.5212121 0.5569565 0.4928571 0.4695652 0.5140625 0.5313793 0.5194118 0.5258333 0.4240000 
    

    Note that there are SIX L2DA.* values, suggesting that the first column of the matrix (which we infer should be "L2DA") should span from 0.637 to 0.478; however, it includes the L2DF.L2G value (0.3675) as well:

    # The result carries no dimnames; below the diagonal, the first column
    # simply holds the first seven elements of Rs in order, whatever their
    # names are.
    vec2mat(Rs)
    #           [,1]  [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]
    # [1,] 1.0000000 0.637 0.5533333 0.4180000 0.5550000 0.5678947 0.4781481 0.3675000
    # [2,] 0.6370000 1.000 0.2440000 0.2900000 0.4840000 0.3500000 0.4750000 0.5700000
    # [3,] 0.5533333 0.244 1.0000000 0.2933333 0.5100000 0.3300000 0.4775000 0.5765714
    # [4,] 0.4180000 0.290 0.2933333 1.0000000 0.4627778 0.5212121 0.5569565 0.4928571
    # [5,] 0.5550000 0.484 0.5100000 0.4627778 1.0000000 0.4695652 0.5140625 0.5313793
    # [6,] 0.5678947 0.350 0.3300000 0.5212121 0.4695652 1.0000000 0.5194118 0.5258333
    # [7,] 0.4781481 0.475 0.4775000 0.5569565 0.5140625 0.5194118 1.0000000 0.4240000
    # [8,] 0.3675000 0.570 0.5765714 0.4928571 0.5313793 0.5258333 0.4240000 1.0000000
    

    For whatever reason, not all combinations are present in your coefficients; at least L2DA.L2R is missing:

    # Every distinct variable that occurs on either side of a pair name.
    pair_names <- names(Rs)
    nms <- unique(unlist(strsplit(pair_names, "[.]")))
    nms
    # [1] "L2DA" "L2DF" "L2G"  "L2L"  "L2M"  "L2P"  "L2V"  "L2R" 
    # All pair names that mention L2DA, on either side of the dot.
    l2da_pairs <- grep("L2DA", pair_names, value = TRUE)
    l2da_pairs
    # [1] "L2DA.L2DF" "L2DA.L2G"  "L2DA.L2L"  "L2DA.L2M"  "L2DA.L2P"  "L2DA.L2V"  "L2R.L2DA" 
    # Variables that never show up as the second member of an L2DA pair.
    setdiff(nms, sub(".*\\.", "", l2da_pairs))
    # [1] "L2R"
    

    ... though L2R.L2DA is present ... it appears that the assumption of vec2mat is not shared with the data.

    While we could likely brute-force the matching, I think that inferential step would make it less clear that all coefficients have been assigned correctly to the rows/columns.

    Instead of vec2mat, let's do it ourselves, preserving the names.

    # Long format: one row per named coefficient, with the pair name split
    # into its two variables (text before the first dot / after the last dot).
    tmp <- data.frame(nm = names(Rs), R = Rs)
    tmp$row <- sub("\\..*", "", tmp$nm)
    tmp$col <- sub(".*\\.", "", tmp$nm)
    head(tmp)
    #                  nm         R  row  col
    # L2DA.L2DF L2DA.L2DF 0.6370000 L2DA L2DF
    # L2DA.L2G   L2DA.L2G 0.5533333 L2DA  L2G
    # L2DA.L2L   L2DA.L2L 0.4180000 L2DA  L2L
    # L2DA.L2M   L2DA.L2M 0.5550000 L2DA  L2M
    # L2DA.L2P   L2DA.L2P 0.5678947 L2DA  L2P
    # L2DA.L2V   L2DA.L2V 0.4781481 L2DA  L2V
    # Wide format: only the direction present in the names gets a value.
    tmp |>
      reshape2::dcast(row ~ col, value.var = "R")
    #    row      L2DA      L2DF       L2G       L2L       L2M       L2P       L2V
    # 1 L2DA        NA 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.4781481
    # 2 L2DF        NA        NA 0.3675000 0.2440000 0.2900000 0.4840000 0.3500000
    # 3  L2G        NA        NA        NA 0.4750000 0.5700000 0.2933333        NA
    # 4  L2M        NA        NA        NA 0.5100000        NA        NA        NA
    # 5  L2P        NA        NA        NA 0.3300000 0.4775000        NA        NA
    # 6  L2R 0.5765714 0.4627778 0.5212121 0.5569565 0.4928571 0.4695652 0.5140625
    # 7  L2V        NA        NA 0.5313793 0.5194118 0.5258333 0.4240000        NA
    

    This is not too surprising since we know we don't have both directions of pairs. Let's take tmp and swap the two, then row-bind, then dcast again.

    # Mirror image of tmp: same values, with the row and col labels exchanged.
    tmp2 <- tmp
    tmp2$row <- tmp$col
    tmp2$col <- tmp$row
    head(tmp2)
    #                  nm         R  row  col
    # L2DA.L2DF L2DA.L2DF 0.6370000 L2DF L2DA
    # L2DA.L2G   L2DA.L2G 0.5533333  L2G L2DA
    # L2DA.L2L   L2DA.L2L 0.4180000  L2L L2DA
    # L2DA.L2M   L2DA.L2M 0.5550000  L2M L2DA
    # L2DA.L2P   L2DA.L2P 0.5678947  L2P L2DA
    # L2DA.L2V   L2DA.L2V 0.4781481  L2V L2DA
    # Stack both directions, then spread to one column per variable.
    both_directions <- rbind(tmp, tmp2)
    out <- reshape2::dcast(both_directions, row ~ col, value.var = "R")
    out
    #    row      L2DA      L2DF       L2G       L2L       L2M       L2P       L2R       L2V
    # 1 L2DA        NA 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.5765714 0.4781481
    # 2 L2DF 0.6370000        NA 0.3675000 0.2440000 0.2900000 0.4840000 0.4627778 0.3500000
    # 3  L2G 0.5533333 0.3675000        NA 0.4750000 0.5700000 0.2933333 0.5212121 0.5313793
    # 4  L2L 0.4180000 0.2440000 0.4750000        NA 0.5100000 0.3300000 0.5569565 0.5194118
    # 5  L2M 0.5550000 0.2900000 0.5700000 0.5100000        NA 0.4775000 0.4928571 0.5258333
    # 6  L2P 0.5678947 0.4840000 0.2933333 0.3300000 0.4775000        NA 0.4695652 0.4240000
    # 7  L2R 0.5765714 0.4627778 0.5212121 0.5569565 0.4928571 0.4695652        NA 0.5140625
    # 8  L2V 0.4781481 0.3500000 0.5313793 0.5194118 0.5258333 0.4240000 0.5140625        NA
    

    If you need this as a simple matrix, then we can do this:

    # Drop the label column, keep the numeric columns as a matrix, and move
    # the labels into the row names.
    out2 <- as.matrix(out[, -1])
    rownames(out2) <- out$row
    out2
    #           L2DA      L2DF       L2G       L2L       L2M       L2P       L2R       L2V
    # L2DA        NA 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.5765714 0.4781481
    # L2DF 0.6370000        NA 0.3675000 0.2440000 0.2900000 0.4840000 0.4627778 0.3500000
    # L2G  0.5533333 0.3675000        NA 0.4750000 0.5700000 0.2933333 0.5212121 0.5313793
    # L2L  0.4180000 0.2440000 0.4750000        NA 0.5100000 0.3300000 0.5569565 0.5194118
    # L2M  0.5550000 0.2900000 0.5700000 0.5100000        NA 0.4775000 0.4928571 0.5258333
    # L2P  0.5678947 0.4840000 0.2933333 0.3300000 0.4775000        NA 0.4695652 0.4240000
    # L2R  0.5765714 0.4627778 0.5212121 0.5569565 0.4928571 0.4695652        NA 0.5140625
    # L2V  0.4781481 0.3500000 0.5313793 0.5194118 0.5258333 0.4240000 0.5140625        NA
    
    # A variable's correlation with itself is 1.
    diag(out2) <- 1
    out2
    #           L2DA      L2DF       L2G       L2L       L2M       L2P       L2R       L2V
    # L2DA 1.0000000 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.5765714 0.4781481
    # L2DF 0.6370000 1.0000000 0.3675000 0.2440000 0.2900000 0.4840000 0.4627778 0.3500000
    # L2G  0.5533333 0.3675000 1.0000000 0.4750000 0.5700000 0.2933333 0.5212121 0.5313793
    # L2L  0.4180000 0.2440000 0.4750000 1.0000000 0.5100000 0.3300000 0.5569565 0.5194118
    # L2M  0.5550000 0.2900000 0.5700000 0.5100000 1.0000000 0.4775000 0.4928571 0.5258333
    # L2P  0.5678947 0.4840000 0.2933333 0.3300000 0.4775000 1.0000000 0.4695652 0.4240000
    # L2R  0.5765714 0.4627778 0.5212121 0.5569565 0.4928571 0.4695652 1.0000000 0.5140625
    # L2V  0.4781481 0.3500000 0.5313793 0.5194118 0.5258333 0.4240000 0.5140625 1.0000000
    

    As a function:

    # Build a symmetric, named correlation matrix from a vector of pairwise
    # correlations.
    #
    # Rs: numeric vector whose names have the form "<var1>.<var2>"; each
    #   value is the correlation for that pair. A pair only needs to be given
    #   in one direction ("A.B" or "B.A").
    #
    # Returns a square numeric matrix with identical row and column names,
    # each value mirrored on both sides of the diagonal, 1 on the diagonal,
    # and NA for any pair that is absent from `Rs`. Variables are ordered by
    # first appearance as the second member of a pair, then as the first
    # member. A zero-length `Rs` gives a 0 x 0 matrix.
    #
    # Stops if `Rs` is not numeric, is unnamed, has a name without a ".", or
    # contains the same pair more than once.
    myfun <- function(Rs) {
      if (length(Rs) == 0L) {
        return(matrix(numeric(0), nrow = 0L, ncol = 0L))
      }
      nm <- names(Rs)
      if (!is.numeric(Rs) || is.null(nm) ||
          !all(grepl(".", nm, fixed = TRUE))) {
        stop("`Rs` must be a numeric vector named with \"var1.var2\" pairs",
             call. = FALSE)
      }

      # Text before the first dot is one variable, text after the last dot
      # is the other.
      row <- sub("\\..*", "", nm)
      col <- sub(".*\\.", "", nm)

      # "A.B" and "B.A" describe the same cell pair, so compare them in a
      # direction-independent form before filling the matrix.
      pair <- paste(pmin(row, col), pmax(row, col), sep = ".")
      if (anyDuplicated(pair) > 0L) {
        stop("`Rs` contains the same pair more than once: ",
             paste(unique(pair[duplicated(pair)]), collapse = ", "),
             call. = FALSE)
      }

      vars <- unique(c(col, row))
      out2 <- matrix(NA_real_, nrow = length(vars), ncol = length(vars),
                     dimnames = list(vars, vars))
      # Two-column character matrices index by (rowname, colname); fill both
      # triangles so the result is symmetric.
      out2[cbind(row, col)] <- Rs
      out2[cbind(col, row)] <- Rs
      diag(out2) <- 1
      out2
    }
    # Input: the named coefficient vector, one value per variable pair.
    Rs
    # L2DA.L2DF  L2DA.L2G  L2DA.L2L  L2DA.L2M  L2DA.L2P  L2DA.L2V  L2DF.L2G  L2DF.L2L  L2DF.L2M  L2DF.L2P  L2DF.L2V   L2G.L2L   L2G.L2M   L2G.L2P   L2M.L2L 
    # 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.4781481 0.3675000 0.2440000 0.2900000 0.4840000 0.3500000 0.4750000 0.5700000 0.2933333 0.5100000 
    #   L2P.L2L   L2P.L2M  L2R.L2DA  L2R.L2DF   L2R.L2G   L2R.L2L   L2R.L2M   L2R.L2P   L2R.L2V   L2V.L2G   L2V.L2L   L2V.L2M   L2V.L2P 
    # 0.3300000 0.4775000 0.5765714 0.4627778 0.5212121 0.5569565 0.4928571 0.4695652 0.5140625 0.5313793 0.5194118 0.5258333 0.4240000 
    # Output: rows and columns share one order (not alphabetical, unlike the
    # dcast() result earlier), so the matrix is symmetric with 1 on the diagonal.
    myfun(Rs)
    #           L2DF       L2G       L2L       L2M       L2P       L2V      L2DA       L2R
    # L2DF 1.0000000 0.3675000 0.2440000 0.2900000 0.4840000 0.3500000 0.6370000 0.4627778
    # L2G  0.3675000 1.0000000 0.4750000 0.5700000 0.2933333 0.5313793 0.5533333 0.5212121
    # L2L  0.2440000 0.4750000 1.0000000 0.5100000 0.3300000 0.5194118 0.4180000 0.5569565
    # L2M  0.2900000 0.5700000 0.5100000 1.0000000 0.4775000 0.5258333 0.5550000 0.4928571
    # L2P  0.4840000 0.2933333 0.3300000 0.4775000 1.0000000 0.4240000 0.5678947 0.4695652
    # L2V  0.3500000 0.5313793 0.5194118 0.5258333 0.4240000 1.0000000 0.4781481 0.5140625
    # L2DA 0.6370000 0.5533333 0.4180000 0.5550000 0.5678947 0.4781481 1.0000000 0.5765714
    # L2R  0.4627778 0.5212121 0.5569565 0.4928571 0.4695652 0.5140625 0.5765714 1.0000000