Search code examples
rmatrixdplyrreshape2

How to treat multiple columns with same name as triplicates


I have a table having a column with genes ID and other 12 columns with gene expression levels that I want to plot. I would like to have these 12 columns as triplicates of 4 observations.

Current table:

str(CAF2.table)
    'data.frame':   53617 obs. of  13 variables:
     $ ID              : int  16650001 16650003 16650005 16650007 16650009 16650011 16650013 16650015 16650017 16650019 ...
     $ CAF unst 6h     : num  3.54 4.88 4.12 4.59 2.19 ...
     $ PDGF-CC stim 6h : num  3.44 4.12 4.96 5.14 2.18 ...
     $ CAF unst 48h    : num  3.53 4.08 5.3 4.03 1.86 ...
     $ PDGF-CC stim 48h: num  4.53 4.78 5.73 4.37 2.61 ...
     $ CAF unst 6h     : num  3.76 5 5.48 4.33 2.1 ...
     $ PDGF-CC stim 6h : num  3.35 4.45 5.62 4.19 1.86 ...
     $ CAF unst 48h    : num  4.01 4.26 5.71 4.46 2.03 ...
     $ PDGF-CC stim 48h: num  3.23 4.44 5.87 5.23 2.08 ...
     $ CAF unst 6h     : num  4.05 5.29 5.43 4.69 2.15 ...
     $ PDGF-CC stim 6h : num  4.32 4.96 5.55 5.24 2.07 ...
     $ CAF unst 48h    : num  3.58 4.33 6.15 5.44 2.17 ...
     $ PDGF-CC stim 48h: num  3.59 4.68 5.63 4.87 2.14 ...
    > 
> head.matrix(CAF2.table)
        ID CAF unst 6h PDGF-CC stim 6h CAF unst 48h PDGF-CC stim 48h CAF unst 6h PDGF-CC stim 6h CAF unst 48h
1 16650001     3.54199         3.44148      3.53341          4.52657     3.76071         3.35167      4.01463
2 16650003     4.87582         4.11537      4.08440          4.78277     5.00019         4.45496      4.26396
3 16650005     4.12428         4.96379      5.29691          5.72679     5.48193         5.61789      5.71083
4 16650007     4.59127         5.14367      4.03304          4.37235     4.32733         4.18659      4.46297
5 16650009     2.19290         2.17692      1.86428          2.61429     2.10245         1.86202      2.03368
6 16650011     3.99230         4.42224      5.12626          5.09150     4.33128         5.03978      5.10775
  PDGF-CC stim 48h CAF unst 6h PDGF-CC stim 6h CAF unst 48h PDGF-CC stim 48h
1          3.22680     4.05261         4.32480      3.57975          3.58918
2          4.44208     5.29064         4.96353      4.32983          4.68338
3          5.87353     5.43021         5.55279      6.14516          5.63250
4          5.23411     4.69241         5.24125      5.43603          4.86934
5          2.08496     2.15482         2.06980      2.17285          2.13631
6          4.80622     4.41581         4.96284      4.77983          5.21974

> dput(head(CAF2.table))
structure(list(ID = c(16650001L, 16650003L, 16650005L, 16650007L, 
16650009L, 16650011L), `CAF unst 6h` = c(3.54199, 4.87582, 4.12428, 
4.59127, 2.1929, 3.9923), `PDGF-CC stim 6h` = c(3.44148, 4.11537, 
4.96379, 5.14367, 2.17692, 4.42224), `CAF unst 48h` = c(3.53341, 
4.0844, 5.29691, 4.03304, 1.86428, 5.12626), `PDGF-CC stim 48h` = c(4.52657, 
4.78277, 5.72679, 4.37235, 2.61429, 5.0915), `CAF unst 6h` = c(3.76071, 
5.00019, 5.48193, 4.32733, 2.10245, 4.33128), `PDGF-CC stim 6h` = c(3.35167, 
4.45496, 5.61789, 4.18659, 1.86202, 5.03978), `CAF unst 48h` = c(4.01463, 
4.26396, 5.71083, 4.46297, 2.03368, 5.10775), `PDGF-CC stim 48h` = c(3.2268, 
4.44208, 5.87353, 5.23411, 2.08496, 4.80622), `CAF unst 6h` = c(4.05261, 
5.29064, 5.43021, 4.69241, 2.15482, 4.41581), `PDGF-CC stim 6h` = c(4.3248, 
4.96353, 5.55279, 5.24125, 2.0698, 4.96284), `CAF unst 48h` = c(3.57975, 
4.32983, 6.14516, 5.43603, 2.17285, 4.77983), `PDGF-CC stim 48h` = c(3.58918, 
4.68338, 5.6325, 4.86934, 2.13631, 5.21974)), row.names = c(NA, 
6L), class = "data.frame")

Desidered format for the table:

head.matrix(library.table.frame)
      Gene CAF.unst.6h CAF.unst.48h CAF.PDGFCC.stim.6h CAF.PDGFCC.stim.48h
1      FAS     9.36838      9.44594            9.35826             9.98569
2      FAS     9.50368      9.46829            9.45213             9.92114
3      FAS     9.49759      9.50703            9.47470             9.87510
4 TNFRSF17     5.64129      5.73613            5.50024             5.52308
5 TNFRSF17     5.38073      5.64783            5.60257             5.60377
6 TNFRSF17     5.71662      5.65910            5.54476             5.49416

Thank you very much for your help.


Solution

  • It looks like you have duplicate column names which can sometimes cause problems.

    As it stands, you could use tidyr both pivot_longer then pivot_wider to put into the desired format:

    library(tidyr)
    
    CAF2.table %>%
      pivot_longer(cols = -ID) %>%
      pivot_wider(id_cols = ID, values_fn = list) %>%
      unnest(cols = -ID)
    

    Output

             ID `CAF unst 6h` `PDGF-CC stim 6h` `CAF unst 48h` `PDGF-CC stim 48h`
          <int>         <dbl>             <dbl>          <dbl>              <dbl>
     1 16650001          3.54              3.44           3.53               4.53
     2 16650001          3.76              3.35           4.01               3.23
     3 16650001          4.05              4.32           3.58               3.59
     4 16650003          4.88              4.12           4.08               4.78
     5 16650003          5.00              4.45           4.26               4.44
     6 16650003          5.29              4.96           4.33               4.68
     7 16650005          4.12              4.96           5.30               5.73
     8 16650005          5.48              5.62           5.71               5.87
     9 16650005          5.43              5.55           6.15               5.63
    10 16650007          4.59              5.14           4.03               4.37
    11 16650007          4.33              4.19           4.46               5.23
    12 16650007          4.69              5.24           5.44               4.87
    13 16650009          2.19              2.18           1.86               2.61
    14 16650009          2.10              1.86           2.03               2.08
    15 16650009          2.15              2.07           2.17               2.14
    16 16650011          3.99              4.42           5.13               5.09
    17 16650011          4.33              5.04           5.11               4.81
    18 16650011          4.42              4.96           4.78               5.22