Search code examples
rdata-manipulation

"Even Out" Table in R


I have this table in R:

library(plyr)

col_1_legend = 
structure(list(original_data.col_1 = structure(1:3, .Label = c("a", 
"b", "c"), class = "factor"), sample_data.col_1 = 1:3), row.names = c(1L, 
3L, 4L), class = "data.frame")


col_3_legend = structure(list(original_data.col_3 = structure(c(2L, 1L, 4L, 
3L), .Label = c("a", "bb", "f", "g"), class = "factor"), sample_data.col_3 = 4:7), row.names = c(NA, 
4L), class = "data.frame")

col_4_legend = 
structure(list(original_data.col_4 = structure(c(3L, 1L, 2L, 
4L, 5L), .Label = c("j", "jj", "k", "p", "pp"), class = "factor"), 
    sample_data.col_4 = 8:12), row.names = c(1L, 2L, 4L, 5L, 
6L), class = "data.frame")

Since each column had a different number of rows, I had to figure out a way to make a data frame with a differing number of rows:

my_table<- plyr::rbind.fill(col_1_legend,col_3_legend, col_4_legend)

This looks something like this:

   original_data.col_1 sample_data.col_1 original_data.col_3 sample_data.col_3 original_data.col_4 sample_data.col_4
1                    a                 1                <NA>                NA                <NA>                NA
2                    b                 2                <NA>                NA                <NA>                NA
3                    c                 3                <NA>                NA                <NA>                NA
4                 <NA>                NA                  bb                 4                <NA>                NA
5                 <NA>                NA                   a                 5                <NA>                NA
6                 <NA>                NA                   g                 6                <NA>                NA
7                 <NA>                NA                   f                 7                <NA>                NA
8                 <NA>                NA                <NA>                NA                   k                 8
9                 <NA>                NA                <NA>                NA                   j                 9
10                <NA>                NA                <NA>                NA                  jj                10
11                <NA>                NA                <NA>                NA                   p                11
12                <NA>                NA                <NA>                NA                  pp                12

Is it somehow possible to "even out" this table so that each column always starts at the first index and then trails off towards the bottom? This means that the first row in the above table could by definition never contain an NA.

Thank you!


Solution

  • Update: Request for solution without qpcR package (was not easy!):

    library(tidyverse)
    
    dataframe_list <- list(col_1_legend, col_3_legend, col_4_legend)
    
    dataframe_list %>% 
      map(., ~ .x %>%
            mutate(rank = row_number())) %>% 
      bind_rows() %>% 
      mutate(across(everything(), as.character)) %>% 
      pivot_longer(-rank) %>% 
      na.omit() %>% 
      pivot_wider(values_fn=list) %>% 
      unnest(cols = c(original_data.col_1, sample_data.col_1, original_data.col_3, 
                      sample_data.col_3, original_data.col_4, sample_data.col_4)) 
    
    
      rank  original_data.col_1 sample_data.col_1 original_data.col_3 sample_data.col_3 original_data.col_4 sample_data.col_4
      <chr> <chr>               <chr>             <chr>               <chr>             <chr>               <chr>            
    1 1     a                   1                 bb                  4                 k                   8                
    2 2     b                   2                 a                   5                 j                   9                
    3 3     c                   3                 g                   6                 jj                  10               
    4 4     NA                  NA                f                   7                 p                   11               
    5 5     NA                  NA                NA                  NA                pp                  12 
    

    First answer: One way could be: Create a list of your dataframes, then use cbind.na

    dataframe_list <- list(col_1_legend, col_3_legend, col_4_legend)
    #install.packages("qpcR")
    
    library(qpcR)
    do.call(qpcR:::cbind.na, dataframe_list)
    
      original_data.col_1 sample_data.col_1 original_data.col_3 sample_data.col_3 original_data.col_4 sample_data.col_4
    1                    a                 1                  bb                 4                   k                 8
    3                    b                 2                   a                 5                   j                 9
    4                    c                 3                   g                 6                  jj                10
    11                <NA>                NA                   f                 7                   p                11
    2                 <NA>                NA                <NA>                NA                  pp                12