Search code examples
rdata-cleaningreshape2

How to gather every 4 columns, but variables may have different length


I have a data frame that is organized with 4 columns that repeats 145. The length of the columns are not the same.

I would like reshape the data frame so that I have just 4 columns. I also have to spread some data.

Here is an example of what my data looks like:

df<- data.frame(
  id = c(rep("M",8), rep(NA, 2)),
  day =c(rep(seq(1:4),2), rep(NA,2)),
  parameter= c(rep("glu",4), rep("lac",4), rep(NA,2)),
  value = c(rep(2,4), rep(0.5,4), rep(NA,2)),
  id1 =c(rep("v",10)),
  day1= c(rep(1,5), rep(2,3), rep(1,2)), 
  parameter1 = c(rep("glu", 8), rep("lac", 2)),
  value1 = c(rep(2,8), rep(5,2)))

This is the result I want:

ideal.df<-data.frame(id =c(rep("M",10), rep("v", 10)),
                     day = c(rep(seq(1:4),2), rep(NA,2), rep(1,5), rep(2,3), rep(1,2)),
                     glu = c(rep(2,4), rep(NA,6), rep(1,8), rep(NA,2) ),
                     lac = c( rep(0.5,4), rep(NA,6),rep(5,2), rep(NA,8))
                     )

Solution

  • The expected output seems very strange. Below is some very ugly example code to generate a result that seems more logical, but apparently isn’t what the OP wants. Could the OP please explain why the result should be as posted (ideal.df) rather than what is shown here (df2)?

    Given data:

    df<- data.frame(
      id = c(rep("M",8), rep(NA, 2)),
      day =c(rep(seq(1:4),2), rep(NA,2)),
      parameter= c(rep("glu",4), rep("lac",4), rep(NA,2)),
      value = c(rep(2,4), rep(0.5,4), rep(NA,2)),
      id1 =c(rep("v",10)),
      day1= c(rep(1,5), rep(2,3), rep(1,2)), 
      parameter1 = c(rep("glu", 8), rep("lac", 2)),
      value1 = c(rep(2,8), rep(5,2)))
    df
    #>      id day parameter value id1 day1 parameter1 value1
    #> 1     M   1       glu   2.0   v    1        glu      2
    #> 2     M   2       glu   2.0   v    1        glu      2
    #> 3     M   3       glu   2.0   v    1        glu      2
    #> 4     M   4       glu   2.0   v    1        glu      2
    #> 5     M   1       lac   0.5   v    1        glu      2
    #> 6     M   2       lac   0.5   v    2        glu      2
    #> 7     M   3       lac   0.5   v    2        glu      2
    #> 8     M   4       lac   0.5   v    2        glu      2
    #> 9  <NA>  NA      <NA>    NA   v    1        lac      5
    #> 10 <NA>  NA      <NA>    NA   v    1        lac      5
    

    Result of simple processing of the data:

    df2 <- data.frame(id = c(df$id, df$id1), day = c(df$day, df$day1))
    df2$glu <- c(ifelse(df$parameter=="glu",df$value,NA), ifelse(df$parameter1=="glu",df$value1,NA))
    df2$lac <- c(ifelse(df$parameter=="lac",df$value,NA), ifelse(df$parameter1=="lac",df$value1,NA))
    df2
    #>    id day glu lac
    #> 1   1   1   2  NA
    #> 2   1   2   2  NA
    #> 3   1   3   2  NA
    #> 4   1   4   2  NA
    #> 5   1   1  NA 0.5
    #> 6   1   2  NA 0.5
    #> 7   1   3  NA 0.5
    #> 8   1   4  NA 0.5
    #> 9  NA  NA  NA  NA
    #> 10 NA  NA  NA  NA
    #> 11  1   1   2  NA
    #> 12  1   1   2  NA
    #> 13  1   1   2  NA
    #> 14  1   1   2  NA
    #> 15  1   1   2  NA
    #> 16  1   2   2  NA
    #> 17  1   2   2  NA
    #> 18  1   2   2  NA
    #> 19  1   1  NA 5.0
    #> 20  1   1  NA 5.0
    

    Desired result:

    ideal.df<-data.frame(id =c(rep("M",10), rep("v", 10)),
                         day = c(rep(seq(1:4),2), rep(NA,2), rep(1,5), rep(2,3), rep(1,2)),
                         glu = c(rep(2,4), rep(NA,6), rep(1,8), rep(NA,2) ),
                         lac = c( rep(0.5,4), rep(NA,6),rep(5,2), rep(NA,8))
    )
    ideal.df
    #>    id day glu lac
    #> 1   M   1   2 0.5
    #> 2   M   2   2 0.5
    #> 3   M   3   2 0.5
    #> 4   M   4   2 0.5
    #> 5   M   1  NA  NA
    #> 6   M   2  NA  NA
    #> 7   M   3  NA  NA
    #> 8   M   4  NA  NA
    #> 9   M  NA  NA  NA
    #> 10  M  NA  NA  NA
    #> 11  v   1   1 5.0
    #> 12  v   1   1 5.0
    #> 13  v   1   1  NA
    #> 14  v   1   1  NA
    #> 15  v   1   1  NA
    #> 16  v   2   1  NA
    #> 17  v   2   1  NA
    #> 18  v   2   1  NA
    #> 19  v   1  NA  NA
    #> 20  v   1  NA  NA