R move columns to be rows of other columns

I am converting a messy excel sheet into usable data. The basic structure is a repeating block of variables with similar items in rows. I want to move columns so they become rows of the first block of columns.

Difficult to describe so here is mwe.

d1 = structure(list(...1 = c("p", "w", "s", NA, "A Ex", NA, "Pres", 
"Time", "HR"), ...2 = c("1", "1", "1", NA, "Walk", NA, NA, NA, 
NA), ...3 = c(NA, NA, NA, NA, NA, NA, "Done", "Time", "HR"), 
    ...4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA), ...5 = c("p", 
    "w", "s", NA, "A Ex", NA, "Pres", "Time", "HR"), ...6 = c("1", 
    "1", "2", NA, "Walk", NA, NA, NA, NA), ...7 = c(NA, NA, NA, 
    NA, NA, NA, "Done", "Time", "HR"), ...8 = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), ...9 = c("p", "w", "s", NA, "A Ex", 
    NA, "Pres", "Time", "HR"), ...10 = c("1", "1", "3", NA, "Walk", 
    NA, NA, NA, NA), ...11 = c(NA, NA, NA, NA, NA, NA, "Done", 
    "Time", "HR"), ...12 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
    NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", 
"data.frame"))

I want to move columns 5:8 and 6:12 so they become additional rows of 1:4 so result is:

df2 = structure(list(...1 = c("p", "w", "s", NA, "A Ex", NA, "Pres", 
"Time", "HR", "p", "w", "s", NA, "A Ex", NA, "Pres", "Time", 
"HR", "p", "w", "s", NA, "A Ex", NA, "Pres", "Time", "HR"), ...2 = c("1", 
"1", "1", NA, "Walk", NA, NA, NA, NA, "1", "1", "2", NA, "Walk", 
NA, NA, NA, NA, "1", "1", "3", NA, "Walk", NA, NA, NA, NA), ...3 = c(NA, 
NA, NA, NA, NA, NA, "Done", "Time", "HR", NA, NA, NA, NA, NA, 
NA, "Done", "Time", "HR", NA, NA, NA, NA, NA, NA, "Done", "Time", 
"HR"), ...4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, 
-27L), class = c("tbl_df", "tbl", "data.frame"))

Solution

base R

The basic premise is to split by groups of 4 columns:

splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
splitframes
# $`0`
# # A tibble: 9 x 4
#   ...1  ...2  ...3  ...4 
#   <chr> <chr> <chr> <lgl>
# 1 p     1     <NA>  NA   
# 2 w     1     <NA>  NA   
# 3 s     1     <NA>  NA   
# 4 <NA>  <NA>  <NA>  NA   
# 5 A Ex  Walk  <NA>  NA   
# 6 <NA>  <NA>  <NA>  NA   
# 7 Pres  <NA>  Done  NA   
# 8 Time  <NA>  Time  NA   
# 9 HR    <NA>  HR    NA   
# $`1`
# # A tibble: 9 x 4
#   ...1  ...2  ...3  ...4 
#   <chr> <chr> <chr> <lgl>
# 1 p     1     <NA>  NA   
# 2 w     1     <NA>  NA

Unfortunately, rbind.data.frame needs all arguments to have the same names,

lapply(splitframes, names)
# $`0`
# [1] "...1" "...2" "...3" "...4"
# $`1`
# [1] "...5" "...6" "...7" "...8"
# $`2`
# [1] "...9"  "...10" "...11" "...12"

so we need to fix that first:

splitframes <- lapply(splitframes, `colnames<-`, names(splitframes[[1]]))
do.call(rbind, splitframes)
# # A tibble: 27 x 4
#    ...1  ...2  ...3  ...4 
#  * <chr> <chr> <chr> <lgl>
#  1 p     1     <NA>  NA   
#  2 w     1     <NA>  NA   
#  3 s     1     <NA>  NA   
#  4 <NA>  <NA>  <NA>  NA   
#  5 A Ex  Walk  <NA>  NA   
#  6 <NA>  <NA>  <NA>  NA   
#  7 Pres  <NA>  Done  NA   
#  8 Time  <NA>  Time  NA   
#  9 HR    <NA>  HR    NA   
# 10 p     1     <NA>  NA   
# # ... with 17 more rows

dplyr

This package is a little more accommodating:

splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
dplyr::bind_rows(splitframes)
# New names:
# * ...5 -> ...1
# * ...6 -> ...2
# * ...7 -> ...3
# * ...8 -> ...4
# New names:
# * ...9 -> ...1
# * ...10 -> ...2
# * ...11 -> ...3
# * ...12 -> ...4
# # A tibble: 27 x 4
#    ...1  ...2  ...3  ...4 
#    <chr> <chr> <chr> <lgl>
#  1 p     1     <NA>  NA   
#  2 w     1     <NA>  NA   
#  3 s     1     <NA>  NA   
#  4 <NA>  <NA>  <NA>  NA   
#  5 A Ex  Walk  <NA>  NA   
#  6 <NA>  <NA>  <NA>  NA   
#  7 Pres  <NA>  Done  NA   
#  8 Time  <NA>  Time  NA   
#  9 HR    <NA>  HR    NA   
# 10 p     1     <NA>  NA   
# # ... with 17 more rows

data.table

splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
library(data.table)
rbindlist(splitframes)
# Column 1 ['...5'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for  backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
#       ...1   ...2   ...3   ...4
#     <char> <char> <char> <lgcl>
#  1:      p      1   <NA>     NA
#  2:      w      1   <NA>     NA
#  3:      s      1   <NA>     NA
#  4:   <NA>   <NA>   <NA>     NA

To silence that message, use instead rbindlist(..., use.names=FALSE).