Search code examples
rdataframefunctiondplyrtidyverse

How to merge two columns when long-formatting data frame in R


Below, I'm can use tidyr::pivot_longer(DATA, -InstNm, names_sep = "_", names_to = c("Indicator", ".value")) to get very close to my Desired_output.

But I wonder how I can exactly achieve my Desired_output where CPct and EPct are merged as one column called CPct_EPct?

DATA <- read.table(header=T, text=
"InstNm    OTE_CPct OTE_L OTE_P    OTE_R      RAE_EPct RAE_NPct RAE_L RAE_B RAE_P  RAE_R
 Baker          60      1     1  Limited            56     76     1     0     1   Limited")

Desired_output <- read.table(header=T, text=
"InstNm    Indicator Rating   NPct  CPct_EPct  L   P  B
 Baker     OTE      Limited    NA        60    1   1  NA
 Baker     RAE      Limited    76        56    1   1  0")

Solution

  • Note that this method assumes either CPct or EPct is NA:

    library(dplyr)
    library(tidyr)
    
    DATA <- read.table(header=T, text=
                         "InstNm    OTE_CPct OTE_L OTE_P    OTE_R      RAE_EPct RAE_NPct RAE_L RAE_B RAE_P  RAE_R
     Baker          60      1     1  Limited            56     76     1     0     1   Limited")
    
    DATA |>
      pivot_longer(-InstNm, names_sep = "_", names_to = c("Indicator", ".value")) |>
      unite(CPct_EPct, c("CPct", "EPct"), na.rm = TRUE) |>
      rename(Rating = "R") |>
      relocate(InstNm, Indicator, Rating, NPct, CPct_EPct, L, P, B)
      
    # # A tibble: 2 × 8
    #   InstNm Indicator Rating   NPct CPct_EPct     L     P     B
    #   <chr>  <chr>     <chr>   <int> <chr>     <int> <int> <int>
    # 1 Baker  OTE       Limited    NA 60            1     1    NA
    # 2 Baker  RAE       Limited    76 56            1     1     0