Search code examples
rpivottidyr

pivot_longer three variables respecting uniquely identifier rows


I have the following dataset.

I want to pivot_longer() so I get three variables: (1) nutrition_site corresponding the longer version of the variables nutrition_site1:nutrition_site8; (2) gps_lat corresponding the longer version of the variables gps_lat_hf1:gps_lat_hf8, (3) gps_lon corresponding the longer version of the variables gps_lon_hf1:gps_lon_hf8. But so far, I am getting longitude and latitude in different rows.

nutrition <- structure(list(county = structure(c("Aweil Centre", "Juba", "Juba", 
"Juba", "Juba", "Juba", "Juba", "Juba", "Juba", "Juba", "Juba", 
"Juba", "Juba", "Juba", "Torit", "Torit", "Torit", "Torit", "Torit", 
"Torit"), label = "county", format.stata = "%12s"), payam_id = structure(c(1, 
4, 6, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 23, 3, 7, 8, 9, 
12, 20), format.stata = "%13.0g", labels = c("Aweil Centre" = 1, 
Bangasu = 2, Bur = 3, Dolo = 4, Gangura = 5, Gondokoro = 6, Himodoge = 7, 
Hiyala = 8, Ifwotu = 9, "Juba Nabri" = 10, Kator = 11, Kudo = 12, 
Liriya = 13, Lobonok = 14, Lokiliri = 15, Mangala = 16, Munuki = 17, 
"Northern Bari" = 18, Nyakenyi = 19, Nyong = 20, Rejaf = 21, 
Rirangu = 22, Rokon = 23, Torit = 24, Yambio = 25), class = c("haven_labelled", 
"vctrs_vctr", "double")), payam = structure(c("Aweil Centre", 
"Dolo", "Gondokoro", "Juba Nabri", "Kator", "Liriya", "Lobonok", 
"Lokiliri", "Mangala", "Munuki", "Northern Bari", "Nyakenyi", 
"Rejaf", "Rokon", "Bur", "Himodoge", "Hiyala", "Ifwotu", "Kudo", 
"Nyong"), format.stata = "%13s"), groupid = structure(c(1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20), label = "group(county payam)", format.stata = "%9.0g"), 
    nutrition_site1 = structure(c("Barmayen", "Kuda PHCC", "Gondokoro PHCU", 
    "Walangwalang PHCU", "Al Sabah Hospital", "Ngulere PHCU", 
    "Tombur PHCC", "Kubi PHCU", "Mogiri PHCU", "Munuki PHCC", 
    "Luri kworjik PHCC", "Gurei PHCC", "Lologo PHCC", "Rokon PHCC", 
    "Ohiri PHCU", "Hiliu PHCU", "Tirangore PHCU", "Imokoru PHCU", 
    "Lowoi PHCU", "Nyong PHCC"), label = "1 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf1 = structure(c(8.359711, 4.957484, 4.898698, 4.883572, 
    4.84443, 4.692253, 4.90341, 4.3904226, 4.888618, 4.863658, 
    4.598713, 4.866878, 4.812911, 5.130493, 4.315281, 4.32201, 
    4.55415, 4.315281, 4.66667, 4.412822), label = "1 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf1 = structure(c(27.61511, 31.225526, 31.671388, 
    31.625567, 31.60685, 31.961885, 31.546297, 31.922682, 31.85515, 
    31.579291, 31.361019, 31.518628, 31.595553, 30.941784, 32.432358, 
    32.64614, 32.79671, 32.432358, 32.16667, 32.570753), label = "1 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site2 = structure(c("Baau", "", "Gezira PHCU", 
    "", "Juba-POC 1", "Ngangala PHCU", "Pager PHCC", "Lokiliri PHCC", 
    "Mangala PHCC", "Nyakuron PHCC", "Luri Rokwe PHCU", "Mangateen camp 2", 
    "Khor Romula PHCU", "", "Bur Mutaram PHCU", "", "Oguruny PHCU", 
    "Moti PHCU", "Kudo PHCC", "Torit Hospital"), label = "2 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf2 = structure(c(8.707257, NA, 8.4090619, NA, 4.81954, 
    4.70483, 3.9677167, 5.608877, 5.145795, 4.84611, 4.898393, 
    4.88704, 4.79349, NA, 4.52591, NA, 4.39587, 4.315281, 4.57174, 
    4.49038), label = "2 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf2 = structure(c(27.75921, NA, 29.9826985, NA, 31.53871, 
    31.920331, 31.6900341, 31.718082, 31.826536, 31.57904, 31.595486, 
    31.57887, 31.59258, NA, 32.49406, NA, 32.90319, 32.432358, 
    32.72784, 32.79671), label = "2 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site3 = structure(c("Auluic", "", "", "", "Malakia PHCC", 
    "Lirya PHCC", "", "Aru PHCC", "", "Kimu PHCC", "Payeti PHCU", 
    "Jebel Dinka", "Gumbo PHCU", "", "Loronyo PHCU", "", "Mura-Hatiha PHCU", 
    "", "Khormush PHCU", ""), label = "3 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf3 = structure(c(8.838672, NA, NA, NA, 4.837743, 
    4.385839, NA, 4.363774, NA, 4.865188, 5.0597473, 4.85618, 
    4.815084, NA, 4.57174, NA, 4.808474334, NA, 4.44852425, NA
    ), label = "3 gps_lat_hf", format.stata = "%10.0g"), gps_lon_hf3 = structure(c(27.42421, 
    NA, NA, NA, 31.603709, 32.046448, NA, 31.98457, NA, 31.562931, 
    31.6727054, 3.50619, 31.62269, NA, 32.72784, NA, 32.70710373, 
    NA, 32.432358, NA), label = "3 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site4 = structure(c("Kuom", "", "", "", "Juba-POC1", 
    "", "", "Nesitu PHCC", "", "Rock city PHCC", "", "", "Rajaf East PHCU", 
    "", "", "", "Haforiere PHCU", "", "Offiriha PHCU", ""), label = "4 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf4 = structure(c(8.721778, NA, NA, NA, 4.81954, 
    NA, NA, 4.692291, NA, 4.84753, NA, NA, 4.751689, NA, NA, 
    NA, 4.4567051, NA, 4.421822, NA), label = "4 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf4 = structure(c(27.41608, NA, NA, NA, 31.53871, 
    NA, NA, 31.703227, NA, 31.5579, NA, NA, 31.609691, NA, NA, 
    NA, 32.7933135, NA, 32.59578, NA), label = "4 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site5 = structure(c("Aroyo", "", "", "", "Kator PHCC", 
    "", "", "", "", "", "", "", "Don Bosco PHCC", "", "", "", 
    "Hiyala PHCU", "", "Offiri PHCU", ""), label = "5 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf5 = structure(c(8.676097, NA, NA, NA, 4.831269, 
    NA, NA, NA, NA, NA, NA, NA, 4.79874, NA, NA, NA, 4.46423, 
    NA, 4.133865, NA), label = "5 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf5 = structure(c(26.85703, NA, NA, NA, 31.596469, 
    NA, NA, NA, NA, NA, NA, NA, 31.62827, NA, NA, NA, 32.86489, 
    NA, 32.58579, NA), label = "5 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site6 = structure(c("Gabat", "", "", "", "Juba-POC3", 
    "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
    ), label = "6 nutrition_site", format.stata = "%17s"), gps_lat_hf6 = structure(c(8.722778, 
    NA, NA, NA, 4.811417, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), label = "6 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf6 = structure(c(27.35, NA, NA, NA, 31.528815, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "6 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site7 = structure(c("Mabong Akot", "", "", "", 
    "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
    ""), label = "7 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf7 = structure(c(8.887422, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "7 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf7 = structure(c(26.89625, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "7 gps_lon_hf", format.stata = "%10.0g"), 
    nutrition_site8 = structure(c("Alok", "", "", "", "", "", 
    "", "", "", "", "", "", "", "", "", "", "", "", "", ""), label = "8 nutrition_site", format.stata = "%17s"), 
    gps_lat_hf8 = structure(c(8.357711, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "8 gps_lat_hf", format.stata = "%10.0g"), 
    gps_lon_hf8 = structure(c(27.52639, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "8 gps_lon_hf", format.stata = "%10.0g")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

Solution

  • Using the names_pattern= argument and the special ".value" you could do:

    library(tidyr)
    
    nutrition |>
      pivot_longer(matches("\\d$"),
        names_to = c(".value", "num"),
        names_pattern = c("^(.*?)(\\d+)$")
      )
    #> # A tibble: 160 × 8
    #>    county      payam_id payam groupid num   nutrition_site gps_lat_hf gps_lon_hf
    #>    <chr>       <hvn_lb> <chr>   <dbl> <chr> <chr>               <dbl>      <dbl>
    #>  1 Aweil Cent…        1 Awei…       1 1     "Barmayen"           8.36       27.6
    #>  2 Aweil Cent…        1 Awei…       1 2     "Baau"               8.71       27.8
    #>  3 Aweil Cent…        1 Awei…       1 3     "Auluic"             8.84       27.4
    #>  4 Aweil Cent…        1 Awei…       1 4     "Kuom"               8.72       27.4
    #>  5 Aweil Cent…        1 Awei…       1 5     "Aroyo"              8.68       26.9
    #>  6 Aweil Cent…        1 Awei…       1 6     "Gabat"              8.72       27.4
    #>  7 Aweil Cent…        1 Awei…       1 7     "Mabong Akot"        8.89       26.9
    #>  8 Aweil Cent…        1 Awei…       1 8     "Alok"               8.36       27.5
    #>  9 Juba               4 Dolo        2 1     "Kuda PHCC"          4.96       31.2
    #> 10 Juba               4 Dolo        2 2     ""                  NA          NA  
    #> # ℹ 150 more rows