Search code examples
rdataframedplyrheader

Headers type problem when using pivot_longer in R


I have a dataframe that corresponds to the number of hours worked each day by my research assistants. It looks like that :

structure(list(SurveyorId = c("Zineb", "Elisa", "AudreyB", "CamilleP", 
"CamilleV", "AudreyG", "CharlesE", "Lou", "Ludivine", "Elise", 
"Jason", "Mathilde", "Hassina", "Nihel", "Dauphine"), `16/10/2023` = c(8, 
7, 3, 7, 6, 4, NA, 2.5, NA, 7, NA, NA, 4, NA, NA), `17/10/2023` = c(8, 
7, 5.5, 4, 2.5, 4.5, NA, 2, NA, 4, NA, 5.5, 7.5, NA, NA), `18/10/2023` = c(8, 
5, 7, NA, NA, 2.5, NA, NA, 2, NA, NA, 8, 7.5, NA, NA), `19/10/2023` = c(8, 
7, NA, NA, NA, NA, NA, NA, 1.5, NA, NA, 2.5, 9, NA, NA), `20/10/2023` = c(8, 
NA, 7.5, NA, NA, 2, NA, 2, NA, NA, 2.2, NA, 3.5, NA, NA), `21/10/2023` = c(NA, 
NA, NA, 6, 6, NA, 7.5, 5, 4.5, 4, 4.5, NA, NA, NA, NA), `23/10/2023` = c(8, 
9, NA, 7.5, 7, 4, NA, 1, NA, 7.5, NA, 2.5, 4, NA, NA), `24/10/2023` = c(8, 
NA, NA, 4.5, 2, 4.5, 8, 3.75, NA, NA, NA, 5.5, 7.5, NA, NA), 
    `25/10/2023` = c(8, 5, NA, NA, NA, 2, 1.5, NA, 1.5, NA, NA, 
    4, 8, NA, NA), `26/10/2023` = c(8, 8, 8, NA, NA, NA, NA, 
    1, 2, 7, 4, NA, 8, NA, NA), `27/10/2023` = c(8, NA, 4, NA, 
    NA, 2, NA, NA, 2, NA, NA, NA, 2.75, NA, NA), `28/10/2023` = c(NA, 
    NA, NA, 7.5, 6, NA, 8, 6, 6.5, 4, NA, 3, NA, NA, NA), `30/10/2023` = c(8, 
    9, 6, 2, NA, 4, NA, 1, 1.5, 7, NA, NA, 4.75, 3, NA), `31/10/2023` = c(8, 
    NA, 4, 2, NA, 4, 8, 2.5, 0.5, 0, NA, 3, 8.5, 3.5, NA), `01/11/2023` = c(NA, 
    5, NA, NA, NA, 2, NA, NA, NA, 7, NA, 3, NA, 3, NA), `02/11/2023` = c(3, 
    6.5, 4, NA, NA, NA, NA, 2, 2, NA, 3.5, 3, 8, 5.4, NA), `03/11/2023` = c(8, 
    NA, NA, NA, NA, NA, NA, NA, 2, NA, 2, 4.5, 3, 3.5, NA), `04/11/2023` = c(1, 
    NA, NA, NA, 7, NA, NA, 4.5, NA, NA, 3.75, NA, NA, 2.4, 7), 
    `06/11/2023` = c(8, 7, 2, 7, 6, 4, NA, 2, NA, 5, NA, 5, 4.6, 
    NA, 7), `07/11/2023` = c(7.5, NA, 0.5, 4, 3.5, 4, 8, 4, NA, 
    NA, NA, 4, 8, NA, 7), `08/11/2023` = c(7.5, 6.5, 2, NA, NA, 
    NA, 2, NA, 3, NA, NA, NA, 8.15, NA, 7), `09/11/2023` = c("7.5", 
    "5.5", NA, NA, NA, NA, NA, "2", "2", "7", "3.2", "5", "9", 
    NA, "5.5"), `10/11/2023` = c(9, NA, 1.5, NA, NA, NA, NA, 
    NA, 2.5, NA, 2, NA, 3.5, NA, NA), `11/11/2023` = c(NA, NA, 
    NA, 7, 7, NA, 7, NA, NA, 4, 3.2, NA, NA, 5, 6), `13/11/2023` = c("7.5", 
    NA, "2", NA, "7", "3", NA, NA, NA, "5.5", NA, "4.5", "4.25", 
    NA, NA), `14/11/2023` = c(8, NA, NA, NA, 2, NA, 6.5, 4.5, 
    NA, NA, NA, 3.5, 8.5, NA, NA), `15/11/2023` = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `16/11/2023` = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    `17/11/2023` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -15L))

To do statistics on this data frame, I want to pivot it so that I have all of their names in one columns, all dates in one column, and all hours worked into one column. For that I wanted to use pivot_longer as follows:

transposed_data <- Recensement_volume_horaire %>%
  pivot_longer(cols = -"SurveyorId", names_to = "Date", values_to = "Hours_Worked")

However, I get the following error message when running that:

Error in pivot_longer():
! Can't combine `16/10/2023` <double> and `09/11/2023` <character>.
Backtrace:
 1. Recensement_volume_horaire %>% ...
 3. tidyr:::pivot_longer.data.frame(., cols = -"SurveyorId", names_to = "Date", values_to = "Hours_Worked")

I have tried everything to put all column's headers into the same format but I fail to fix this. All my solutions were variations of code chunks like:

colnames(Recensement_volume_horaire) <- as.character(colnames(Recensement_volume_horaire))

Can you help me? It would save me!!


Solution

  • Looking at the structure of that frame indicates some columns are not numeric:

    str(Recensement_volume_horaire)
    # tibble [15 × 30] (S3: tbl_df/tbl/data.frame)
    #  $ SurveyorId: chr [1:15] "Zineb" "Elisa" "AudreyB" "CamilleP" ...
    #  $ 16/10/2023: num [1:15] 8 7 3 7 6 4 NA 2.5 NA 7 ...
    #  $ 17/10/2023: num [1:15] 8 7 5.5 4 2.5 4.5 NA 2 NA 4 ...
    #  $ 18/10/2023: num [1:15] 8 5 7 NA NA 2.5 NA NA 2 NA ...
    #  $ 19/10/2023: num [1:15] 8 7 NA NA NA NA NA NA 1.5 NA ...
    #  $ 20/10/2023: num [1:15] 8 NA 7.5 NA NA 2 NA 2 NA NA ...
    #  $ 21/10/2023: num [1:15] NA NA NA 6 6 NA 7.5 5 4.5 4 ...
    #  $ 23/10/2023: num [1:15] 8 9 NA 7.5 7 4 NA 1 NA 7.5 ...
    #  $ 24/10/2023: num [1:15] 8 NA NA 4.5 2 4.5 8 3.75 NA NA ...
    #  $ 25/10/2023: num [1:15] 8 5 NA NA NA 2 1.5 NA 1.5 NA ...
    #  $ 26/10/2023: num [1:15] 8 8 8 NA NA NA NA 1 2 7 ...
    #  $ 27/10/2023: num [1:15] 8 NA 4 NA NA 2 NA NA 2 NA ...
    #  $ 28/10/2023: num [1:15] NA NA NA 7.5 6 NA 8 6 6.5 4 ...
    #  $ 30/10/2023: num [1:15] 8 9 6 2 NA 4 NA 1 1.5 7 ...
    #  $ 31/10/2023: num [1:15] 8 NA 4 2 NA 4 8 2.5 0.5 0 ...
    #  $ 01/11/2023: num [1:15] NA 5 NA NA NA 2 NA NA NA 7 ...
    #  $ 02/11/2023: num [1:15] 3 6.5 4 NA NA NA NA 2 2 NA ...
    #  $ 03/11/2023: num [1:15] 8 NA NA NA NA NA NA NA 2 NA ...
    #  $ 04/11/2023: num [1:15] 1 NA NA NA 7 NA NA 4.5 NA NA ...
    #  $ 06/11/2023: num [1:15] 8 7 2 7 6 4 NA 2 NA 5 ...
    #  $ 07/11/2023: num [1:15] 7.5 NA 0.5 4 3.5 4 8 4 NA NA ...
    #  $ 08/11/2023: num [1:15] 7.5 6.5 2 NA NA NA 2 NA 3 NA ...
    #  $ 09/11/2023: chr [1:15] "7.5" "5.5" NA NA ...
    #  $ 10/11/2023: num [1:15] 9 NA 1.5 NA NA NA NA NA 2.5 NA ...
    #  $ 11/11/2023: num [1:15] NA NA NA 7 7 NA 7 NA NA 4 ...
    #  $ 13/11/2023: chr [1:15] "7.5" NA "2" NA ...
    #  $ 14/11/2023: num [1:15] 8 NA NA NA 2 NA 6.5 4.5 NA NA ...
    #  $ 15/11/2023: logi [1:15] NA NA NA NA NA NA ...
    #  $ 16/11/2023: logi [1:15] NA NA NA NA NA NA ...
    #  $ 17/11/2023: logi [1:15] NA NA NA NA NA NA ...
    

    Namely, `09/11/2023` and `13/11/2023` are both chr instead of num. We can fix them and then pivot:

    Recensement_volume_horaire %>%
      mutate(across(where(is.character) & ends_with("/2023"), ~ as.numeric(.))) %>%
      pivot_longer(cols = -"SurveyorId", names_to = "Date", values_to = "Hours_Worked")
    # # A tibble: 435 × 3
    #    SurveyorId Date       Hours_Worked
    #    <chr>      <chr>             <dbl>
    #  1 Zineb      16/10/2023            8
    #  2 Zineb      17/10/2023            8
    #  3 Zineb      18/10/2023            8
    #  4 Zineb      19/10/2023            8
    #  5 Zineb      20/10/2023            8
    #  6 Zineb      21/10/2023           NA
    #  7 Zineb      23/10/2023            8
    #  8 Zineb      24/10/2023            8
    #  9 Zineb      25/10/2023            8
    # 10 Zineb      26/10/2023            8
    # # ℹ 425 more rows
    # # ℹ Use `print(n = ...)` to see more rows
    

    I used a clear "is character and ends with 2023" condition for confirming all are numeric, over to you if you need to tighten that down (or loosen it?).