I have a dataframe that corresponds to the number of hours worked each day by my research assistants. It looks like that :
structure(list(SurveyorId = c("Zineb", "Elisa", "AudreyB", "CamilleP",
"CamilleV", "AudreyG", "CharlesE", "Lou", "Ludivine", "Elise",
"Jason", "Mathilde", "Hassina", "Nihel", "Dauphine"), `16/10/2023` = c(8,
7, 3, 7, 6, 4, NA, 2.5, NA, 7, NA, NA, 4, NA, NA), `17/10/2023` = c(8,
7, 5.5, 4, 2.5, 4.5, NA, 2, NA, 4, NA, 5.5, 7.5, NA, NA), `18/10/2023` = c(8,
5, 7, NA, NA, 2.5, NA, NA, 2, NA, NA, 8, 7.5, NA, NA), `19/10/2023` = c(8,
7, NA, NA, NA, NA, NA, NA, 1.5, NA, NA, 2.5, 9, NA, NA), `20/10/2023` = c(8,
NA, 7.5, NA, NA, 2, NA, 2, NA, NA, 2.2, NA, 3.5, NA, NA), `21/10/2023` = c(NA,
NA, NA, 6, 6, NA, 7.5, 5, 4.5, 4, 4.5, NA, NA, NA, NA), `23/10/2023` = c(8,
9, NA, 7.5, 7, 4, NA, 1, NA, 7.5, NA, 2.5, 4, NA, NA), `24/10/2023` = c(8,
NA, NA, 4.5, 2, 4.5, 8, 3.75, NA, NA, NA, 5.5, 7.5, NA, NA),
`25/10/2023` = c(8, 5, NA, NA, NA, 2, 1.5, NA, 1.5, NA, NA,
4, 8, NA, NA), `26/10/2023` = c(8, 8, 8, NA, NA, NA, NA,
1, 2, 7, 4, NA, 8, NA, NA), `27/10/2023` = c(8, NA, 4, NA,
NA, 2, NA, NA, 2, NA, NA, NA, 2.75, NA, NA), `28/10/2023` = c(NA,
NA, NA, 7.5, 6, NA, 8, 6, 6.5, 4, NA, 3, NA, NA, NA), `30/10/2023` = c(8,
9, 6, 2, NA, 4, NA, 1, 1.5, 7, NA, NA, 4.75, 3, NA), `31/10/2023` = c(8,
NA, 4, 2, NA, 4, 8, 2.5, 0.5, 0, NA, 3, 8.5, 3.5, NA), `01/11/2023` = c(NA,
5, NA, NA, NA, 2, NA, NA, NA, 7, NA, 3, NA, 3, NA), `02/11/2023` = c(3,
6.5, 4, NA, NA, NA, NA, 2, 2, NA, 3.5, 3, 8, 5.4, NA), `03/11/2023` = c(8,
NA, NA, NA, NA, NA, NA, NA, 2, NA, 2, 4.5, 3, 3.5, NA), `04/11/2023` = c(1,
NA, NA, NA, 7, NA, NA, 4.5, NA, NA, 3.75, NA, NA, 2.4, 7),
`06/11/2023` = c(8, 7, 2, 7, 6, 4, NA, 2, NA, 5, NA, 5, 4.6,
NA, 7), `07/11/2023` = c(7.5, NA, 0.5, 4, 3.5, 4, 8, 4, NA,
NA, NA, 4, 8, NA, 7), `08/11/2023` = c(7.5, 6.5, 2, NA, NA,
NA, 2, NA, 3, NA, NA, NA, 8.15, NA, 7), `09/11/2023` = c("7.5",
"5.5", NA, NA, NA, NA, NA, "2", "2", "7", "3.2", "5", "9",
NA, "5.5"), `10/11/2023` = c(9, NA, 1.5, NA, NA, NA, NA,
NA, 2.5, NA, 2, NA, 3.5, NA, NA), `11/11/2023` = c(NA, NA,
NA, 7, 7, NA, 7, NA, NA, 4, 3.2, NA, NA, 5, 6), `13/11/2023` = c("7.5",
NA, "2", NA, "7", "3", NA, NA, NA, "5.5", NA, "4.5", "4.25",
NA, NA), `14/11/2023` = c(8, NA, NA, NA, 2, NA, 6.5, 4.5,
NA, NA, NA, 3.5, 8.5, NA, NA), `15/11/2023` = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `16/11/2023` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`17/11/2023` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -15L))
To do statistics on this data frame, I want to pivot it so that I have all of their names in one columns, all dates in one column, and all hours worked into one column. For that I wanted to use pivot_longer as follows:
transposed_data <- Recensement_volume_horaire %>%
pivot_longer(cols = -"SurveyorId", names_to = "Date", values_to = "Hours_Worked")
However, I get the following error message when running that:
Error in pivot_longer():
! Can't combine `16/10/2023` <double> and `09/11/2023` <character>.
Backtrace:
1. Recensement_volume_horaire %>% ...
3. tidyr:::pivot_longer.data.frame(., cols = -"SurveyorId", names_to = "Date", values_to = "Hours_Worked")
I have tried everything to put all column's headers into the same format but I fail to fix this. All my solutions were variations of code chunks like:
colnames(Recensement_volume_horaire) <- as.character(colnames(Recensement_volume_horaire))
Can you help me? It would save me!!
Looking at the str
ucture of that frame indicates some columns are not numeric:
str(Recensement_volume_horaire)
# tibble [15 × 30] (S3: tbl_df/tbl/data.frame)
# $ SurveyorId: chr [1:15] "Zineb" "Elisa" "AudreyB" "CamilleP" ...
# $ 16/10/2023: num [1:15] 8 7 3 7 6 4 NA 2.5 NA 7 ...
# $ 17/10/2023: num [1:15] 8 7 5.5 4 2.5 4.5 NA 2 NA 4 ...
# $ 18/10/2023: num [1:15] 8 5 7 NA NA 2.5 NA NA 2 NA ...
# $ 19/10/2023: num [1:15] 8 7 NA NA NA NA NA NA 1.5 NA ...
# $ 20/10/2023: num [1:15] 8 NA 7.5 NA NA 2 NA 2 NA NA ...
# $ 21/10/2023: num [1:15] NA NA NA 6 6 NA 7.5 5 4.5 4 ...
# $ 23/10/2023: num [1:15] 8 9 NA 7.5 7 4 NA 1 NA 7.5 ...
# $ 24/10/2023: num [1:15] 8 NA NA 4.5 2 4.5 8 3.75 NA NA ...
# $ 25/10/2023: num [1:15] 8 5 NA NA NA 2 1.5 NA 1.5 NA ...
# $ 26/10/2023: num [1:15] 8 8 8 NA NA NA NA 1 2 7 ...
# $ 27/10/2023: num [1:15] 8 NA 4 NA NA 2 NA NA 2 NA ...
# $ 28/10/2023: num [1:15] NA NA NA 7.5 6 NA 8 6 6.5 4 ...
# $ 30/10/2023: num [1:15] 8 9 6 2 NA 4 NA 1 1.5 7 ...
# $ 31/10/2023: num [1:15] 8 NA 4 2 NA 4 8 2.5 0.5 0 ...
# $ 01/11/2023: num [1:15] NA 5 NA NA NA 2 NA NA NA 7 ...
# $ 02/11/2023: num [1:15] 3 6.5 4 NA NA NA NA 2 2 NA ...
# $ 03/11/2023: num [1:15] 8 NA NA NA NA NA NA NA 2 NA ...
# $ 04/11/2023: num [1:15] 1 NA NA NA 7 NA NA 4.5 NA NA ...
# $ 06/11/2023: num [1:15] 8 7 2 7 6 4 NA 2 NA 5 ...
# $ 07/11/2023: num [1:15] 7.5 NA 0.5 4 3.5 4 8 4 NA NA ...
# $ 08/11/2023: num [1:15] 7.5 6.5 2 NA NA NA 2 NA 3 NA ...
# $ 09/11/2023: chr [1:15] "7.5" "5.5" NA NA ...
# $ 10/11/2023: num [1:15] 9 NA 1.5 NA NA NA NA NA 2.5 NA ...
# $ 11/11/2023: num [1:15] NA NA NA 7 7 NA 7 NA NA 4 ...
# $ 13/11/2023: chr [1:15] "7.5" NA "2" NA ...
# $ 14/11/2023: num [1:15] 8 NA NA NA 2 NA 6.5 4.5 NA NA ...
# $ 15/11/2023: logi [1:15] NA NA NA NA NA NA ...
# $ 16/11/2023: logi [1:15] NA NA NA NA NA NA ...
# $ 17/11/2023: logi [1:15] NA NA NA NA NA NA ...
Namely, `09/11/2023`
and `13/11/2023`
are both chr
instead of num
. We can fix them and then pivot:
Recensement_volume_horaire %>%
mutate(across(where(is.character) & ends_with("/2023"), ~ as.numeric(.))) %>%
pivot_longer(cols = -"SurveyorId", names_to = "Date", values_to = "Hours_Worked")
# # A tibble: 435 × 3
# SurveyorId Date Hours_Worked
# <chr> <chr> <dbl>
# 1 Zineb 16/10/2023 8
# 2 Zineb 17/10/2023 8
# 3 Zineb 18/10/2023 8
# 4 Zineb 19/10/2023 8
# 5 Zineb 20/10/2023 8
# 6 Zineb 21/10/2023 NA
# 7 Zineb 23/10/2023 8
# 8 Zineb 24/10/2023 8
# 9 Zineb 25/10/2023 8
# 10 Zineb 26/10/2023 8
# # ℹ 425 more rows
# # ℹ Use `print(n = ...)` to see more rows
I used a clear "is character and ends with 2023" condition for confirming all are numeric, over to you if you need to tighten that down (or loosen it?).