I need to spread the date value of a data frame (long to wide) but am having trouble achieving this because there are two variables that I need.
A solution I thought of might be to create two separate data frames, one for each variable, with the hourly values listed in the rows and dates listed in the columns.
I asked this question differently initially, but have since thought of a better way to pose it; as such, I shan't be deleting it but rather posting my revised requirements as the original question may help others.
My data frame:
df <- structure(list(date = structure(c(17563, 17563, 17563, 17563,
17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563,
17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563,
17563, 17563, 17564, 17564, 17564, 17564, 17564, 17564, 17564,
17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564,
17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564, 17565,
17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565,
17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565,
17565, 17565, 17565, 17565, 17565, 17566, 17566, 17566, 17566,
17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566,
17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566,
17566, 17566), class = "Date"), hour = c("00", "01", "02", "03",
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "00", "01",
"02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
"11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "00", "01", "02", "03", "04", "05", "06", "07", "08",
"09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20", "21", "22", "23"), offered = c(30L, 28L, 15L, 21L, 11L,
14L, 18L, 35L, 42L, 36L, 37L, 38L, 54L, 45L, 37L, 52L, 40L, 66L,
84L, 69L, 75L, 51L, 39L, 38L, 25L, 21L, 18L, 20L, 7L, 14L, 14L,
28L, 37L, 50L, 46L, 31L, 45L, 45L, 39L, 31L, 48L, 69L, 91L, 117L,
74L, 66L, 60L, 37L, 20L, 31L, 15L, 26L, 18L, 12L, 21L, 42L, 107L,
118L, 138L, 137L, 93L, 109L, 102L, 91L, 102L, 76L, 76L, 70L,
68L, 74L, 55L, 54L, 28L, 19L, 23L, 12L, 16L, 12L, 18L, 39L, 96L,
119L, 111L, 95L, 65L, 81L, 67L, 76L, 64L, 64L, 68L, 71L, 54L,
65L, 51L, 41L), answered = c(30L, 28L, 15L, 21L, 11L, 14L, 18L,
35L, 42L, 36L, 37L, 38L, 54L, 45L, 37L, 51L, 40L, 66L, 83L, 68L,
74L, 51L, 39L, 38L, 25L, 21L, 18L, 20L, 7L, 14L, 14L, 28L, 37L,
49L, 46L, 31L, 43L, 45L, 39L, 31L, 47L, 65L, 81L, 83L, 61L, 65L,
58L, 37L, 20L, 31L, 15L, 25L, 17L, 12L, 21L, 42L, 106L, 115L,
134L, 127L, 93L, 107L, 97L, 88L, 94L, 74L, 74L, 66L, 65L, 69L,
52L, 51L, 28L, 19L, 23L, 12L, 16L, 12L, 17L, 39L, 91L, 115L,
104L, 95L, 65L, 79L, 67L, 73L, 64L, 64L, 68L, 70L, 53L, 64L,
48L, 38L)), row.names = c(NA, -96L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(date = structure(c(17563,
17564, 17565, 17566), class = "Date"), .rows = list(1:24, 25:48,
49:72, 73:96)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
Which looks like this:
> head(df)
# A tibble: 6 x 4
# Groups: date [1]
date hour offered answered
<date> <chr> <int> <int>
1 2018-02-01 00 30 30
2 2018-02-01 01 28 28
3 2018-02-01 02 15 15
4 2018-02-01 03 21 21
5 2018-02-01 04 11 11
6 2018-02-01 05 14 14
This is how I would like the output to look (one for offered
, one for answered
):
I'm pretty sure that I can achieve this with tidyr::spread()
but haven't been able to get it to look like the image above.
How can I achieve this?
I think you can do it in two parts, select
the required columns and spread
them to wide format and then change the hour
column by pasting current hour
value with the next hour
value.
For offered
library(tidyverse)
df %>%
select(date, hour, offered) %>%
spread(date, offered) %>%
mutate(hour = paste(hour, lead(hour, default = first(hour)), sep = "-"))
# A tibble: 24 x 5
# hour `2018-02-01` `2018-02-02` `2018-02-03` `2018-02-04`
# <chr> <int> <int> <int> <int>
# 1 00-01 30 25 20 28
# 2 01-02 28 21 31 19
# 3 02-03 15 18 15 23
# 4 03-04 21 20 26 12
# 5 04-05 11 7 18 16
# 6 05-06 14 14 12 12
# 7 06-07 18 14 21 18
# 8 07-08 35 28 42 39
# 9 08-09 42 37 107 96
#10 09-10 36 50 118 119
# … with 14 more rows
and for answered
df %>%
select(date, hour, answered) %>%
spread(date, answered) %>%
mutate(hour = paste(hour, lead(hour, default = first(hour)), sep = "-"))