I have a dataset covering 4 years and I want to calculate the maximum temperature for each day (one measurement per hour is provided). How can I add this daily maximum as an additional column in my dataset? I don't know how to do this without dropping the other columns.
My dataset looks like this:
# Reproducible sample of the data (looks like dput() output): a 48-row tibble
# with a POSIXct DateTime column (UTC, consecutive values 3600 s apart, i.e.
# hourly) and numeric Tmin, Tmax and Tmean columns.
# Note: the expression is not assigned to a name here.
structure(list(DateTime = structure(c(1420070400, 1420074000,
1420077600, 1420081200, 1420084800, 1420088400, 1420092000, 1420095600,
1420099200, 1420102800, 1420106400, 1420110000, 1420113600, 1420117200,
1420120800, 1420124400, 1420128000, 1420131600, 1420135200, 1420138800,
1420142400, 1420146000, 1420149600, 1420153200, 1420156800, 1420160400,
1420164000, 1420167600, 1420171200, 1420174800, 1420178400, 1420182000,
1420185600, 1420189200, 1420192800, 1420196400, 1420200000, 1420203600,
1420207200, 1420210800, 1420214400, 1420218000, 1420221600, 1420225200,
1420228800, 1420232400, 1420236000, 1420239600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), Tmin = c(3.33733696, 3.2377765, 2.83953466, 3.03865558,
2.7399742, 2.44129282, 2.34173236, 2.2421719, 2.34173236, 2.54085328,
2.7399742, 2.83953466, 3.33733696, 3.43689742, 3.53645788, 2.93909512,
2.7399742, 2.83953466, 2.83953466, 2.7399742, 2.7399742, 2.7399742,
2.83953466, 2.93909512, 3.03865558, 3.43689742, 3.7355788, 3.7355788,
3.43689742, 4.03426018, 4.7311834, 3.2377765, 3.33733696, 4.53206248,
4.93030432, 6.02546938, 7.02107398, 4.93030432, 4.7311834, 4.33294156,
4.7311834, 3.83513926, 4.83074386, 4.33294156, 3.83513926, 3.2377765,
2.93909512, 2.7399742), Tmax = c(3.77972493, 3.38212841, 3.18333015,
3.18333015, 2.98453189, 2.48753624, 2.38813711, 2.28873798, 2.38813711,
2.78573363, 2.78573363, 3.18333015, 3.48152754, 4.27672058, 4.17732145,
3.48152754, 2.88513276, 2.88513276, 2.88513276, 2.88513276, 2.78573363,
2.6863345, 3.18333015, 2.98453189, 3.28272928, 3.6803258, 3.97852319,
4.27672058, 3.87912406, 4.6743171, 4.87311536, 4.47551884, 4.47551884,
4.77371623, 5.76770753, 6.86109796, 7.85508926, 8.74968143, 5.37011101,
4.47551884, 6.16530405, 4.77371623, 4.97251449, 5.17131275, 4.87311536,
4.6743171, 3.18333015, 2.98453189), Tmean = c(3.62254694166667,
3.30742526, 3.00888893, 3.07523033666667, 2.87620611666667, 2.49474302833333,
2.39523091833333, 2.262548105, 2.37864556666667, 2.7103526, 2.74352330333333,
2.959132875, 3.37376666666667, 3.77181510666667, 3.854741865,
3.32401061166667, 2.80986471, 2.84303541333333, 2.82645006166667,
2.80986471, 2.760108655, 2.7103526, 2.92596217166667, 2.97571822666667,
3.09181568833333, 3.58937623833333, 3.82157116166667, 3.98742467833333,
3.63913229333333, 4.41864382166667, 4.83327761333333, 3.62254694166667,
4.08693678833333, 4.68400944833333, 5.33083816333333, 6.49181278,
7.56986063833333, 7.28790966, 5.06547253666667, 4.451814525,
5.71230125166667, 4.38547311833333, 4.849862965, 4.849862965,
4.50157058, 3.98742467833333, 2.99230357833333, 2.79327935833333
)), row.names = c(NA, -48L), class = c("tbl_df", "tbl", "data.frame"
))
Added conversion of date-time to day-date:
Using a grouped mutate (instead of summarise), so that all rows and columns are kept:
With `dd` being the data structure provided in the question:
library(dplyr)

# Add the daily maximum of Tmax to every hourly row without dropping any
# rows or columns. `dd` is the hourly data frame from the question
# (columns DateTime, Tmin, Tmax, Tmean).
dd_new <- dd %>%
  # Truncate each timestamp to the start of its day (in the time zone of
  # DateTime) to get a grouping key. The second positional argument of
  # as.POSIXct() is `tz`, not a format, so a format string such as
  # "%d-%m-%Y" must not be passed there.
  mutate(dmy = as.POSIXct(trunc(DateTime, units = "days"))) %>%
  group_by(dmy) %>%
  # A grouped mutate() keeps one row per observation and repeats the
  # group's maximum on each of them; summarise() would collapse the rows.
  mutate(max_temp = max(Tmax)) %>%
  ungroup() %>%
  as.data.frame()

str(dd_new)
Returns now:
'data.frame': 48 obs. of 6 variables: $ DateTime: POSIXct, format: "2015-01-01 00:00:00" "2015-01-01 01:00:00" "2015-01-01 02:00:00" ... $ Tmin : num 3.34 3.24 2.84 3.04 2.74 ... $ Tmax : num 3.78 3.38 3.18 3.18 2.98 ... $ Tmean : num 3.62 3.31 3.01 3.08 2.88 ... $ dmy : POSIXct, format: "2015-01-01 01:00:00" "2015-01-01 01:00:00" "2015-01-01 01:00:00" ... $ max_temp: num 4.28 4.28 4.28 4.28 4.28 ...