I have a dataset that you can download from
https://www.dropbox.com/s/j4qnib9fhgrluln/RadData_Hourly_2005_2020.csv?dl=0
It contains hourly measurements from a location.
For each variable, I would like to draw a line plot of the measurements, with the day and month on the x-axis and multiple lines, one for each year.
I prepared the data frame with the code:
# Read the hourly export and reshape it to long format: one row per
# timestamp and measured variable.
#
# read.csv() is used instead of readr::read_csv(): it accepts `header`, and
# it sanitises the column names into the syntactic form (`Gb.i.`, ...) that
# the code below refers to. read.csv() has no `dtoken` argument, so it is
# dropped.
histdata_shinyApp <- read.csv(
  "RadData_Hourly_2005_2020.csv",
  header = TRUE,
  skip = 8 # skip the first 8 lines; the header row is read after them
) %>%
  mutate(
    # Parse and derive everything in one fixed time zone so that the calendar
    # date matches the wall-clock time in the file regardless of the session
    # time zone (as.Date() on a POSIXct would otherwise default to UTC while
    # as.POSIXct() parses in the local zone).
    # NOTE(review): assumes the file's timestamps are UTC - confirm.
    TIMESTAMP = as.POSIXct(time, format = "%Y%m%d:%H%M", tz = "UTC"),
    date = as.Date(TIMESTAMP, tz = "UTC"),
    year = format(date, "%Y"),
    # month.abb is always English, unlike format(date, "%b"), which follows
    # the session locale and would not match hard-coded English levels.
    month = factor(
      month.abb[as.integer(format(date, "%m"))],
      levels = month.abb
    ),
    day = format(date, "%d"),
    # Time of day as a lubridate period; rows whose timestamp failed to parse
    # give NA here (with a warning).
    time = hms(format(TIMESTAMP, "%H:%M:%S")),
    .before = 1
  ) %>%
  # Non-numeric entries in Gb.i. become NA (with a coercion warning).
  mutate(Gb.i. = as.numeric(Gb.i.)) %>%
  # Stack the measurement columns; values land in the default `value` column.
  pivot_longer(
    cols = Gb.i.:WS10m,
    names_to = "variable"
  )
I then tried to plot with ggplot2:
library(ggplot2)
library(dplyr)
# Attempted plot: raw Gb.i. values against month, with one line (and its
# points) per year, coloured by year.
histdata_shinyApp %>%
  filter(variable == "Gb.i.") %>%
  ggplot(aes(x = month, y = value, group = year, colour = year)) +
  geom_line() +
  geom_point()
But the result shows the sequence of all the years along the x-axis, rather than a day-month sequence with year as a factor for the line color.
I obtained a good result using ggpubr. Still, I would like to do the same with ggplot.
# ggpubr version: Gd.i. against day of month, one panel per month;
# add = "mean_se" asks ggline() to draw the mean with its standard error.
histdata_shinyApp %>%
  filter(variable == "Gd.i.") %>%
  ggline(
    x = "day",
    y = "value",
    add = "mean_se",
    facet.by = "month"
  )
The key step in this is that you're using a "summary" stat rather than an "identity" stat - i.e., ggplot
should do the calculations to determine point/line placements. To plot all lines across months, colouring by year, use stat_summary
twice:
library(tidyverse)
# Download the hourly export and reshape it to long format: one row per
# timestamp and measured variable.
histdata_shinyApp <- read.csv(
  "https://www.dropbox.com/s/j4qnib9fhgrluln/RadData_Hourly_2005_2020.csv?dl=1",
  header = TRUE,
  skip = 8 # skip the first 8 lines; the header row is read after them
) %>%
  mutate(
    # Parse and derive everything in one fixed time zone so that the calendar
    # date matches the wall-clock time in the file regardless of the session
    # time zone (as.Date() on a POSIXct would otherwise default to UTC while
    # as.POSIXct() parses in the local zone).
    # NOTE(review): assumes the file's timestamps are UTC - confirm.
    TIMESTAMP = as.POSIXct(time, format = "%Y%m%d:%H%M", tz = "UTC"),
    date = as.Date(TIMESTAMP, tz = "UTC"),
    year = format(date, "%Y"),
    # month.abb is always English, unlike format(date, "%b"), which follows
    # the session locale and would not match hard-coded English levels.
    month = factor(
      month.abb[as.integer(format(date, "%m"))],
      levels = month.abb
    ),
    day = format(date, "%d"),
    # Time of day as a lubridate period; rows whose timestamp failed to parse
    # give NA here (with a warning).
    time = hms(format(TIMESTAMP, "%H:%M:%S")),
    .before = 1
  ) %>%
  # Non-numeric entries in Gb.i. become NA (with a coercion warning).
  mutate(Gb.i. = as.numeric(Gb.i.)) %>%
  # Stack the measurement columns; values land in the default `value` column.
  pivot_longer(
    cols = Gb.i.:WS10m,
    names_to = "variable"
  )
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `time = hms(...)`.
#> Caused by warning in `.parse_hms()`:
#> ! Some strings failed to parse, or all strings are NAs
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `Gb.i. = as.numeric(Gb.i.)`.
#> Caused by warning:
#> ! NAs introduced by coercion
# Summarised plot of Gb.i.: month on the x-axis, one colour/group per year.
histdata_shinyApp %>%
  filter(variable == "Gb.i.") %>%
  ggplot(aes(x = month, y = value, group = year, colour = year)) +
  # Line through the mean of each year/month group.
  stat_summary(fun = "mean", geom = "line") +
  # No summary function supplied, so ggplot2 falls back to its default
  # (mean_se(), drawn with the default pointrange geom).
  stat_summary()