I am trying to plot trend lines, together with their equations and R-squared values, for three variables (SA, SA1, SA2) using ggplot2's geom_smooth().
When I plot the three variables I get only one trend line. What is the cause, and how can I fix it? DATA LINK (expected: 3 trend lines + 3 equations with R^2)
library(ggplot2)
# Read the first worksheet into a data frame. read.xlsx2() lives in the xlsx
# package, which this snippet never attaches, so the call is namespaced.
# NOTE(review): read.xlsx2() appears to read every column as text unless
# `colClasses` is supplied -- confirm against the xlsx documentation.
test <- xlsx::read.xlsx2("filepath/test.xlsx", sheetIndex = 1, header = TRUE)
> test
year SA SA1 SA2
1 2008 1.409155e+15 3.632740e+17 4.06998e+15
2 2009 1.533598e+15 3.767342e+17 4.05015e+15
..
..
10 2017 1.761596e+15 3.581407e+17 3.03403e+15
11 2018 1.677707e+15 3.428239e+17 3.15862e+15
dput(test)
structure(list(year = structure(1:11, .Label = c("2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018"), class = "factor"), SA = c(1409155313839800, 1533598052716370,
1524727969175020, 1583941250825040, 1597021832828680, 1549362217661020,
1607700438214130, 1592107298305410, 1735331260744350, 1761596167580970,
1677707298223350), SA1 = c(363273957183114432, 376734225895083200,
355896023882281984, 368398075167704192, 367791249493954048, 360257619620708800,
360061958768956736, 367763926166363648, 355088403981918272, 358140732212706304,
342823915606135936), SA2 = c(4.06998e+15, 4.05015e+15, 3.94057e+15,
3.9507e+15, 3.58963e+15, 3.53037e+15, 3.43302e+15, 3.20139e+15,
3.94638e+15, 3.03403e+15, 3.15862e+15)), row.names = c(NA, -11L
), class = "data.frame")
# Make sure the three measurement columns are numeric.
# The as.numeric(levels(x))[x] idiom is only valid for factors: on a column
# that is already numeric (as in the dput() output) or character, levels(x)
# is NULL and every value silently becomes NA.
sa_cols <- c("SA", "SA1", "SA2")
test[sa_cols] <- lapply(test[sa_cols], function(x) {
  if (is.numeric(x)) {
    x  # already numeric: leave untouched to avoid a lossy round trip via text
  } else {
    as.numeric(as.character(x))  # factor or character: parse the labels
  }
})
# Draw the three series against a shared primary axis. SA1 is divided by 100
# so it fits the primary scale; the secondary axis multiplies back by 100.
# geom_smooth() previously sat inside the argument list of the third
# geom_line() call, which does not parse; it is now a layer of its own.
# Columns are referenced by bare name inside aes() instead of test$col.
# NOTE: geom_smooth() has no mapping of its own, so it inherits only the
# top-level aes(x = year, y = SA) and therefore fits one trend line (for SA).
ggplot(test, aes(x = year, y = SA, group = 1)) +
  geom_line() +
  geom_line(aes(y = SA2), color = "red") +
  geom_line(aes(y = SA1 / 100), size = 1, color = "blue") +
  geom_smooth(method = "lm") +
  scale_y_continuous(
    name = " Primary axis",
    sec.axis = sec_axis(~ . * 100, name = "Secondary axis")
  )
I also tried reshaping the data and then plotting the trend lines for the 3 variables, but the secondary axis got distorted.
# Reshape to long format: `year` is kept as the identifier and the remaining
# columns are stacked into `variable` / `value`.
# `id.vars` is spelled out in full instead of relying on partial matching.
df <- reshape2::melt(test, id.vars = "year")
df
year variable value
1 2008 SA 1.409155e+15
2 2009 SA 1.533598e+15
3 2010 SA1 1.524728e+15
4 2011 SA1 1.583941e+15
..
..
5 2017 SA2 1.597022e+15
6 2018 SA2 1.549362e+15
# Long-format plot: one line and one linear trend per `variable` group, all
# drawn on the same y scale, with a secondary axis at 100x the primary.
ggplot(df, aes(x = year, y = value, group = variable)) +
  geom_line() +
  scale_y_continuous(
    name = "y axis",
    sec.axis = sec_axis(~ . * 100, name = "y axis")
  ) +
  geom_smooth(method = "lm")
Using the code provided by @Rui Barradas - Reinstate Monica, I got the following plot:
library(dplyr)
library(tidyr)
library(ggplot2)
# Rescale SA1 so it shares the primary axis, reshape to long format, then draw
# one line plus one linear trend per series.
test %>%
  mutate(SA1 = SA1 / 100) %>%
  # pivot_longer() replaces the superseded gather(); every column except
  # `year` is stacked into the `sa` / `value` pair.
  pivot_longer(cols = -year, names_to = "sa", values_to = "value") %>%
  ggplot(aes(x = year, y = value, group = sa, colour = sa)) +
  geom_line() +
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_color_manual(values = c("black", "red", "blue")) +
  scale_y_continuous(
    name = " Primary axis",
    # the secondary axis undoes the /100 applied to SA1 above
    sec.axis = sec_axis(~ . * 100, name = "Secondary axis")
  )
This sort of problem is usually a data reformatting problem. See reshaping data.frame from wide to long format.
And with the data as posted there is no need to coerce columns SA, SA1 and SA2 to numeric; they were not posted as factors.
library(dplyr)
library(tidyr)
library(ggplot2)
# Rescale SA1 so it shares the primary axis, reshape to long format, then draw
# one line plus one linear trend per series.
test %>%
  mutate(SA1 = SA1 / 100) %>%
  # pivot_longer() replaces the superseded gather(); every column except
  # `year` is stacked into the `sa` / `value` pair.
  pivot_longer(cols = -year, names_to = "sa", values_to = "value") %>%
  ggplot(aes(x = year, y = value, group = sa, colour = sa)) +
  geom_line() +
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_color_manual(values = c("black", "red", "blue")) +
  scale_y_continuous(
    name = " Primary axis",
    # the secondary axis undoes the /100 applied to SA1 above
    sec.axis = sec_axis(~ . * 100, name = "Secondary axis")
  )