I am familiar with R
, but not very much with plotting. I have panel data as follows:
library(plm)
library(dplyr)
data("EmplUK", package="plm")
EmplUK <- EmplUK %>%
group_by(firm, year) %>%
mutate(Vote = sample(c(0,1),1) ,
Vote_won = ifelse(Vote==1, sample(c(0,1),1),0))
# EDIT:
EmplUK <- pdata.frame(EmplUK , index=c("firm", "year"), drop.index = FALSE)
# A tibble: 1,031 x 9
# Groups: firm, year [1,031]
firm year sector emp wage capital output Vote Vote_won
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1977 7 5.04 13.2 0.589 95.7 1 0
2 1 1978 7 5.60 12.3 0.632 97.4 0 0
3 1 1979 7 5.01 12.8 0.677 99.6 1 1
4 1 1980 7 4.72 13.8 0.617 101. 1 1
5 1 1981 7 4.09 14.3 0.508 99.6 0 0
6 1 1982 7 3.17 14.9 0.423 98.6 0 0
7 1 1983 7 2.94 13.8 0.392 100. 0 0
8 2 1977 7 71.3 14.8 16.9 95.7 1 0
9 2 1978 7 70.6 14.1 17.2 97.4 1 1
10 2 1979 7 70.9 15.0 17.5 99.6 1 1
toplot <- plm(output ~ wage, data=EmplUK, model="within")
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
wage -0.707 0.143 -4.94 0.00000095 ***
I would like to evaluate what the best relation between two variables in panel data are (linear, quadratic, polynomial) by visualising the relation between output and wage (and perhaps fitting such linear, quadratic, polynomial). I am however super unfamiliar with plotting.
I am looking for something like this (source) (where I get the formula for the fitted line):
I tried starting out as follows:
plot(EmplUK$output,EmplUK$wage,type='l',col='red',main='Linear relationship')
But that gives me this:
In all honesty I have very little idea what I am doing here. Is there anyone who could get me in the right direction?
I would probably do it with the de-meaned data.
demeaned_data <- EmplUK %>%
group_by(firm) %>%
mutate(across(c(output, wage), function(x)x-mean(x)))
ggplot(demeaned_data, aes(x=wage, y=output)) +
geom_point() +
geom_smooth(aes(colour="linear", fill="linear"),
method="lm",
formula=y ~ x, ) +
geom_smooth(aes(colour="quadratic", fill="quadratic"),
method="lm",
formula=y ~ x + I(x^2)) +
geom_smooth(aes(colour="cubic", fill="cubic"),
method="lm",
formula=y ~ x + I(x^2) + I(x^3)) +
scale_fill_brewer(palette="Set1") +
scale_colour_brewer(palette="Set1") +
theme_classic() +
labs(colour="Functional Form", fill="Functional Form")
An alternative would be to estimate the model with OLS and firm dummy variables and then you could get predictions for each firm and plot them separately.
library(ggeffects)
data("EmplUK", package="plm")
EmplUK <- EmplUK %>% mutate(firm = as.factor(firm))
m1 <- lm(output ~ wage + firm, data=EmplUK )
m2 <- lm(output ~ wage + I(wage^2) + firm, data=EmplUK )
m3 <- lm(output ~ wage + I(wage^2) + I(wage^3) + firm, data=EmplUK )
p1 <- ggpredict(m1, terms=c("wage", "firm")) %>%
mutate(form="linear") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
p2 <- ggpredict(m2, terms=c("wage", "firm")) %>%
mutate(form="quadratic") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
p3 <- ggpredict(m3, terms=c("wage", "firm")) %>%
mutate(form="cubic") %>%
rename("wage" = "x",
"firm" = "group",
"output" = "predicted")
ggplot() +
geom_line(data=p1, aes(x=wage, y=output, colour="linear")) +
geom_line(data=p2, aes(x=wage, y=output, colour="quadratic")) +
geom_line(data=p3, aes(x=wage, y=output, colour="cubic")) +
geom_point(data=EmplUK, aes(x=wage, y=output)) +
facet_wrap(~firm) +
theme_bw() +
labs(colour="Functional\nForm")