I have a data frame that looks like this:
head(length_test)
S.UNIT GENRE PATTERN
1 105 Conversation THAT_EXT
2 61 Conversation THAT_EXT
3 210 Conversation TO_EXT
4 196 Conversation TO_EXT
5 166 Academic Prose TO_EXT
6 152 Academic Prose TO_EXT
> str(length_test)
'data.frame': 7329 obs. of 3 variables:
$ S.UNIT : int 105 61 210 196 166 152 152 152 152 150 ...
$ GENRE : Factor w/ 5 levels "Academic Prose",..: 2 2 2 2 1 1 1 1 2 2 ...
$ PATTERN: Factor w/ 6 levels "THAT_EXT","THAT_EXT_NT",..: 1 1 5 5 5 5 5 5 5 5 ...
What I want to do is produce a plot like this, but using ggplot2:
# Base-graphics reference plot: mean S.UNIT per GENRE, one trace per PATTERN.
# with() evaluates the column names inside length_test, so the call works
# without attach()-ing the data frame.
with(length_test, interaction.plot(GENRE, PATTERN, S.UNIT))
My problem is that I can't plot just the means; instead, I get something like this:
# Attempt that does NOT give the desired plot: geom_line() has no summary
# step, so it connects every raw observation within each PATTERN group;
# only the stat_summary() point layer is reduced to the group means.
ggplot(data = length_test,
aes(x = GENRE, y = S.UNIT, colour = PATTERN, group=PATTERN)) +
geom_line() +
stat_summary(fun.y=mean, geom="point")
My problem is that I can't get just the means plotted. Most likely I'm getting the stat_summary() call wrong, but I haven't been able to come up with a solution. Any ideas?
A small reproducible example:
# Sample of the data as a structure() literal: a 102-row data.frame with
# columns S.UNIT (integer), GENRE (factor, 5 levels) and PATTERN (factor,
# 6 levels). The expression is not assigned here; assign its result to
# length_test to run the plotting snippets in this post.
structure(list(S.UNIT = c(42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L), GENRE = structure(c(2L, 2L, 1L, 3L, 3L,
2L, 2L, 5L, 2L, 3L, 1L, 1L, 5L, 5L, 1L, 4L, 5L, 5L, 1L, 5L, 2L,
5L, 5L, 1L, 4L, 3L, 5L, 5L, 1L, 1L, 3L, 2L, 5L, 1L, 2L, 5L, 5L,
1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 4L, 3L, 3L, 1L, 3L, 5L, 5L, 5L,
5L, 5L, 5L, 4L, 1L, 1L, 1L, 1L, 4L, 5L, 1L, 5L, 1L, 1L, 2L, 4L,
1L, 1L, 4L, 1L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 5L, 5L, 5L, 5L, 4L, 5L,
5L), .Label = c("Academic Prose", "Conversation", "News", "Novels",
"Popular Science"), class = "factor"), PATTERN = structure(c(6L,
6L, 6L, 2L, 4L, 4L, 4L, 5L, 6L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
5L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 5L, 5L, 1L, 1L, 1L, 3L, 5L, 5L,
1L, 5L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 5L, 1L, 3L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L), .Label = c("THAT_EXT", "THAT_EXT_NT", "THAT_POST",
"THAT_POST_NT", "TO_EXT", "TO_POST"), class = "factor")), .Names = c("S.UNIT",
"GENRE", "PATTERN"), class = "data.frame", row.names = c(NA,
-102L))
You can use stat_summary() instead of geom_line() to connect the points with lines. In this case geom_line() connects all the points, not only the mean values.
# Plot only the group means: both layers summarise S.UNIT with mean() for
# each GENRE x PATTERN combination, so the line layer joins the mean points
# rather than every raw observation.
# `fun` supersedes the `fun.y` argument, which is deprecated as of
# ggplot2 3.3.0 (on older ggplot2 versions use `fun.y = mean` instead).
ggplot(
  data = length_test,
  aes(x = GENRE, y = S.UNIT, colour = PATTERN, group = PATTERN)
) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line")