Search code examples
Tags: r, ggplot2, line-plot

Plotting means similar to interaction.plot with ggplot2


I have a data frame that looks like this:

        head(length_test)
      S.UNIT          GENRE  PATTERN
    1    105   Conversation THAT_EXT
    2     61   Conversation THAT_EXT
    3    210   Conversation   TO_EXT
    4    196   Conversation   TO_EXT
    5    166 Academic Prose   TO_EXT
    6    152 Academic Prose   TO_EXT
> str(length_test)
'data.frame':   7329 obs. of  3 variables:
 $ S.UNIT : int  105 61 210 196 166 152 152 152 152 150 ...
 $ GENRE  : Factor w/ 5 levels "Academic Prose",..: 2 2 2 2 1 1 1 1 2 2 ...
 $ PATTERN: Factor w/ 6 levels "THAT_EXT","THAT_EXT_NT",..: 1 1 5 5 5 5 5 5 5 5 ...

What I want to do is produce a plot like this, but using ggplot2:

# Evaluate the call inside length_test so the GENRE, PATTERN and S.UNIT columns
# are found without relying on attach(); the default axis labels are unchanged.
with(length_test, interaction.plot(GENRE, PATTERN, S.UNIT))

enter image description here

My problem is that I can't plot just the means; instead, I get something like this:

# The asker's attempt: S.UNIT by GENRE, with one colour and group per PATTERN.
ggplot(data = length_test,
       aes(x = GENRE, y = S.UNIT, colour = PATTERN, group=PATTERN)) +
  # geom_line() is drawn from the raw S.UNIT values, so the lines connect every
  # observation rather than only the group means.
  geom_line() +
  # Only this layer is summarised: it draws points at the mean of S.UNIT.
  stat_summary(fun.y=mean, geom="point")

enter image description here

My problem is that I can't get just the means plotted. Most likely I'm getting the stat_summary thing wrong, but I haven't been able to come up with a solution. Any ideas?

A small reproducible example:

# Sample of `length_test` as an R literal (looks like dput() output): 102 rows
# with an integer S.UNIT column and factor columns GENRE and PATTERN.
# The value is not assigned here; store it as `length_test` (for example
# `length_test <- structure(...)`) before running the plotting snippets.
structure(list(S.UNIT = c(42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L), GENRE = structure(c(2L, 2L, 1L, 3L, 3L, 
2L, 2L, 5L, 2L, 3L, 1L, 1L, 5L, 5L, 1L, 4L, 5L, 5L, 1L, 5L, 2L, 
5L, 5L, 1L, 4L, 3L, 5L, 5L, 1L, 1L, 3L, 2L, 5L, 1L, 2L, 5L, 5L, 
1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 4L, 3L, 3L, 1L, 3L, 5L, 5L, 5L, 
5L, 5L, 5L, 4L, 1L, 1L, 1L, 1L, 4L, 5L, 1L, 5L, 1L, 1L, 2L, 4L, 
1L, 1L, 4L, 1L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 
5L), .Label = c("Academic Prose", "Conversation", "News", "Novels", 
"Popular Science"), class = "factor"), PATTERN = structure(c(6L, 
6L, 6L, 2L, 4L, 4L, 4L, 5L, 6L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 
5L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 5L, 5L, 1L, 1L, 1L, 3L, 5L, 5L, 
1L, 5L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 5L, 1L, 3L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L), .Label = c("THAT_EXT", "THAT_EXT_NT", "THAT_POST", 
"THAT_POST_NT", "TO_EXT", "TO_POST"), class = "factor")), .Names = c("S.UNIT", 
"GENRE", "PATTERN"), class = "data.frame", row.names = c(NA, 
-102L))

Solution

  • Use stat_summary() with geom = "line" instead of geom_line() to connect the points. Here, geom_line() connects all of the raw observations, not only the mean values.

    # Both layers summarise S.UNIT to its mean within each GENRE/PATTERN
    # combination, so the lines join the means rather than the raw observations.
    # `fun` replaces the `fun.y` argument, which was deprecated in ggplot2 3.3.0.
    ggplot(data = length_test,
           aes(x = GENRE, y = S.UNIT, colour = PATTERN, group = PATTERN)) +
      stat_summary(fun = mean, geom = "point") +
      stat_summary(fun = mean, geom = "line")
    

    enter image description here