I'm trying to produce a bar graph with frequencies of multiple groups. I tried using geom_bar() but I keep running into "Error: stat_count() must not be used with a y aesthetic." I have one line for each participant, with age (2 categories), condition (2 categories), and their performance (0 or 1). From what I read in the manual and pretty much everywhere online, if I use
bar<-ggplot(data, aes(age, performance, fill = condition)) + geom_bar(position = "dodge")  # this is the call that raises the stat_count() error: a y aesthetic is mapped while geom_bar() is left on its default stat
I should get what I want (which is this), but instead I get the error and I can't figure out what I'm missing. Isn't geom_bar() supposed to give counts by default? When I use stat="identity" I get full bars like so: how it actually looks. Please help! Any advice will be greatly appreciated.
EDITED: Here's my actual data:
structure(list(ageyears = c(4L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 5L, 4L,
5L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 4L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 4L, 5L, 5L, 4L,
5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 5L, 4L), MatrixLabels = structure(c(2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Mat_sort_pass_fail = c(0L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L)), .Names = c("ageyears",
"MatrixLabels", "Mat_sort_pass_fail"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 48L,
49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L, 75L, 76L,
77L, 78L, 79L, 80L, 82L, 83L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L,
126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L,
137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L,
148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L,
159L, 160L, 197L, 198L, 200L, 201L, 202L, 203L, 204L, 205L, 206L,
207L), class = "data.frame")
From the documentation of geom_bar :
By default, geom_bar uses stat="count" which makes the height of the bar proportional to the number of cases in each group (or if the weight aesthetic is supplied, the sum of the weights). If you want the heights of the bars to represent values in the data, use stat="identity" and map a variable to the y aesthetic.
In your case the bar height should be the sum of your performance variable. That requires summarized data (one row per group) with the summed value mapped to y, so ggplot should use stat = "identity".
EDIT After OP pasted the dput:
You first need to summarize your data. I am assuming df1 is your raw data frame (the dput output above); the summarized result is stored in df, which the plotting code uses.
You can use any tool for the summarization. Below I show two ways, base R aggregate and data.table;
you can pick either of them:
### 1. Base R: aggregate()
# Total the 0/1 pass indicator within each ageyears x MatrixLabels cell of df1.
df <- aggregate(
  Mat_sort_pass_fail ~ ageyears + MatrixLabels,
  data = df1,
  FUN = sum
)
# Each cell's share of all passes (a proportion of the grand total).
df$perc <- with(df, Mat_sort_pass_fail / sum(Mat_sort_pass_fail))
# Rename to the column names the plotting code refers to.
colnames(df) <- c("age", "condition", "performance", "percentage")
### 2. Summarization using data.table
library(data.table)
# Start from the raw data frame (df1), exactly as option 1 does. The original
# read `df` here, which is the already-summarized table once option 1 has run.
# as.data.table() works on a copy, so df1 itself is not converted by reference
# the way setDT() would convert it.
dt <- as.data.table(df1)
# Total the 0/1 pass indicator within each ageyears x MatrixLabels group.
dt1 <- dt[, list(Performance = sum(Mat_sort_pass_fail)),
          by = c("ageyears", "MatrixLabels")]
# Each group's share of all passes (proportion of the grand total).
dt1[, perc := Performance / sum(Performance)]
# Back to a plain data.frame with the column names the plotting code uses.
df <- as.data.frame(dt1)
names(df) <- c("age", "condition", "performance", "percentage")
library(ggplot2)
library(RColorBrewer)
# Dodged bars of the summed passes per condition, one bar per age group.
# geom_col() draws bars whose heights are the y values as given.
ggplot(df, aes(x = condition, y = performance)) +
  geom_col(aes(fill = factor(age)), position = "dodge") +
  labs(title = "Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")
### If you need the percentage instead, run the code below:
# Same layout as the count plot, with the precomputed share on the y axis.
ggplot(df, aes(x = condition, y = percentage)) +
  geom_col(aes(fill = factor(age)), position = "dodge") +
  labs(title = "Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")