Search code examples
rggplot2

Can I use geom_text or similar to add group by sum rather than amending original data frame?


Here's a bar plot:

ggplot(filtered_funnel, aes(x = reorder(Funnel, -Sessions), y = Sessions)) +
      geom_bar(stat = "identity", fill = "#008080", alpha = 0.6) +
      xlab("Step") +
      ylab("Events") +
      scale_y_continuous(labels = function(l) {l = l / 1000; paste0(l, "K")}) +
      geom_text(aes(label = Sessions, group = Channel), color = "white")

It looks like this (note the text labels in white): enter image description here

This is because the data df filtered_funnel is actually split out by a field "Channel". I need the bar to be based on the grouped sum of e.g. sessions rather than each individual channel sessions.

Here's a glimpse of the source data:

> glimpse(filtered_funnel)
Observations: 108
Variables: 4
$ Channel  <chr> "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", ...
$ Promo    <chr> "apples", "apples", "apples", "banannas", "banannas", "banannas", "carrots", "carrots", "carrots", "none", "none", "none...
$ Funnel   <chr> "Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetai...
$ Sessions <dbl> 3993, 6332, 2224, 1237, 1962, 689, 2234, 3543, 1244, 42378, 4672, 28120, 87187, 7408, 2602, 611, 969, 340, 4462, 7280, 2...


filtered_funnel
Source: local data frame [108 x 4]
Groups: Channel, Promo [?]

   Channel    Promo          Funnel Sessions
     <chr>    <chr>           <chr>    <dbl>
1   Direct   apples        Checkout     3993
2   Direct   apples ShippingDetails     6332
3   Direct   apples    Transactions     2224
4   Direct banannas        Checkout     1237
5   Direct banannas ShippingDetails     1962
6   Direct banannas    Transactions      689
7   Direct  carrots        Checkout     2234
8   Direct  carrots ShippingDetails     3543
9   Direct  carrots    Transactions     1244
10  Direct     none       AddToCart    42378
# ... with 98 more rows

It looks like ggplot is adding the individual values of each component rather than the sum e.g. just for the first bar in the image (Sessions step)

> filtered_funnel %>% filter(Funnel == "Sessions")
Source: local data frame [6 x 4]
Groups: Channel, Promo [6]

   Channel Promo   Funnel Sessions
     <chr> <chr>    <chr>    <dbl>
1   Direct  none Sessions    87187
2    Email  none Sessions   110035
3 Facebook  none Sessions    79734
4  Organic  none Sessions    80768
5      SEM  none Sessions    94610
6  Youtube  none Sessions    66681

I can see the value 110035 on the image and in the table. What I actually want ggplot to do is add the sum total for sessions.

Since this is being done within a Shiny app I'm trying to avoid medling with the source data since I'm using filter box inputs to filter the data frame. Also, I saw other SO answers that seemed to suggest what I wnat is possible, I just couldn't get it to work e.g.

How can I get ggplot to add the sum total for each bar? Can I do this using ggplot grouping and summing rather than altering the source data I feed into ggplot in the aes?

---- dput of data--- Following a comment, here's the data (which has been randomly generated anyway so no NDA issues)

> dput(filtered_funnel)
structure(list(Channel = c("Direct", "Direct", "Direct", "Direct", 
"Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", 
"Direct", "Direct", "Direct", "Direct", "Direct", "Direct", "Direct", 
"Email", "Email", "Email", "Email", "Email", "Email", "Email", 
"Email", "Email", "Email", "Email", "Email", "Email", "Email", 
"Email", "Email", "Email", "Email", "Facebook", "Facebook", "Facebook", 
"Facebook", "Facebook", "Facebook", "Facebook", "Facebook", "Facebook", 
"Facebook", "Facebook", "Facebook", "Facebook", "Facebook", "Facebook", 
"Facebook", "Facebook", "Facebook", "Organic", "Organic", "Organic", 
"Organic", "Organic", "Organic", "Organic", "Organic", "Organic", 
"Organic", "Organic", "Organic", "Organic", "Organic", "Organic", 
"Organic", "Organic", "Organic", "SEM", "SEM", "SEM", "SEM", 
"SEM", "SEM", "SEM", "SEM", "SEM", "SEM", "SEM", "SEM", "SEM", 
"SEM", "SEM", "SEM", "SEM", "SEM", "Youtube", "Youtube", "Youtube", 
"Youtube", "Youtube", "Youtube", "Youtube", "Youtube", "Youtube", 
"Youtube", "Youtube", "Youtube", "Youtube", "Youtube", "Youtube", 
"Youtube", "Youtube", "Youtube"), Promo = c("apples", "apples", 
"apples", "banannas", "banannas", "banannas", "carrots", "carrots", 
"carrots", "none", "none", "none", "none", "none", "none", "pears", 
"pears", "pears", "apples", "apples", "apples", "banannas", "banannas", 
"banannas", "carrots", "carrots", "carrots", "none", "none", 
"none", "none", "none", "none", "pears", "pears", "pears", "apples", 
"apples", "apples", "banannas", "banannas", "banannas", "carrots", 
"carrots", "carrots", "none", "none", "none", "none", "none", 
"none", "pears", "pears", "pears", "apples", "apples", "apples", 
"banannas", "banannas", "banannas", "carrots", "carrots", "carrots", 
"none", "none", "none", "none", "none", "none", "pears", "pears", 
"pears", "apples", "apples", "apples", "banannas", "banannas", 
"banannas", "carrots", "carrots", "carrots", "none", "none", 
"none", "none", "none", "none", "pears", "pears", "pears", "apples", 
"apples", "apples", "banannas", "banannas", "banannas", "carrots", 
"carrots", "carrots", "none", "none", "none", "none", "none", 
"none", "pears", "pears", "pears"), Funnel = c("Checkout", "ShippingDetails", 
"Transactions", "Checkout", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "AddToCart", "Checkout", 
"Registrations", "Sessions", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetails", 
"Transactions", "Checkout", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "AddToCart", "Checkout", 
"Registrations", "Sessions", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetails", 
"Transactions", "Checkout", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "AddToCart", "Checkout", 
"Registrations", "Sessions", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetails", 
"Transactions", "Checkout", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "AddToCart", "Checkout", 
"Registrations", "Sessions", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetails", 
"Transactions", "Checkout", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "AddToCart", "Checkout", 
"Registrations", "Sessions", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "Checkout", "ShippingDetails", 
"Transactions", "Checkout", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions", "AddToCart", "Checkout", 
"Registrations", "Sessions", "ShippingDetails", "Transactions", 
"Checkout", "ShippingDetails", "Transactions"), Sessions = c(3993, 
6332, 2224, 1237, 1962, 689, 2234, 3543, 1244, 42378, 4672, 28120, 
87187, 7408, 2602, 611, 969, 340, 4462, 7280, 2304, 549, 896, 
283, 2094, 3417, 1081, 42251, 5666, 29094, 110035, 9244, 2926, 
256, 418, 132, 129, 191, 85, 3078, 4557, 2039, 120, 178, 79, 
13977, 90, 9727, 79734, 134, 59, 1142, 1691, 756, 3125, 4655, 
1985, 1724, 2568, 1095, 3109, 4631, 1975, 34756, 2864, 23453, 
80768, 4266, 1819, 249, 371, 158, 1839, 2661, 1223, 1543, 2232, 
1026, 2007, 2904, 1335, 24090, 1792, 15272, 94610, 2593, 1192, 
479, 693, 318, 800, 1245, 522, 1734, 2698, 1132, 930, 1447, 607, 
22349, 1436, 14478, 66681, 2235, 937, 1579, 2457, 1031)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -108L), drop = TRUE, .Names = c("Channel", "Promo", 
"Funnel", "Sessions"), indices = list(0:2, 3:5, 6:8, 9:14, 15:17, 
    18:20, 21:23, 24:26, 27:32, 33:35, 36:38, 39:41, 42:44, 45:50, 
    51:53, 54:56, 57:59, 60:62, 63:68, 69:71, 72:74, 75:77, 78:80, 
    81:86, 87:89, 90:92, 93:95, 96:98, 99:104, 105:107), group_sizes = c(3L, 
3L, 3L, 6L, 3L, 3L, 3L, 3L, 6L, 3L, 3L, 3L, 3L, 6L, 3L, 3L, 3L, 
3L, 6L, 3L, 3L, 3L, 3L, 6L, 3L, 3L, 3L, 3L, 6L, 3L), biggest_group_size = 6L, labels = structure(list(
    Channel = c("Direct", "Direct", "Direct", "Direct", "Direct", 
    "Email", "Email", "Email", "Email", "Email", "Facebook", 
    "Facebook", "Facebook", "Facebook", "Facebook", "Organic", 
    "Organic", "Organic", "Organic", "Organic", "SEM", "SEM", 
    "SEM", "SEM", "SEM", "Youtube", "Youtube", "Youtube", "Youtube", 
    "Youtube"), Promo = c("apples", "banannas", "carrots", "none", 
    "pears", "apples", "banannas", "carrots", "none", "pears", 
    "apples", "banannas", "carrots", "none", "pears", "apples", 
    "banannas", "carrots", "none", "pears", "apples", "banannas", 
    "carrots", "none", "pears", "apples", "banannas", "carrots", 
    "none", "pears")), class = "data.frame", row.names = c(NA, 
-30L), drop = TRUE, .Names = c("Channel", 
"Promo")))

Solution

  • You can use stat_summary to calculate the sums, and use that for both the y position and the label:

    ggplot(filtered_funnel, aes(x = reorder(Funnel, -Sessions), y = Sessions)) +
      geom_bar(stat = "identity", fill = "#008080", alpha = 0.6) +
      stat_summary(
        aes(label = after_stat(y)), fun = 'sum', geom = 'text', col = 'white', vjust = 1.5
      ) +
      scale_y_continuous(labels = function(l) {l = l / 1000; paste0(l, "K")})  +
      xlab("Step") + ylab("Events")
    

    enter image description here