Search code examples
rggplot2tidyverseggnewscale

Plotting continuous distribution in horizontal bar plot


This was my earlier question where it was solved using multiple distribution.

I want to plot the continuous variable like age or tumor mutation burden as shown in first figure with a range like a window such 20-30 age group or some mutational burden range

The frequencies are calculated for all the variables of the metadata, but when plotting the age is not mapped to the final plot as show in the second plot.

Does the age need to be converted into other class before plotting?

distribution

library(dplyr)
library(ggplot2)
library(tidyverse)
library(ggnewscale)

plot_meta <- read.csv("figure1/TCGA_data/BEAT_AML/TCGA_AML/full_cleaned_meta.txt",sep = "\t")
head(plot_meta)


colnames(plot_meta)
x <- colnames(plot_meta)
b <- gsub(".", "-", x, fixed = TRUE)
colnames(plot_meta) <- b
colnames(plot_meta)
 

df1 <-
  plot_meta %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(everything()) %>%
  count(name, value) %>%
  group_by(name) %>%
  mutate(perc = n / sum(n) * 100)
df1

df1 %>%
  ggplot(aes(name, perc)) +
  geom_col(data = ~ filter(.x, name == "FAB") %>% rename(FAB = value), mapping = aes(fill = FAB)) +
  new_scale_fill() +
  geom_col(data = ~ filter(.x, name == "Sex") %>% rename(Sex = value), mapping = aes(fill = Sex)) +
  new_scale_fill() +
  geom_col(data = ~ filter(.x, name == "Age") %>% rename(Age = value), mapping = aes(fill = Age)) +
  coord_flip()

The issue is I'm not able to plot the continuous distribution like the age as i have plotted only two other variables,what is going wrong here. Any help or suggestion would be really appreciated

My databsubset

a <- dput(head(plot_meta))
structure(list(patient = structure(c(36L, 33L, 122L, 95L, 66L, 
49L), .Label = c("TCGA-AB-2805", "TCGA-AB-2806", "TCGA-AB-2808", 
"TCGA-AB-2810", "TCGA-AB-2811", "TCGA-AB-2812", "TCGA-AB-2813", 
"TCGA-AB-2814", "TCGA-AB-2815", "TCGA-AB-2817", "TCGA-AB-2818", 
"TCGA-AB-2819", "TCGA-AB-2820", "TCGA-AB-2821", "TCGA-AB-2822", 
"TCGA-AB-2823", "TCGA-AB-2825", "TCGA-AB-2826", "TCGA-AB-2828", 
"TCGA-AB-2830", "TCGA-AB-2834", "TCGA-AB-2835", "TCGA-AB-2836", 
"TCGA-AB-2839", "TCGA-AB-2840", "TCGA-AB-2841", "TCGA-AB-2842", 
"TCGA-AB-2843", "TCGA-AB-2844", "TCGA-AB-2845", "TCGA-AB-2846", 
"TCGA-AB-2847", "TCGA-AB-2849", "TCGA-AB-2851", "TCGA-AB-2853", 
"TCGA-AB-2856", "TCGA-AB-2857", "TCGA-AB-2858", "TCGA-AB-2859", 
"TCGA-AB-2861", "TCGA-AB-2862", "TCGA-AB-2863", "TCGA-AB-2865", 
"TCGA-AB-2866", "TCGA-AB-2867", "TCGA-AB-2869", "TCGA-AB-2870", 
"TCGA-AB-2871", "TCGA-AB-2872", "TCGA-AB-2873", "TCGA-AB-2874", 
"TCGA-AB-2875", "TCGA-AB-2876", "TCGA-AB-2877", "TCGA-AB-2878", 
"TCGA-AB-2880", "TCGA-AB-2881", "TCGA-AB-2882", "TCGA-AB-2883", 
"TCGA-AB-2884", "TCGA-AB-2885", "TCGA-AB-2886", "TCGA-AB-2888", 
"TCGA-AB-2889", "TCGA-AB-2890", "TCGA-AB-2891", "TCGA-AB-2892", 
"TCGA-AB-2893", "TCGA-AB-2894", "TCGA-AB-2895", "TCGA-AB-2896", 
"TCGA-AB-2897", "TCGA-AB-2898", "TCGA-AB-2899", "TCGA-AB-2900", 
"TCGA-AB-2901", "TCGA-AB-2908", "TCGA-AB-2910", "TCGA-AB-2911", 
"TCGA-AB-2912", "TCGA-AB-2913", "TCGA-AB-2914", "TCGA-AB-2915", 
"TCGA-AB-2916", "TCGA-AB-2917", "TCGA-AB-2918", "TCGA-AB-2919", 
"TCGA-AB-2920", "TCGA-AB-2921", "TCGA-AB-2924", "TCGA-AB-2925", 
"TCGA-AB-2927", "TCGA-AB-2928", "TCGA-AB-2929", "TCGA-AB-2930", 
"TCGA-AB-2931", "TCGA-AB-2932", "TCGA-AB-2933", "TCGA-AB-2934", 
"TCGA-AB-2935", "TCGA-AB-2936", "TCGA-AB-2937", "TCGA-AB-2938", 
"TCGA-AB-2939", "TCGA-AB-2940", "TCGA-AB-2941", "TCGA-AB-2942", 
"TCGA-AB-2943", "TCGA-AB-2944", "TCGA-AB-2946", "TCGA-AB-2948", 
"TCGA-AB-2949", "TCGA-AB-2950", "TCGA-AB-2952", "TCGA-AB-2955", 
"TCGA-AB-2956", "TCGA-AB-2959", "TCGA-AB-2963", "TCGA-AB-2965", 
"TCGA-AB-2966", "TCGA-AB-2970", "TCGA-AB-2971", "TCGA-AB-2973", 
"TCGA-AB-2975", "TCGA-AB-2976", "TCGA-AB-2977", "TCGA-AB-2979", 
"TCGA-AB-2980", "TCGA-AB-2981", "TCGA-AB-2982", "TCGA-AB-2983", 
"TCGA-AB-2984", "TCGA-AB-2986", "TCGA-AB-2987", "TCGA-AB-2988", 
"TCGA-AB-2990", "TCGA-AB-2991", "TCGA-AB-2992", "TCGA-AB-2994", 
"TCGA-AB-2995", "TCGA-AB-2996", "TCGA-AB-2998", "TCGA-AB-2999", 
"TCGA-AB-3000", "TCGA-AB-3001", "TCGA-AB-3002", "TCGA-AB-3007", 
"TCGA-AB-3008", "TCGA-AB-3009", "TCGA-AB-3011", "TCGA-AB-3012"
), class = "factor"), Sex = structure(c(2L, 2L, 1L, 1L, 2L, 2L
), .Label = c("Female", "Male"), class = "factor"), FAB = structure(c(5L, 
1L, 5L, 3L, 2L, 4L), .Label = c("M0", "M1", "M2", "M3", "M4", 
"M5", "M6", "M7", "nc"), class = "factor"), `Diagnosis-Age` = c(63L, 
39L, 76L, 62L, 42L, 42L), `Bone-Marrow-Blast-Percentage` = c(82L, 
83L, 91L, 72L, 68L, 88L), Cytogenetics = structure(c(75L, 93L, 
51L, 27L, 21L, 57L), .Label = c("37~49,XY,+Y,der(1)add(1)(p13)del(1)(q21q25),-5,der(7)inv(7)(p15q11.2)?inv(7)(q22q32),+17,add(17)(p13),+21,+mar[cp20]", 
"39~47,XX,del(5)(q13q33),-7,der(8)t(8;?8;8)(p23;?p11.2p23;q11.2),der(14)t(1;14)(p12;p11.2)der(1)t(7;16)(p15;q22),+2mar[cp19]", 
"41~44,X,?i(X)(p10),-7,der(12)t(8;12)(q11.2;p11.2),-8 [cp11]/46,XX[8[", 
"42,XY,-5,-7,add(12)(p13),t(14;15)(q10;q10),der(17)t(5;17)(p13;p11.2),-18[6]/40,idem,-11,-add(12)(p13),der(12)t(?;12)(?;p13),-19[6]/41,idem,-der(17)[3]/41,idem,-der(17),+mar1,+mar[3]/41,idem,der(1)der(1)(p12)add(1)(p12),+der(1)(q21)add(1)(q21),-3,-8[2]", 
"43,XY-3,del(5)(q12q33),-7,der(10)t(10;11)(q26;q13),-12,-18,+2mar[20]", 
"44-45,X,-Y,-5,add(16)(q22),-17,-18,iso(21),+mars[cp5]/82-84,XX,-Y,-3,-4,-11,-12,-19,-21,+21[cp5}", 
"44~46,XX,del(11)(q23),der(19)?t(11;19)(q23;p13.1)[cp11]/44~45,XX,-19[cp4]/46,XX [5]", 
"44~47,XX,t(1;15)(q32;q26)[14],del(5)(q13q33)[19],-7[20],+8[7],del(12)(p11.2p11.2)[15],del(17)(q21)[8],der(22)t(1;22)(p13;p11.2)[20],+mar[13][cp20]", 
"44~47,XY,del(5)(q22q35)[20],-7[14],-8[6],der(12)t(10;12)(p11.2q21)[2],add(14)(p12)[11],-17[13],der(17)t(10;17)(q11.2;p13)[14],-18[7],add(18)(p11.2)[7],-21[10],i(21)(q10)[4],-22[4],+mar[10],+mar1x2[6][cp20]", 
"45,X,-X,t(8;21)(q22;q22)[20]", "45,X,-Y, t(8;7;21)(q22;p15;q22[22]/46,XY[3]", 
"45,X,-Y,t(8;21)(q22;q22)[13]/45,idem,del(9)(q22;q32)[7]", "45,X,-Y,t(8;21)(q22;q22)[19]/46,XY[1]", 
"45,X,-Y[3]/46,XY [17]", "45,XX-7[5]-only 5 metaphases", "45,XX,-7,t(9;11)(p22;q23)[19]/46,XX[1]", 
"45,XX,-7[12]/46,XX[8]", "45,XX,-7[20]", "45,XY,-7, t(9;22)(q34;q11.20) [19]/46,XY[1]", 
"45,XY,-7[20]", "45,XY,der(7)(t:7;12)(p11.1;p11.2),-12,-13,+mar[19]/46,XY[1]", 
"45~46,XY,add(X)(q22)[7],Y[4],der(5)t(5;17)(q13;21)[18],-7[18],+8[17],del(12)(q23)[16],-17[18],add(18)(p11.2)[14][cp18]", 
"46, XX[14]", "46, XX[15]", "46, XX[16]", "46, XX[19]", "46, XX[20]", 
"46, XY[15]", "46, XY[20]", "46,XX,1~50dmin[12]/46,idem,der(6)t(6;?)(q22;?)[2]/46,XX[6]", 
"46,XX,9qh+[20]", "46,XX,del(3)(q23q26.2),der(7)t(1:7)(q32;q32),del(10)(q22q25),t(13;16)(q34;p11.2)dup(21)(q22)[cp20]", 
"46,XX,del(5)(q11.2q33)[1]/48~52,idem,+1,+?del(5)(q15q33),+11,+11,?t(12;22)(p13;q12),-13,-17,+i(22)(q10),+i(22)(q10),+mar[cp19]", 
"46,XX,del(5)(q22q33)[4]/46,XX[16]", "46,XX,i(17)(q10)[1]/45,sl-7[2]/48,sl,+13,+19[3]/46,XX[15]", 
"46,XX,inv(16)(p13q22)[15]/46,XX[2]", "46,XX,inv(16)(p13q22)[19]/46,XX[1]", 
"46,XX,inv(16)(p13q22)[20]", "46,XX,inv(16)(p13q22)[5]/46,idem,t(3;3)(p13;q?28)[5]/46,XX[6]", 
"46,XX,t(15;17)(q22;q21.1)[19]/47,idem,+8 [1]", "46,XX,t(15;17)(q22;q21),t(16;19)(p13.3;p13.1)[17]/46,XX[3]", 
"46,XX,t(15;17)(q22;q21)[11]/46,XX[9]", "46,XX,t(15;17)(q22;q21)[12]/46,XX[8]", 
"46,XX,t(15;17)(q22;q21)[20]", "46,XX,t(8;21)(q22;q22)[17]/46,XX[3]", 
"46,XX,t(8;21)(q22;q22)[20]", "46,XX,t(8;21)[15]/46,idem,del(9)(q12q22)[5]", 
"46,XX[15]", "46,XX[18]", "46,XX[19]/46,XX,add(7)(p?22)[1]", 
"46,XX[20]", "46,XX+13,21[cp17]/46,XX[3]", "46,XY,9qh+[19]", 
"46,XY,del(11)(p12)[2]/46,XY[18]", "46,XY,del(20)(q11.2)[23]/92,XXYY,del(20)(q11.2)x2[2]/46,XY[3]", 
"46,XY,del(7)(q21q36)[18]/46,XY[2]", "46,XY,del(9)(q13:q22),t(11:21)(p13;q22),t(15;17)(q22;q210[20]", 
"46,XY,i(17)(q10)[15]/47,XY,idem+13[3]/46,XY[2]", "46,XY,inv(16)(p13;q22)[20]", 
"46,XY,inv(16)(p13q22)[17]/46,XY[3]", "46,XY,inv(16)(p13q22)[9]/46,XY[10]", 
"46,XY,t(11;19)(q23;p13)[17]/46,XY,t(11;19)(q23;p13),inv(12)(p12p13)[3]", 
"46,XY,t(11;19)(q23;p13)[20]", "46,XY,t(15;17)(q22;q21)[19]/46,XY[1]", 
"46,XY,t(15;17)(q22;q21)[20]", "46,XY,t(15;17)(q22:q21)[11]/46,XY[9]", 
"46,XY,t(2;4)(q34;q21)inv(16)(p13q22) [20]", "46,XY,t(6;11)(q27;q23)[15]", 
"46,XY,t(9;11)(p22;q23)[7]/47,XY,t(9;11)(p22;q23)[7]/46,XY[4]", 
"46,XY,t(9;22)(q34;q11.2)[13]/34~37,idem,-3,del(4),-4,-5,-7,-9,-10,t?(11;12),-12,-14,-14,-16,-17,-22[cp6]/46,XY[1]", 
"46,XY,t(9;22)(q34;q11.2[4]/50,idem,+8,+10,+21,+der(22)(t(9;22)(q34;q11.2)[16]", 
"46,XY[13]", "46,XY[15]", "46,XY[19]", "46,XY[20]", "46,XY[30]", 
"46~49,XY,del(3)(p14),del(5)(p11.2q33),del(17)(q21q21),add(21)(p11.2),+22,mar[cp20]", 
"47,XX,+der(5)t(2;5)(p11.2;q11.2)?,t(8;16)(p11.2;p13.3)[19]", 
"47,XX,i(11)(q10)[18]/46,XX [2]", "47,XX,t(15;17)(q22:q21)+mar[20]", 
"47,XX+11 [20]", "47,XX+8 [20]", "47,XXY [17]", "47,XY,+13[5]/46,XY[15]", 
"47,XY,+21 [6]/46,XY[13]", "47,XY,+21[11]/48,XY,+3,+21[8]", "47,XY,+22[10]/47,XY,+8[7]/45,XY,del(3)(p21),del(4)(p12p15),-7,?dup(7)(q11.2q36)[3]", 
"47,XY,+8 [10]/46,XY [10]", "47,XY,+8 [19]", "47,XY,+8 [20]", 
"47,XY,+8[15]/46,+8,-17[3]", "47,XY,+9[10]/46,XY[10]", "47,XY,del(5)(q22q33),t(10;11)(p13~p15;q22~23),i(17)(q10)[3]/46,XY[17]", 
"47,XY,del(7)(q22),+8,t(15;17)(q22;q21)[18]/46,XY,del(7)(q22),t(15;17)(q22;q21)[2]", 
"47,XY+8 [15]/48,XY+8+8[4]/46,XY[1]", "48,XY,+8,+8[16]/46,XY[4]", 
"52~54,XY,+2,+4,+6,+8,del(11)(q23),+19,+19,+21[17]/46,XY[3]", 
"53~56,XY,+1,del(2)(q33q34),+8,+10,+11x2,+13x1-2,+14,del(17)(p11.2),+19,add(21)(q22),+22[cp20]", 
"incomplete-46,XY,del(12)(p11.20[2]/46,XY[3]", "N.D.", "ND", 
"Outside hospital with inv(16)"), class = "factor"), `Cytogenetic-Code--Other-` = structure(c(8L, 
3L, 8L, 8L, 3L, 9L), .Label = c("BCR-ABL1", "CBFB-MYH11", "Complex Cytogenetics", 
"Intermediate Risk Cytogenetic Abnormality", "MLL translocation, poor risk", 
"MLL translocation, t(9;11)", "N.D.", "Normal Karyotype", "PML-RARA", 
"Poor Risk Cytogenetic Abnormality", "RUNX1-RUNX1T1"), class = "factor"), 
    Induction = structure(c(11L, 4L, 1L, 8L, 4L, 9L), .Label = c("7+3", 
    "7+3, dauna", "7+3, IT", "7+3+3", "7+3+3, gleevec", "7+3+3, then 5+2+2", 
    "7+3+3+PSC", "7+3+AMD", "7+3+ATRA", "7+3+dauno", "7+3+Genasense", 
    "7+3+study drug", "7+4+ATRA", "Azacitidine", "CLAM", "Cytarabine only", 
    "Decitabine", "Decitabine then 7+3", "Hydrea & Idarubicin", 
    "Hydrea, ATRA started", "hydrea, didn't get add'l chemo", 
    "LBH/Decitabine", "low dose Ara C", "no treatment", "Revlimid", 
    "Revlmd then Decitbne,7+3,5+2"), class = "factor")), row.names = c(NA, 
6L), class = "data.frame")

my figure


Solution

  • Rename Diagnosis-Age and use cut to convert to a factor. Add labels as required for appearance of age groups in legend.

    Note I have swapped name and perc in the call to aes to avoid the call to coord_flip.

    library(dplyr)
    library(ggplot2)
    library(tidyr)
    library(ggnewscale)
    
    plot_meta <- a
    df1 <-
      plot_meta %>%
      rename(Age = `Diagnosis-Age`) %>% 
      mutate(Age = cut(Age, breaks = seq(30, 80, by = 10))) %>% 
      mutate(across(everything(), as.character)) %>%
      pivot_longer(everything()) %>%
      count(name, value) %>%
      group_by(name) %>%
      mutate(perc = n / sum(n) * 100)
    
    df1 %>%
      ggplot(aes(perc, name)) +
      geom_col(data = ~ filter(.x, name == "FAB") %>% rename(FAB = value), mapping = aes(fill = FAB)) +
      new_scale_fill() +
      geom_col(data = ~ filter(.x, name == "Sex") %>% rename(Sex = value), mapping = aes(fill = Sex)) +
      new_scale_fill() +
      geom_col(data = ~ filter(.x, name == "Age") %>% rename(Age = value), mapping = aes(fill = Age))
    

    Created on 2022-03-14 by the reprex package (v2.0.1)