Search code examples
r, ggplot2, density-plot

density shadow around the data with ggplot2 (R)


I am trying to have 2 "shadows" on the background of the below plot. These shadows should represent the density of the orange and blue points separately. Does it make sense?

Here is the ggplot to improve: enter image description here

Here is the code and the data (matrix df) I used to create this plot:

                         PC1           PC2 aa
A_akallopisos    0.043272525  0.0151023307  2
A_akindynos     -0.020707141 -0.0158198405  1
A_allardi       -0.020277664 -0.0221016281  2
A_barberi       -0.023165596  0.0389906701  2
A_bicinctus     -0.025354572 -0.0059122384  2
A_chrysogaster   0.012608835 -0.0339330213  2
A_chrysopterus  -0.022402365 -0.0092476009  1
A_clarkii       -0.014474658 -0.0127024469  1
A_ephippium     -0.016859412  0.0320034231  2
A_frenatus      -0.024190876  0.0238499714  2
A_latezonatus   -0.010718845 -0.0289904165  1
A_latifasciatus -0.005645811 -0.0183202248  2
A_mccullochi    -0.031664307 -0.0096059126  2
A_melanopus     -0.026915545  0.0308399009  2
A_nigripes       0.023420045  0.0293801537  2
A_ocellaris      0.052042539  0.0126144250  2
A_omanensis     -0.020387101  0.0010944998  2
A_pacificus      0.042406273 -0.0260308092  2
A_percula        0.034591721  0.0071153133  2
A_perideraion    0.052830132  0.0064495142  2
A_polymnus       0.030902254 -0.0005091421  2
A_rubrocinctus  -0.033318659  0.0474995722  2
A_sandaracinos   0.055839755  0.0093724082  2
A_sebae          0.021767793 -0.0218640814  2
A_tricinctus    -0.016230301 -0.0018526482  1
P_biaculeatus   -0.014466403  0.0024864574  2



 # Scatter plot of PC1 vs PC2, coloured by mutation class (`aa`), with one
 # linear fit per class and a text label for every row of `df`.
 # `df`, `Sites_names` and `j` are defined outside this snippet.
 ggplot(
   data = df,
   aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))
 ) +
   ggtitle(paste0("Site n° ", Sites_names[j])) +
   geom_smooth(se = FALSE, method = "lm") + # FALSE, not F: F can be reassigned
   geom_point() +
   scale_color_manual(
     name = "mutation",
     values = c("darkorange2", "cornflowerblue"),
     labels = c("A", "S")
   ) +
   geom_text(hjust = 0.5, vjust = -1, size = 3) + # labels sit above the points
   xlim(-0.05, 0.07)

Solution

  • Here are some possible approaches using stat_density2d() with geom="polygon" and either mapping or setting the alpha transparency of the density fill regions. If you are willing to experiment with some of the parameters, I think you can get some very useful plots. Specifically, you may want to adjust the following:

    1. n controls the smoothness of the density polygon.
    2. h is the bandwidth of the density estimation.
    3. bins controls the number of density levels.

    enter image description here

    # Example data: one row per species with its two principal-component
    # scores (PC1, PC2) and a mutation class code (aa, 1 or 2).
    # The header line has one fewer field than the data lines, so read.table()
    # uses the first column (the species names) as the row names of `df`.
    df <- read.table(
      header = TRUE,
      text =
    "                         PC1           PC2 aa
    A_akallopisos    0.043272525  0.0151023307  2
    A_akindynos     -0.020707141 -0.0158198405  1
    A_allardi       -0.020277664 -0.0221016281  2
    A_barberi       -0.023165596  0.0389906701  2
    A_bicinctus     -0.025354572 -0.0059122384  2
    A_chrysogaster   0.012608835 -0.0339330213  2
    A_chrysopterus  -0.022402365 -0.0092476009  1
    A_clarkii       -0.014474658 -0.0127024469  1
    A_ephippium     -0.016859412  0.0320034231  2
    A_frenatus      -0.024190876  0.0238499714  2
    A_latezonatus   -0.010718845 -0.0289904165  1
    A_latifasciatus -0.005645811 -0.0183202248  2
    A_mccullochi    -0.031664307 -0.0096059126  2
    A_melanopus     -0.026915545  0.0308399009  2
    A_nigripes       0.023420045  0.0293801537  2
    A_ocellaris      0.052042539  0.0126144250  2
    A_omanensis     -0.020387101  0.0010944998  2
    A_pacificus      0.042406273 -0.0260308092  2
    A_percula        0.034591721  0.0071153133  2
    A_perideraion    0.052830132  0.0064495142  2
    A_polymnus       0.030902254 -0.0005091421  2
    A_rubrocinctus  -0.033318659  0.0474995722  2
    A_sandaracinos   0.055839755  0.0093724082  2
    A_sebae          0.021767793 -0.0218640814  2
    A_tricinctus    -0.016230301 -0.0018526482  1
    P_biaculeatus   -0.014466403  0.0024864574  2"
    )
    
    
    library(ggplot2)
    
    # Variant 1: graded density "shadow".
    # A filled 2D density estimate is drawn per mutation class (fill = aa) and
    # alpha is mapped to the contour level, so each of the `bins` levels gets
    # its own transparency.
    # NOTE(review): ggplot2 >= 3.4.0 deprecates the `..level..` notation in
    # favour of `after_stat(level)`; the old form is kept here so the snippet
    # still runs on older ggplot2 installs -- switch once the version is known.
    p1 <- ggplot(
      data = df,
      aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))
    ) +
      ggtitle("Site n° ") +
      stat_density2d(
        aes(fill = factor(aa), alpha = ..level..),
        geom = "polygon",
        color = NA, # no outline around the density polygons
        n = 200,    # grid resolution: higher gives smoother polygons
        h = 0.03,   # bandwidth of the density estimate
        bins = 4    # number of density levels
      ) +
      geom_smooth(se = FALSE, method = "lm") + # FALSE, not F: F can be reassigned
      geom_point() +
      scale_color_manual(
        name = "mutation",
        values = c("darkorange2", "cornflowerblue"),
        labels = c("A", "S")
      ) +
      # Same name/values/labels as the colour scale so both share one legend.
      scale_fill_manual(
        name = "mutation",
        values = c("darkorange2", "cornflowerblue"),
        labels = c("A", "S")
      ) +
      geom_text(hjust = 0.5, vjust = -1, size = 3, color = "black") +
      scale_x_continuous(expand = c(0.3, 0)) + # Zooms out so that density polygons
      scale_y_continuous(expand = c(0.3, 0)) + # don't reach edges of plot.
      coord_cartesian(
        xlim = c(-0.05, 0.07),
        ylim = c(-0.04, 0.05)
      ) # Zooms back in for the final plot.
    
    
    # Variant 2: flat density "shadow".
    # Same layers as the graded variant, but alpha is a constant 0.2 (set
    # outside aes()), the bandwidth is wider and only 2 density levels are
    # requested, giving one uniform translucent region per class.
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
    # geoms such as geom_smooth(); `size` is kept for older ggplot2 installs.
    p2 <- ggplot(
      data = df,
      aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))
    ) +
      ggtitle("Site n° ") +
      stat_density2d(
        aes(fill = factor(aa)),
        alpha = 0.2,
        geom = "polygon",
        color = NA, # no outline around the density polygons
        n = 200,    # grid resolution: higher gives smoother polygons
        h = 0.045,  # bandwidth of the density estimate
        bins = 2    # number of density levels
      ) +
      geom_smooth(se = FALSE, method = "lm", size = 1) + # FALSE, not F
      geom_point(size = 2) +
      scale_color_manual(
        name = "mutation",
        values = c("darkorange2", "cornflowerblue"),
        labels = c("A", "S")
      ) +
      # Same name/values/labels as the colour scale so both share one legend.
      scale_fill_manual(
        name = "mutation",
        values = c("darkorange2", "cornflowerblue"),
        labels = c("A", "S")
      ) +
      # No colour override here, so labels inherit the per-class colour.
      geom_text(hjust = 0.5, vjust = -1, size = 3) +
      scale_x_continuous(expand = c(0.3, 0)) + # Zooms out so that density polygons
      scale_y_continuous(expand = c(0.3, 0)) + # don't reach edges of plot.
      coord_cartesian(
        xlim = c(-0.05, 0.07),
        ylim = c(-0.04, 0.05)
      ) # Zooms back in for the final plot.
    
    library(gridExtra)
    # Stack the two plot variants in a single column and write the result to
    # "plots.png" (relative to the current working directory).
    ggsave(
      "plots.png",
      plot = arrangeGrob(p1, p2, ncol = 1),
      width = 8,
      height = 11,
      dpi = 120
    )