Search code examples
rggplot2manhattan

Change shape of the data point based on positive or negative value in ggplot2


I have the following dataset:

beta pval Category_name
0.5 0.005 One
-0.3 0.6 Two
0.2 0.03 Three
-0.1 0.7 Four

I am trying to make a figure using pval (y axis) and Category_name (x axis). I would like to use shape 24 when beta is positive, and shape 25 when beta is negative, but still plotting pval.

My script looks like this:

library(ggplot2)
library(RColorBrewer)

# Number of distinct categories; used below to request that many colours.
colourCount <- length(unique(dup_categories_merged_allSNP$Category_name))
# Palette function that interpolates the 9 "Set1" Brewer colours to any length.
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))

# Numeric shape code per row: 24 where beta > 0, otherwise 25.
# NOTE(review): this column is numeric and is mapped to `shape` inside aes()
# below; that appears to be what triggers the reported
# "A continuous variable can not be mapped to shape" error.
dup_categories_merged_allSNP$shape <- ifelse(dup_categories_merged_allSNP$beta > 0, 24, 25)

# Category on the x axis, -log10(pval) on the y axis, one colour per category.
PheWAS_all <- ggplot(dup_categories_merged_allSNP, aes(x = Category_name, y = -log10(pval), shape = shape, colour = Category_name)) + 
geom_point(size = 2) +
  # NOTE(review): geom_jitter() adds a second point layer on top of geom_point(),
  # so each observation is presumably drawn twice — confirm this is intended.
  geom_jitter() +
  theme_classic() + 
  scale_colour_manual(values = getPalette(colourCount)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0, size = 10),
        axis.title.x = element_blank(),
        legend.position = "none") + 
  labs(color = "Category_name", x = "", y = "-log10(pval)") +
  # Two dashed horizontal reference lines at fixed p-value thresholds.
  geom_hline(yintercept = -log10(3.65E-09), color = "darkmagenta", linetype = "dashed", size = 1, alpha = 0.5) +
  geom_hline(yintercept = -log10(0.000740457), color = "gray32", linetype = "dashed", size = 1, alpha = 0.5)

I get the following error message: Error in scale_f():! A continuous variable can not be mapped to shape.

Edit: Original figure:

enter image description here

Figure with the changes based on the answer (linear on the y axis):

enter image description here

Subset of the data:

enter image description here


Solution

  • The error occurs because the numeric shape column (24/25) is treated as a continuous variable, and ggplot2 can only map shape to a discrete variable. One way to fix this is to create a character column using mutate() and ifelse() from the tidyverse. To demonstrate the plot, I generated a larger example dataset. For the shapes, add scale_shape_manual() to your ggplot call and supply your preferred shape values. I also added a legend to indicate the shapes, but you can remove it from the code:

    Some example data:

    library(tidyverse)
    library(ggplot2)
    library(RColorBrewer)
    library(ggh4x)
    
    # Reproducible toy data: 80 rows, 20 per category.
    # The random draws are made in a fixed order (phenotype, beta, pval) so the
    # seeded values are always the same.
    set.seed(1)
    phenotype <- sample(seq_len(3), size = 80, replace = TRUE)  # phenotype id 1..3
    beta <- runif(80, -1, 1)                                    # signed effect size
    pval <- runif(80, 0.005, 1)                                 # p-value
    Category_name <- rep(c("One", "Two", "Three", "Four"), each = 20)
    dup_categories_merged_allSNP <- data.frame(
      phenotype = phenotype,
      beta = beta,
      pval = pval,
      Category_name = Category_name
    )
    

    Add a shape category based on the sign of beta and prepare the data for plotting:

    # Label each row by the sign of beta (a beta of exactly 0 is labelled
    # "positive") and fix the category order to One, Two, Three, Four.
    dup_categories_merged_allSNP <- dup_categories_merged_allSNP %>%
      mutate(
        shape = ifelse(beta < 0, "negative", "positive"),
        Category_name = factor(
          Category_name,
          levels = c("One", "Two", "Three", "Four")
        )
      )

    # One colour per distinct category, interpolated from the "Set1" palette.
    colourCount <- length(unique(dup_categories_merged_allSNP[["Category_name"]]))
    getPalette <- colorRampPalette(brewer.pal(n = 9, name = "Set1"))
    

    Create the plot with shapes based on positive/negative beta values. The phenotype values are placed on the x axis so that p-values within each category do not overlap:

    # PheWAS-style plot: one facet per category, phenotype on the x axis and
    # -log10(pval) on the y axis. Point shape encodes the sign of beta; colour
    # and fill encode the category.
    PheWAS_all <- ggplot(
      dup_categories_merged_allSNP,
      aes(x = factor(phenotype), y = -log10(pval))
    ) +
      # x and y are inherited from ggplot(); fill is mapped as well as colour
      # because shapes 24/25 are fillable triangles.
      geom_point(
        aes(shape = shape, color = Category_name, fill = Category_name),
        size = 2.5
      ) +
      theme_classic() +
      scale_color_manual(values = getPalette(colourCount)) +
      scale_fill_manual(values = getPalette(colourCount)) +
      # Named values tie each shape to its level explicitly, so the mapping stays
      # correct even if only one of the two levels is present in the data.
      scale_shape_manual(values = c(negative = 25, positive = 24)) +
      # Facet strips are moved below the panels and coloured like the points.
      facet_grid2(
        . ~ Category_name,
        scales = "free_y",
        switch = "x",
        strip = strip_themed(
          background_x = elem_list_rect(fill = getPalette(colourCount))
        )
      ) +
      scale_y_continuous(expand = c(.01, 0)) +
      # Single theme() call with the settings that actually take effect
      # (earlier duplicate settings were overridden by later ones).
      # Set legend.position = "none" to drop the legend.
      theme(
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "right",
        panel.spacing = unit(0, "lines"),
        strip.text = element_text(
          vjust = .8, face = "bold", size = 9, margin = margin(b = 3)
        )
      ) +
      labs(color = "Category_name", x = "", y = "-log10(pval)") +
      # Dashed reference lines at the two p-value thresholds. `linewidth`
      # replaces the deprecated `size` for lines (ggplot2 >= 3.4.0).
      geom_hline(
        yintercept = -log10(3.65E-09), color = "darkmagenta",
        linetype = "dashed", linewidth = 1, alpha = 0.5
      ) +
      geom_hline(
        yintercept = -log10(0.000740457), color = "gray32",
        linetype = "dashed", linewidth = 1, alpha = 0.5
      )
    PheWAS_all
    

    enter image description here