Search code examples
rggplot2facet-wrappolar-plot

Geom_bar has missing values in polar plot despite standardized y-axis (Siber r package)


I am modifying this tutorial to produce a ggplot of polar vectors. Within my own data, and absent from the tutorial, are test groups by which I would like to use to facet my plot (hist.by.groups_deg$Testnames). When I facet the plot by the variable intended in the tutorial (comparison), each facet has only one polar vector, and the y-axis appears to be scaled correctly. However, when I switch to Testnames and try to plot more than one vector on the same facet, I get the following error:

Warning message: Removed 6 rows containing missing values (geom_bar()).

The tutorial code standardizes the y-axis histogram counts by dividing those counts by the highest number of counts in the histogram, such that the data range for the y-axis should be 0 to 1, which appears to be the case when I check the dataframe before plotting. However, after receiving the warning message, I increased the maximum value on my y-axis and several were suddenly exceeding 1. Can someone help me properly standardize my y-axis so I can compare my groups on the same facet?

enter image description here

rm(list=ls())
graphics.off()

set.seed(1)

library(SIBER)
library(ggplot2)
library(magrittr) # to enable piping
library(dplyr)

# load in the included demonstration dataset
data("demo.siber.data")

# create the siber object
siber.example <- createSiberObject(demo.siber.data)

# options for running jags
parms <- list()
parms$n.iter <- 2 * 10^4   # number of iterations to run the model for
parms$n.burnin <- 1 * 10^3 # discard the first set of values
parms$n.thin <- 10     # thin the posterior by this many
parms$n.chains <- 2        # run this many chains

parms$save.output = FALSE
parms$save.dir = tempdir()

# define the priors
priors <- list()
priors$R <- 1 * diag(2)
priors$k <- 2
priors$tau.mu <- 1.0E-3

# fit the ellipses which uses an Inverse Wishart prior
# on the covariance matrix Sigma, and a vague normal prior on the 
# means. Fitting is via the JAGS method.
ellipses.posterior <- siberMVN(siber.example, parms, priors)

# extract the centroids from the fitted model object
centroids <- siberCentroids(ellipses.posterior)

# calculate pairwise polar vectors among all groups
angles_distances <- allCentroidVectors(centroids, do.plot = FALSE)

my.hist <- function(df){
  test <- hist(df$angles, 
               breaks = seq(from = -pi, to = pi, length = 60), 
               plot = FALSE)
  
  X <- data.frame(counts = test$counts, mids = test$mids, dens = test$density, 
                  counts.stdzd = test$counts / max(test$counts))
  
  return(X)
  
}

# calculate the points for each group's ellipse
hist.by.groups <- angles_distances %>% group_by(comparison) %>%
  do(my.hist(.))

### HERE IS WHERE I START MODIFYING THINGS ###

# Radians stress me out so I'm going to try converting them to degrees
hist.by.groups_deg <- hist.by.groups %>% 
  mutate(Degrees = mids*(180/pi))

# Creating test names to facet by
hist.by.groups_deg$TestName = c(rep("A", 125),
                                rep("B", 125),
                                rep("C", 635))
                                

# Generating polar histograms
cols <- c("#0570b0", "#8c96c6", "#74a9cf", "#8856a7", "#d7b5d8", "#b3cde3", "#810f7c", "#66c2a4", "#bdc9e1", "#df65b0","salmon","pink","gray","beige","tan")


all.roses <- ggplot(data = hist.by.groups_deg, aes(x = Degrees, y = counts.stdzd, color = comparison, fill = comparison)) +
  # Default here is stat = "count", but we are providing the y values so we override with stat = "identity"
  geom_hline(yintercept = seq(0, 1, by = 0.25), colour = "grey90", linewidth = 0.2) +
  geom_vline(xintercept = seq(0, 180, by = 45), colour = "grey90", linewidth = 0.2) +
  geom_vline(xintercept = seq(0, -180, by = -45), colour = "grey90", linewidth = 0.2) +
  geom_bar(stat = "identity", alpha = .7) + 
  scale_x_continuous(
    limits = c(-180,180),
    breaks = c(-180, -90, 0, 90, 180),
    labels = c("","-90","0","90", "")) +
  scale_y_continuous(
    limits = c(0,1)) +
  # Even though we've changed our units to degrees, start argument is in radians
  coord_polar(start = pi/2, direction = -1) +
  facet_wrap( ~ TestName, nrow = 2) +
  scale_fill_manual(
    values = cols) +
  scale_color_manual(
    values = cols) +
  theme(axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(),
        legend.position = "none",
        panel.border = element_blank(),
        panel.grid = element_blank()) + 
  labs(y = "Counts (standardized by the proportion of total values)",
       x = expression("Angle between ellipse centroids (\u00B0)"))
print(all.roses)

Solution

  • This turned out to be a problem with geom_bar rather than a problem with SIBER or coord_polar. Despite having standardized my own y-axis in the input dataframe, this was overridden by the default position argument in geom_bar ("stacked"), so that when I added several bar plots to the same facet, any overlapping bars were stacked on top of each other, sometimes doubling or tripling the scale of the y axis. Specifying position = "identity" allows the bar charts to overlap without stacking:

    rm(list=ls())
    graphics.off()
    
    set.seed(1)
    
    library(SIBER)
    library(ggplot2)
    library(magrittr) # to enable piping
    library(dplyr)
    
    # load in the included demonstration dataset
    data("demo.siber.data")
    
    # create the siber object
    siber.example <- createSiberObject(demo.siber.data)
    
    # options for running jags
    parms <- list()
    parms$n.iter <- 2 * 10^4   # number of iterations to run the model for
    parms$n.burnin <- 1 * 10^3 # discard the first set of values
    parms$n.thin <- 10     # thin the posterior by this many
    parms$n.chains <- 2        # run this many chains
    
    parms$save.output = FALSE
    parms$save.dir = tempdir()
    
    # define the priors
    priors <- list()
    priors$R <- 1 * diag(2)
    priors$k <- 2
    priors$tau.mu <- 1.0E-3
    
    # fit the ellipses which uses an Inverse Wishart prior
    # on the covariance matrix Sigma, and a vague normal prior on the 
    # means. Fitting is via the JAGS method.
    ellipses.posterior <- siberMVN(siber.example, parms, priors)
    
    # extract the centroids from the fitted model object
    centroids <- siberCentroids(ellipses.posterior)
    
    # calculate pairwise polar vectors among all groups
    angles_distances <- allCentroidVectors(centroids, do.plot = FALSE)
    
    my.hist <- function(df){
      test <- hist(df$angles, 
                   breaks = seq(from = -pi, to = pi, length = 60), 
                   plot = FALSE)
      
      X <- data.frame(counts = test$counts, mids = test$mids, dens = test$density, 
                      counts.stdzd = test$counts / max(test$counts))
      
      return(X)
      
    }
    
    # calculate the points for each group's ellipse
    hist.by.groups <- angles_distances %>% group_by(comparison) %>%
      do(my.hist(.))
    
    ### HERE IS WHERE I START MODIFYING THINGS ###
    
    # Radians stress me out so I'm going to try converting them to degrees
    hist.by.groups_deg <- hist.by.groups %>% 
      mutate(Degrees = mids*(180/pi))
    
    # Creating test names to facet by
    hist.by.groups_deg$TestName = c(rep("A", 125),
                                    rep("B", 125),
                                    rep("C", 635))
                                    
    
    # Generating polar histograms
    cols <- c("#0570b0", "#8c96c6", "#74a9cf", "#8856a7", "#d7b5d8", "#b3cde3", "#810f7c", "#66c2a4", "#bdc9e1", "#df65b0","salmon","pink","gray","beige","tan")
    
    
    all.roses <- ggplot(data = hist.by.groups_deg, aes(x = Degrees, y = counts.stdzd, color = comparison, fill = comparison)) +
      # Default here is stat = "count", but we are providing the y values so we override with stat = "identity"
      geom_hline(yintercept = seq(0, 1, by = 0.25), colour = "grey90", linewidth = 0.2) +
      geom_vline(xintercept = seq(0, 180, by = 45), colour = "grey90", linewidth = 0.2) +
      geom_vline(xintercept = seq(0, -180, by = -45), colour = "grey90", linewidth = 0.2) +
      geom_bar(stat = "identity", alpha = .7, position = "identity") + 
      scale_x_continuous(
        limits = c(-180,180),
        breaks = c(-180, -90, 0, 90, 180),
        labels = c("","-90","0","90", "")) +
      scale_y_continuous(
        limits = c(0,1)) +
      # Even though we've changed our units to degrees, start argument is in radians
      coord_polar(start = pi/2, direction = -1) +
      facet_wrap( ~ TestName, nrow = 2) +
      scale_fill_manual(
        values = cols) +
      scale_color_manual(
        values = cols) +
      theme(axis.ticks.y = element_blank(), 
            axis.text.y = element_blank(),
            legend.position = "none",
            panel.border = element_blank(),
            panel.grid = element_blank()) + 
      labs(y = "Counts (standardized by the proportion of total values)",
           x = expression("Angle between ellipse centroids (\u00B0)"))
    print(all.roses)