Search code examples
rgeometrytrigonometryangle

Circular mean incorrect


I want to estimate the mean and spread of angles per year for moving individuals. Data set will be pasted at the end.

This is what I have so far:

    library(tidyverse)
    library(sf)
    library(lwgeom)
    library(geosphere)
    library(circular)

This gets the bearings of locations from the point

HI=data.frame(x=rep(151.9155, 217), y=rep(-23.44288, 217))

bears<-trips%>%
  mutate(bearing=geosphere::bearing(st_multipoint(cbind(HI$x, HI$y)), 
                                    st_multipoint(cbind(.$x, .$y))),
     bearing360 = (bearing + 360) %% 360)

Get mean direction and variance

mnbears<-bears%>%
      mutate(bearing360 = circular(bearing)) %>%
      group_by(Year) %>%
      summarise(mnDir = mean.circular(bearing), var = var.circular(bearing)) %>%
      mutate(mnDir = conversion.circular(mnDir, units = 'degrees', modulo = 'asis', zero = 0), 
           var = conversion.circular(circular(var), units = 'degrees'))
        

Throw them together for ggplot

    jb<-left_join(bears, mnbears) 
    
    
    bDist<-ggplot(jb, aes(x = bearing360, fill = Year)) +
      geom_histogram(binwidth = 15, boundary = -7.5, colour = "black", size = .25) +
                geom_segment(aes(x = mnDir, y = 10, xend = mnDir, yend = 20), col='black') +
      geom_segment(aes(x = mnDir-(var/2), y = 20, xend = mnDir+(var/2), yend = 20), col='black') +
      guides(fill = guide_legend(reverse = TRUE)) +
      coord_polar() +
      scale_x_continuous(limits = c(0,360),
                         breaks = seq(0, 360, by = 90),
                         minor_breaks = seq(0, 360, by = 45)) +
      scale_fill_brewer() +
      facet_grid(~Year) +
      theme(panel.background = element_rect(fill = "white"),
            panel.grid = element_line(color = "grey"),
            strip.background = element_blank(),
            strip.text.x = element_text(face = 'bold', size = 11),
            axis.title.x = element_blank(),
            axis.title.y = element_blank())
    
    
    bDist

enter image description here

Hopefully from the plots you can see the estimate mean and variance is really wrong (indicated by the black 'T' shapes)! I can't figure out how to get the estimates correct. I suspect it is some technical circular data concept that I haven't experienced. Either something to do with the 'modulo' settings or the direction in which bearing was measured is my best get but I've tried all combinations I can think of.

How can I get the correct estimate for the mean and variance?

Data (trips) for reprex:

|Year |        x|         y|
|:----|--------:|---------:|
|2019 | 151.3549| -22.51087|
|2020 | 151.7959| -23.57362|
|2020 | 151.8792| -23.48739|
|2020 | 151.8875| -23.54251|
|2020 | 151.8026| -23.43382|
|2020 | 151.8458| -23.56966|
|2020 | 151.9292| -23.38716|
|2020 | 151.8155| -23.03113|
|2020 | 151.8536| -23.13598|
|2020 | 151.8660| -23.49086|
|2020 | 151.8625| -23.49980|
|2020 | 151.8288| -23.47545|
|2020 | 151.8630| -23.49681|
|2020 | 151.8740| -23.48952|
|2020 | 151.8401| -23.47993|
|2020 | 151.8583| -23.49799|
|2020 | 151.8502| -23.46673|
|2020 | 151.8654| -23.49062|
|2020 | 151.8626| -23.51057|
|2020 | 151.8958| -23.52990|
|2020 | 151.7507| -23.45382|
|2020 | 151.7409| -23.45942|
|2020 | 151.8557| -23.50075|
|2020 | 151.8834| -23.58453|
|2020 | 151.9233| -23.60095|
|2020 | 151.8564| -23.43141|
|2020 | 151.9685| -23.65540|
|2020 | 151.7681| -23.57841|
|2020 | 151.7986| -23.65532|
|2020 | 151.7582| -23.70650|
|2020 | 151.7328| -23.48951|
|2020 | 151.8876| -23.61944|
|2020 | 151.8471| -23.26857|
|2020 | 151.9005| -23.38162|
|2020 | 151.8629| -23.63405|
|2020 | 151.8518| -23.41013|
|2020 | 152.0313| -23.41983|
|2020 | 151.3282| -23.71458|
|2020 | 151.8332| -23.33175|
|2020 | 151.8134| -23.33415|
|2020 | 151.7663| -23.38577|
|2020 | 151.7972| -23.43462|
|2020 | 151.7085| -23.29203|
|2020 | 151.8643| -23.42937|
|2020 | 151.8519| -23.32817|
|2020 | 151.9212| -23.58503|
|2020 | 151.8469| -23.51568|
|2020 | 151.7841| -23.43410|
|2020 | 151.8045| -23.40979|
|2020 | 151.8764| -23.78642|
|2020 | 151.3802| -23.44065|
|2020 | 151.8101| -23.44553|
|2020 | 151.8493| -23.46276|
|2019 | 151.9488| -23.43558|
|2019 | 152.0061| -23.48749|
|2019 | 151.9449| -23.46720|
|2019 | 152.0566| -23.42244|
|2020 | 151.7048| -23.73499|
|2020 | 151.7205| -23.83130|
|2020 | 151.9030| -23.51964|
|2020 | 151.6201| -23.91792|
|2020 | 151.8511| -23.69530|
|2020 | 151.8494| -23.57195|
|2020 | 151.8912| -23.66548|
|2020 | 151.7875| -23.76454|
|2020 | 151.9273| -23.60010|
|2020 | 151.7049| -23.29387|
|2020 | 151.8574| -23.42830|
|2020 | 151.6925| -23.41902|
|2020 | 151.9542| -23.33343|
|2020 | 151.8367| -23.41281|
|2020 | 151.7874| -23.74106|
|2020 | 151.9670| -23.57370|
|2020 | 152.0281| -23.40727|
|2020 | 151.8325| -23.34148|
|2020 | 151.8641| -23.52120|
|2020 | 151.8446| -23.48096|
|2020 | 151.8731| -23.44420|
|2020 | 152.0092| -23.40491|
|2020 | 151.8099| -23.42122|
|2020 | 151.8518| -23.50224|
|2020 | 151.8691| -23.49820|
|2020 | 151.8528| -23.43173|
|2021 | 151.9299| -23.48949|
|2021 | 151.9988| -23.62274|
|2021 | 152.0649| -23.62837|
|2021 | 151.7091| -23.51718|
|2021 | 151.9607| -23.41935|
|2021 | 152.0153| -23.41470|
|2021 | 151.7294| -23.53706|
|2021 | 151.8953| -23.42542|
|2021 | 151.8300| -23.48532|
|2021 | 151.8654| -23.42682|
|2021 | 151.9111| -23.41984|
|2021 | 152.0536| -23.41074|
|2021 | 152.0548| -23.41915|
|2021 | 151.9443| -23.63787|
|2021 | 152.0069| -23.41399|
|2021 | 152.0580| -23.42258|
|2021 | 151.9596| -23.42265|
|2021 | 151.8786| -23.41094|
|2021 | 151.8852| -23.44104|
|2021 | 151.9388| -23.35891|
|2021 | 151.9058| -23.40970|
|2021 | 151.9490| -23.42408|
|2019 | 151.8181| -23.38416|
|2019 | 150.8755| -22.88907|
|2021 | 152.0784| -23.71287|
|2021 | 152.0142| -23.48147|
|2021 | 152.0238| -23.58956|
|2021 | 151.9565| -23.59669|
|2021 | 151.9782| -23.61075|
|2021 | 151.9784| -23.47652|
|2021 | 152.1223| -23.43406|
|2021 | 151.9975| -23.47265|
|2021 | 151.9509| -23.47858|
|2021 | 151.8659| -23.40042|
|2021 | 151.9835| -23.47445|
|2021 | 151.9942| -23.49223|
|2021 | 152.0362| -23.46816|
|2021 | 152.0086| -23.48778|
|2021 | 151.9413| -23.47545|
|2021 | 151.9886| -23.48856|
|2021 | 151.9430| -23.47246|
|2021 | 151.9817| -23.49972|
|2021 | 151.9993| -23.42002|
|2021 | 152.0099| -23.48919|
|2021 | 151.9259| -23.29711|
|2021 | 151.9757| -23.40455|
|2021 | 152.0466| -23.46832|
|2021 | 152.0223| -23.42812|
|2021 | 152.0749| -23.42492|
|2021 | 151.8081| -23.48507|
|2021 | 151.8238| -23.51239|
|2021 | 151.8305| -23.50929|
|2021 | 151.8562| -23.61330|
|2021 | 151.9660| -23.42000|
|2021 | 152.0922| -23.51141|
|2021 | 151.8327| -23.32961|
|2021 | 151.7930| -23.49821|
|2021 | 151.8765| -23.51462|
|2021 | 151.8296| -23.43750|
|2021 | 151.7995| -23.47485|
|2021 | 151.8632| -23.45549|
|2021 | 151.8266| -23.50840|
|2021 | 151.8700| -23.43128|
|2021 | 151.8665| -23.70580|
|2021 | 151.8225| -23.62326|
|2021 | 151.9601| -23.49017|
|2021 | 151.9737| -23.47956|
|2021 | 151.9673| -23.53141|
|2021 | 151.9732| -23.70913|
|2019 | 151.5390| -23.49592|
|2019 | 151.5404| -23.50782|
|2019 | 151.7217| -23.34590|
|2021 | 151.9210| -23.48402|
|2021 | 151.9782| -23.48022|
|2021 | 151.8481| -23.56943|
|2021 | 152.0031| -23.59803|
|2021 | 152.0144| -23.48938|
|2021 | 151.8186| -23.47955|
|2021 | 151.8250| -23.57878|
|2021 | 151.8225| -23.48596|
|2021 | 151.9910| -23.47478|
|2021 | 151.8382| -23.49800|
|2021 | 151.8084| -23.60350|
|2021 | 151.8023| -23.61190|
|2021 | 151.8164| -23.62010|
|2021 | 151.9611| -23.49243|
|2021 | 151.9564| -23.51577|
|2021 | 151.8836| -23.49689|
|2021 | 152.0077| -23.47821|
|2021 | 151.8216| -23.48753|
|2021 | 151.9700| -23.43557|
|2021 | 152.0044| -23.45532|
|2021 | 152.0447| -23.44921|
|2021 | 151.8647| -23.54219|
|2021 | 151.9037| -23.49741|
|2021 | 151.9136| -23.49116|
|2021 | 151.7988| -23.48745|
|2021 | 152.0451| -23.40705|
|2021 | 152.0415| -23.40918|
|2021 | 152.0157| -23.40652|
|2021 | 151.9719| -23.42479|
|2021 | 151.8049| -23.62434|
|2021 | 151.9203| -23.48346|
|2021 | 151.9228| -23.47228|
|2021 | 151.9705| -23.47643|
|2021 | 151.9705| -23.43327|
|2021 | 151.9284| -23.49097|
|2021 | 151.7945| -23.69604|
|2021 | 151.8789| -23.61902|
|2021 | 151.8977| -23.49126|
|2021 | 151.8814| -23.60803|
|2021 | 151.8250| -23.61652|
|2021 | 151.8473| -23.58226|
|2020 | 151.8334| -23.43713|
|2020 | 151.8650| -23.39184|
|2020 | 151.7018| -23.29463|
|2020 | 151.6709| -23.33659|
|2020 | 152.0276| -23.40897|
|2020 | 151.8111| -23.42271|
|2020 | 151.9733| -23.42767|
|2020 | 152.0211| -23.41244|
|2020 | 151.8712| -23.65240|
|2020 | 151.9095| -23.51222|
|2020 | 151.9301| -23.52330|
|2020 | 151.8817| -23.60455|
|2020 | 151.8553| -23.62631|
|2020 | 151.8530| -23.59368|
|2020 | 151.9107| -23.69545|
|2020 | 151.9766| -23.47657|
|2020 | 152.0145| -23.53364|
|2020 | 151.8122| -23.62238|
|2020 | 151.8008| -23.65538|
|2020 | 151.6813| -22.93562|
|2020 | 151.7781| -22.89655|

Solution

  • As others have commented, the main problem is that circular() by default expects different kind of circular data and when you are working with geographical angles you have to specify the units (degrees), rotation (clockwise) and where zero is located (π/2). When you do this, you will get the correct mean values.

    Another problem, however, may arise with the variance. Circular variance varies from 0 to 1. A variance of one means that a variable has a very large spread and a variance of 0 means that all data are concentrated at one point (Cremers & Klugkist 2018). I am not sure what the conventions are for visualizing it (if there are any), so I drew it so that it encompasses the respective proportion of the full circle. A technical issue one runs into is that using polar coordinates, when this range crosses 0/360 it is not possible to draw it with one segment. Therefore, I had to divide such cases in two parts (before and after 0/360).

    mnbears <- bears %>%
      mutate(bearing.circ = circular(bearing, units='degrees', rotation='clock', 
                                     zero=pi/2, modulo='2pi')) %>%
      group_by(Year) %>%
      summarise(mnDir = mean(bearing.circ),
                var = var(bearing.circ)) %>%
      mutate(lower = (mnDir - 180 * var) %% 360,
             upper = (mnDir + 180 * var) %% 360)
    
    # split [lower, upper] ranges which cross zero/360 
    #   into [lower, 360] and [0, upper]
    cross.zero <- mnbears$lower > mnbears$upper
    if (any(cross.zero)) {
      for (i in which(cross.zero)) {
        
        tmp <- mnbears[rep(i, 2), ]
        tmp$upper[1] <- 360
        tmp$lower[2] <- 0
        mnbears <- rbind(mnbears, tmp)
      }
      
      mnbears <- mnbears[-which(cross.zero), ]
    }
    

    I didn't join bears and mnbears as the mean/variance segments would be drawn over and over. Also, I had to change Year to factor as otherwise there was na error.

    bears$Year <- factor(bears$Year)
    mnbears$Year <- factor(mnbears$Year)
    
    bDist<-ggplot(bears, aes(x = bearing360, fill = Year)) +
      geom_histogram(binwidth = 15, boundary = -7.5, colour = "black", size = .25) +
      guides(fill = guide_legend(reverse = TRUE)) +
      coord_polar() +
      scale_x_continuous(limits = c(0,360),
                         breaks = seq(0, 360, by = 90),
                         minor_breaks = seq(0, 360, by = 45)) +
      scale_fill_brewer() +
      facet_grid(~Year) +
      theme(panel.background = element_rect(fill = "white"),
            panel.grid = element_line(color = "grey"),
            strip.background = element_blank(),
            strip.text.x = element_text(face = 'bold', size = 11),
            axis.title.x = element_blank(),
            axis.title.y = element_blank())
    
    bDist + 
      geom_segment(aes(x = mnDir, y = 10, xend = mnDir, yend = 20), col='black', 
                   data=mnbears) +
      geom_segment(aes(x = lower, y = 20, xend = upper, yend = 20), col='black', 
                   data=mnbears) 
    

    circular data with mean and variance