Search code examples
rdata-wranglingeventdata

transform event to country-month dataset


I have an event dataset counting violent events and casualties. For my research, I need to transform it to a country-month basis. The challenging part is, that I need to distribute the casualties from one violent event equally throughout the event-period. For example, if event A started on 15.07.2020, ended on 31.08.2020, and resulted in 100 casualties, the dataset would record 37 casualties for July and 63 for August.

The desired outcome would look like this:

Country Month deahts_a deaths_b deaths_civilians
Afghanistan 01/2020 10 1 0
Afghanistan 02/2020 12 19 0
Afghanistan 03/2020 1 3 4
Afghanistan 04/2020 2 10 9
Afghanistan 05/2020 19 0 0

Here is a subset of my data:

structure(list(country = c("Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan"), date_start = structure(c(1501459200, 
1629936000, 1630108800, 1630195200, 600134400, 600825600, 601516800, 
602121600, 602208000, 602553600, 602899200, 603072000, 603417600, 
603590400, 603849600, 604022400, 604022400, 604022400, 605318400, 
605318400, 605923200, 607651200, 607651200, 607737600, 607824000, 
608601600, 608947200, 609033600, 609120000, 609292800, 609638400, 
611280000, 612576000, 612662400, 612662400, 612835200, 613699200, 
613785600, 614476800, 614563200, 619228800, 619747200, 620956800, 
621475200, 621475200, 621475200, 621475200, 621475200, 622252800, 
625449600, 625449600, 625536000, 625622400, 625795200, 630460800, 
638928000, 639360000, 647654400, 647827200, 648432000, 648432000, 
655689600, 656726400, 657849600, 657936000, 669168000, 669945600, 
673574400, 674784000, 675388800, 678585600, 679708800, 682300800, 
682300800, 682300800, 682387200, 693792000, 699062400, 699321600, 
843868800, 843955200, 844473600, 844473600, 844732800, 844905600, 
617155200, 628560000, 654825600, 655084800, 655516800, 655516800, 
656208000, 657849600, 657936000, 659318400, 659664000, 666748800, 
672105600, 672278400, 681004800, 681264000, 687571200, 687744000, 
687744000, 687916800, 703555200, 703814400, 703900800, 704160000, 
704160000, 704246400, 704246400, 704246400, 704419200, 704937600, 
704937600, 705024000, 706406400, 706665600, 707356800, 707356800, 
709257600, 710035200, 710208000, 710208000, 712627200, 712713600, 
712886400, 713059200, 713232000, 713318400, 713404800, 713404800, 
713404800, 713404800, 713404800, 713664000, 713836800, 714182400, 
714182400, 714355200, 714355200, 714528000, 714528000, 714614400, 
714960000, 715305600, 715305600, 715305600, 715305600, 716169600, 
716256000, 717984000, 725932800, 725932800, 725932800, 726278400, 
726451200, 727401600, 727401600, 727401600, 727401600, 727401600, 
727401600, 727401600, 727747200, 727920000, 728006400, 728092800, 
728092800, 728352000, 728352000, 728438400, 728611200, 728784000, 
728784000, 729129600, 729475200, 729475200, 729993600, 730857600, 
734659200, 734918400, 735091200, 736732800, 737164800, 737164800, 
737164800, 737164800, 737164800, 737251200, 737510400, 737510400, 
737683200, 737769600, 737942400, 744681600, 744940800, 745632000, 
745977600), tzone = "UTC", class = c("POSIXct", "POSIXt")), date_end = structure(c(1501459200, 
1629936000, 1630108800, 1630195200, 600652800, 601084800, 601603200, 
602640000, 602208000, 602553600, 602899200, 603072000, 603417600, 
603590400, 603849600, 604022400, 604022400, 604454400, 605318400, 
605318400, 607305600, 607651200, 607737600, 607737600, 607824000, 
608601600, 608947200, 609033600, 609120000, 609292800, 609638400, 
611280000, 612921600, 612835200, 612662400, 612835200, 613699200, 
613785600, 614476800, 614563200, 619228800, 619747200, 620956800, 
621475200, 621648000, 621648000, 621648000, 621648000, 622252800, 
625449600, 625449600, 625536000, 625622400, 625795200, 630633600, 
647481600, 639360000, 647654400, 6.48e+08, 648432000, 648432000, 
655689600, 656899200, 657849600, 657936000, 669168000, 669945600, 
673574400, 674784000, 675820800, 678585600, 679708800, 682387200, 
682387200, 682387200, 682387200, 693792000, 699062400, 699321600, 
843868800, 843955200, 844473600, 844819200, 844819200, 844905600, 
617155200, 628560000, 654998400, 655084800, 655689600, 655689600, 
656467200, 657849600, 657936000, 659318400, 659664000, 666748800, 
672105600, 672624000, 683510400, 681264000, 687571200, 687744000, 
687744000, 687916800, 703555200, 703814400, 703900800, 704160000, 
704678400, 704246400, 704246400, 704246400, 704419200, 704937600, 
705024000, 705024000, 706406400, 706752000, 709862400, 709862400, 
709257600, 710035200, 710208000, 710208000, 712627200, 712713600, 
712886400, 713145600, 713232000, 713318400, 713577600, 713836800, 
714960000, 714528000, 714960000, 713664000, 713836800, 714268800, 
714182400, 714355200, 714355200, 714614400, 714528000, 714614400, 
714960000, 715305600, 715392000, 715824000, 715824000, 716169600, 
716256000, 718156800, 726710400, 727315200, 726019200, 726278400, 
726451200, 728611200, 729043200, 729302400, 727401600, 727488000, 
727574400, 729820800, 727747200, 727920000, 728006400, 728092800, 
728179200, 728352000, 728352000, 728438400, 728611200, 728784000, 
728784000, 729129600, 729475200, 729475200, 729993600, 730857600, 
734659200, 734918400, 735091200, 736819200, 737510400, 737164800, 
737424000, 737942400, 738374400, 737251200, 737596800, 737510400, 
737683200, 737769600, 737942400, 744681600, 744940800, 745632000, 
746236800), tzone = "UTC", class = c("POSIXct", "POSIXt")), deaths_a = c(0, 
13, 0, 0, 6, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 68, 0, 0, 7, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 
0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 50, 2, 0, 0, 0, 0, 1, 
0, 0, 0, 0, 2, 0, 0, 50, 17, 10, 0, 0, 0, 0, 0, 0, 0, 5, 1, 5, 
165, 100, 0, 0, 0, 95, 0, 0, 0, 125, 0, 0, 0, 0, 0, 0, 0, 100, 
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 40, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 
0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), deaths_b = c(4, 
1, 2, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 3, 10, 0, 0, 0, 0, 0, 
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 17, 
0, 0, 9, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 100, 2, 8, 15, 15, 
0, 0, 45, 0, 0, 0, 0, 28, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 
0, 15, 0, 0, 0, 0, 1, 0, 7, 36, 35, 0, 0, 0, 0, 0, 5, 6, 0, 0, 
16, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 
0, 0, 0, 0, 20, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
350, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    deaths_civilians = c(0, 141, 0, 10, 0, 0, 0, 0, 0, 0, 0, 
    0, 5, 7, 0, 0, 0, 0, 0, 4, 0, 6, 4, 6, 0, 2, 1, 0, 2, 2, 
    20, 0, 0, 0, 4, 2, 1, 0, 2, 2, 1, 0, 8, 3, 0, 0, 0, 0, 0, 
    0, 19, 0, 0, 0, 4, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 20, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 
    0, 0, 0, 700, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 1, 0, 
    1, 0, 0, 0, 0, 0, 0, 0, 0, 766, 0, 80, 0, 24, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 3, 62, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 
-200L), class = c("tbl_df", "tbl", "data.frame"))

Solution

  • I reduced your example to a single event for clarity:

    library(tidyverse)
    
    df <- tibble(
      event = c("A"),
      date_start = ymd("2020-07-15"),
      date_end = ymd("2020-08-31"),
      event_length = time_length(interval(date_start, date_end), unit = "days") + 1,
      casualties = 100
    )
    
    month_grid <- tibble(
      month_start = seq(ymd("2020-06-01"), ymd("2020-10-01"), by = "month"),
      month_end = rollforward(month_start)
    )
    
    month_grid |> 
      left_join(df, join_by(overlaps(month_start, month_end, date_start, date_end))) |> 
      mutate(days = time_length(interval(pmax(month_start, date_start), 
             pmin(month_end, date_end)), unit = "day") + 1) |> 
      summarize(
        casualties = days / event_length * casualties,
        .by = c(month_start, event)
      )
    #> # A tibble: 5 × 3
    #>   month_start event casualties
    #>   <date>      <chr>      <dbl>
    #> 1 2020-06-01  <NA>        NA  
    #> 2 2020-07-01  A           35.4
    #> 3 2020-08-01  A           64.6
    #> 4 2020-09-01  <NA>        NA  
    #> 5 2020-10-01  <NA>        NA
    

    Created on 2024-01-08 with reprex v2.0.2