I have an event dataset counting violent events and casualties. For my research, I need to transform it to a country-month basis. The challenging part is, that I need to distribute the casualties from one violent event equally throughout the event-period. For example, if event A started on 15.07.2020, ended on 31.08.2020, and resulted in 100 casualties, the dataset would record 37 casualties for July and 63 for August.
The desired outcome would look like this:
Country | Month | deahts_a | deaths_b | deaths_civilians |
---|---|---|---|---|
Afghanistan | 01/2020 | 10 | 1 | 0 |
Afghanistan | 02/2020 | 12 | 19 | 0 |
Afghanistan | 03/2020 | 1 | 3 | 4 |
Afghanistan | 04/2020 | 2 | 10 | 9 |
Afghanistan | 05/2020 | 19 | 0 | 0 |
Here is a subset of my data:
structure(list(country = c("Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan"), date_start = structure(c(1501459200,
1629936000, 1630108800, 1630195200, 600134400, 600825600, 601516800,
602121600, 602208000, 602553600, 602899200, 603072000, 603417600,
603590400, 603849600, 604022400, 604022400, 604022400, 605318400,
605318400, 605923200, 607651200, 607651200, 607737600, 607824000,
608601600, 608947200, 609033600, 609120000, 609292800, 609638400,
611280000, 612576000, 612662400, 612662400, 612835200, 613699200,
613785600, 614476800, 614563200, 619228800, 619747200, 620956800,
621475200, 621475200, 621475200, 621475200, 621475200, 622252800,
625449600, 625449600, 625536000, 625622400, 625795200, 630460800,
638928000, 639360000, 647654400, 647827200, 648432000, 648432000,
655689600, 656726400, 657849600, 657936000, 669168000, 669945600,
673574400, 674784000, 675388800, 678585600, 679708800, 682300800,
682300800, 682300800, 682387200, 693792000, 699062400, 699321600,
843868800, 843955200, 844473600, 844473600, 844732800, 844905600,
617155200, 628560000, 654825600, 655084800, 655516800, 655516800,
656208000, 657849600, 657936000, 659318400, 659664000, 666748800,
672105600, 672278400, 681004800, 681264000, 687571200, 687744000,
687744000, 687916800, 703555200, 703814400, 703900800, 704160000,
704160000, 704246400, 704246400, 704246400, 704419200, 704937600,
704937600, 705024000, 706406400, 706665600, 707356800, 707356800,
709257600, 710035200, 710208000, 710208000, 712627200, 712713600,
712886400, 713059200, 713232000, 713318400, 713404800, 713404800,
713404800, 713404800, 713404800, 713664000, 713836800, 714182400,
714182400, 714355200, 714355200, 714528000, 714528000, 714614400,
714960000, 715305600, 715305600, 715305600, 715305600, 716169600,
716256000, 717984000, 725932800, 725932800, 725932800, 726278400,
726451200, 727401600, 727401600, 727401600, 727401600, 727401600,
727401600, 727401600, 727747200, 727920000, 728006400, 728092800,
728092800, 728352000, 728352000, 728438400, 728611200, 728784000,
728784000, 729129600, 729475200, 729475200, 729993600, 730857600,
734659200, 734918400, 735091200, 736732800, 737164800, 737164800,
737164800, 737164800, 737164800, 737251200, 737510400, 737510400,
737683200, 737769600, 737942400, 744681600, 744940800, 745632000,
745977600), tzone = "UTC", class = c("POSIXct", "POSIXt")), date_end = structure(c(1501459200,
1629936000, 1630108800, 1630195200, 600652800, 601084800, 601603200,
602640000, 602208000, 602553600, 602899200, 603072000, 603417600,
603590400, 603849600, 604022400, 604022400, 604454400, 605318400,
605318400, 607305600, 607651200, 607737600, 607737600, 607824000,
608601600, 608947200, 609033600, 609120000, 609292800, 609638400,
611280000, 612921600, 612835200, 612662400, 612835200, 613699200,
613785600, 614476800, 614563200, 619228800, 619747200, 620956800,
621475200, 621648000, 621648000, 621648000, 621648000, 622252800,
625449600, 625449600, 625536000, 625622400, 625795200, 630633600,
647481600, 639360000, 647654400, 6.48e+08, 648432000, 648432000,
655689600, 656899200, 657849600, 657936000, 669168000, 669945600,
673574400, 674784000, 675820800, 678585600, 679708800, 682387200,
682387200, 682387200, 682387200, 693792000, 699062400, 699321600,
843868800, 843955200, 844473600, 844819200, 844819200, 844905600,
617155200, 628560000, 654998400, 655084800, 655689600, 655689600,
656467200, 657849600, 657936000, 659318400, 659664000, 666748800,
672105600, 672624000, 683510400, 681264000, 687571200, 687744000,
687744000, 687916800, 703555200, 703814400, 703900800, 704160000,
704678400, 704246400, 704246400, 704246400, 704419200, 704937600,
705024000, 705024000, 706406400, 706752000, 709862400, 709862400,
709257600, 710035200, 710208000, 710208000, 712627200, 712713600,
712886400, 713145600, 713232000, 713318400, 713577600, 713836800,
714960000, 714528000, 714960000, 713664000, 713836800, 714268800,
714182400, 714355200, 714355200, 714614400, 714528000, 714614400,
714960000, 715305600, 715392000, 715824000, 715824000, 716169600,
716256000, 718156800, 726710400, 727315200, 726019200, 726278400,
726451200, 728611200, 729043200, 729302400, 727401600, 727488000,
727574400, 729820800, 727747200, 727920000, 728006400, 728092800,
728179200, 728352000, 728352000, 728438400, 728611200, 728784000,
728784000, 729129600, 729475200, 729475200, 729993600, 730857600,
734659200, 734918400, 735091200, 736819200, 737510400, 737164800,
737424000, 737942400, 738374400, 737251200, 737596800, 737510400,
737683200, 737769600, 737942400, 744681600, 744940800, 745632000,
746236800), tzone = "UTC", class = c("POSIXct", "POSIXt")), deaths_a = c(0,
13, 0, 0, 6, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 68, 0, 0, 7,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0,
0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 50, 2, 0, 0, 0, 0, 1,
0, 0, 0, 0, 2, 0, 0, 50, 17, 10, 0, 0, 0, 0, 0, 0, 0, 5, 1, 5,
165, 100, 0, 0, 0, 95, 0, 0, 0, 125, 0, 0, 0, 0, 0, 0, 0, 100,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 40, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17,
0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), deaths_b = c(4,
1, 2, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 3, 10, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 17,
0, 0, 9, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 100, 2, 8, 15, 15,
0, 0, 45, 0, 0, 0, 0, 28, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18,
0, 15, 0, 0, 0, 0, 1, 0, 7, 36, 35, 0, 0, 0, 0, 0, 5, 6, 0, 0,
16, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0,
0, 0, 0, 0, 20, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
350, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
deaths_civilians = c(0, 141, 0, 10, 0, 0, 0, 0, 0, 0, 0,
0, 5, 7, 0, 0, 0, 0, 0, 4, 0, 6, 4, 6, 0, 2, 1, 0, 2, 2,
20, 0, 0, 0, 4, 2, 1, 0, 2, 2, 1, 0, 8, 3, 0, 0, 0, 0, 0,
0, 19, 0, 0, 0, 4, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 20, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
0, 0, 0, 700, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 766, 0, 80, 0, 24, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 3, 62,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-200L), class = c("tbl_df", "tbl", "data.frame"))
I reduced your example to a single event for clarity:
library(tidyverse)
df <- tibble(
event = c("A"),
date_start = ymd("2020-07-15"),
date_end = ymd("2020-08-31"),
event_length = time_length(interval(date_start, date_end), unit = "days") + 1,
casualties = 100
)
month_grid <- tibble(
month_start = seq(ymd("2020-06-01"), ymd("2020-10-01"), by = "month"),
month_end = rollforward(month_start)
)
month_grid |>
left_join(df, join_by(overlaps(month_start, month_end, date_start, date_end))) |>
mutate(days = time_length(interval(pmax(month_start, date_start),
pmin(month_end, date_end)), unit = "day") + 1) |>
summarize(
casualties = days / event_length * casualties,
.by = c(month_start, event)
)
#> # A tibble: 5 × 3
#> month_start event casualties
#> <date> <chr> <dbl>
#> 1 2020-06-01 <NA> NA
#> 2 2020-07-01 A 35.4
#> 3 2020-08-01 A 64.6
#> 4 2020-09-01 <NA> NA
#> 5 2020-10-01 <NA> NA
Created on 2024-01-08 with reprex v2.0.2