I have been working in R with the Correlates of War dataset on disputes and wars. I have been trying to create (with mutate) a "stateadem" variable that tells us whether statea is democratic, based on its ID number.
To do this, I used a long case_when statement inside mutate. However, when I view the data frame, some of the disputes with Russia as statea come up as 1 in stateadem! I am very confused about why this is the case. I have dplyr 8.5.
install.packages("dplyr")
...
library(dplyr)
# Read the Stata file "dyads1.dta" into `df`.
# TRUE is spelled out because the shorthand `T` is an ordinary variable
# that can be reassigned.
# NOTE(review): read.dta13() is not a dplyr function; presumably it comes
# from the readstata13 package loaded in the elided part of the script --
# confirm.
df <- read.dta13(
  "dyads1.dta",
  nonint.factors = TRUE,
  generate.factors = TRUE
)
# Derive `stateadem` from the state code `statea` and the dispute `year`:
#    1 = state coded as democratic in that year
#    0 = state coded as not democratic in that year
#   -1 = `statea` is NA, or no rule below matches the row
#
# case_when() assigns the value of the FIRST condition that is TRUE for a
# row, so every condition must be a genuine comparison (`==`). A single `=`
# inside parentheses is an assignment: `(statea = 200)` evaluates to 200,
# which is truthy, so the rule fires for every row that satisfies the year
# test and has not been matched earlier. That is what made later codes
# (e.g. 365) come out as 1.
df_use <- df_use %>%
  mutate(stateadem = case_when(
    is.na(statea) ~ -1,
    statea == 2 ~ 1,
    statea == 20 ~ 1,
    statea == 40 ~ 0,
    statea == 41 ~ 0,
    # NOTE(review): both branches for 42 yield 1, so the year split has no
    # effect -- confirm whether the earlier period was meant to be 0.
    statea == 42 & year > 1980 ~ 1,
    statea == 42 & year <= 1980 ~ 1,
    statea == 51 ~ 1,
    statea == 52 ~ 1,
    statea == 53 ~ 1,
    statea == 54 ~ 1,
    statea == 55 ~ 1,
    statea == 56 ~ 1,
    statea == 57 ~ 1,
    statea == 58 ~ 1,
    statea == 70 & year > 2000 ~ 1,
    statea == 70 & year <= 2000 ~ 0,
    statea == 80 ~ 1,
    statea == 90 & year > 1997 ~ 1,
    statea == 90 & year <= 1997 ~ 0,
    statea == 91 & year > 1982 ~ 1,
    statea == 91 & year <= 1982 ~ 0,
    statea == 92 & year > 1991 ~ 1,
    statea == 92 & year <= 1991 ~ 0,
    statea == 93 & year > 1991 ~ 1,
    statea == 93 & year <= 1991 ~ 0,
    statea == 94 ~ 1,
    statea == 95 & year > 1991 ~ 1,
    statea == 95 & year <= 1991 ~ 0,
    statea == 100 & year > 1959 ~ 1,
    statea == 100 & year <= 1959 ~ 0,
    statea == 101 & year < 1959 ~ 0,
    statea == 101 & year >= 1959 & year < 2002 ~ 1,
    statea == 101 & year >= 2002 ~ 0,
    statea == 110 & year < 1994 ~ 0,
    statea == 110 & year >= 1994 ~ 1,
    statea == 115 ~ 0,
    # NOTE(review): both post-1981 branches for 130 yield 1 -- confirm the
    # intended value for year >= 2005.
    statea == 130 & year < 1981 ~ 0,
    statea == 130 & year >= 1981 & year < 2005 ~ 1,
    statea == 130 & year >= 2005 ~ 1,
    statea == 135 & year < 1981 ~ 0,
    statea == 135 & year >= 1981 & year < 1991 ~ 1,
    statea == 135 & year >= 1991 & year < 2000 ~ 0,
    statea == 135 & year >= 2000 ~ 1,
    statea == 140 & year < 1985 ~ 0,
    statea == 140 & year >= 1985 ~ 1,
    statea == 145 & year < 1984 ~ 0,
    statea == 145 & year >= 1984 ~ 1,
    statea == 150 & year < 1994 ~ 0,
    statea == 150 & year >= 1994 ~ 1,
    statea == 155 & year < 1991 ~ 0,
    statea == 155 & year >= 1991 ~ 1,
    statea == 160 & year < 1985 ~ 0,
    statea == 160 & year >= 1985 ~ 1,
    statea == 165 & year < 1954 ~ 0,
    statea == 165 & year >= 1954 & year < 1971 ~ 1,
    statea == 165 & year >= 1971 & year < 1987 ~ 0,
    statea == 165 & year >= 1987 ~ 1,
    # Fixed: these two rules previously used `statea = 200` (assignment).
    # NOTE(review): the original pair was `> 1884` / `< 1884`, leaving 1884
    # itself unmatched; it is assigned to the 0 branch here, following the
    # `> X ~ 1` / `<= X ~ 0` pattern used elsewhere -- confirm.
    statea == 200 & year > 1884 ~ 1,
    statea == 200 & year <= 1884 ~ 0,
    statea == 205 ~ 1,
    statea == 210 ~ 1,
    statea == 211 ~ 1,
    statea == 212 ~ 1,
    # NOTE(review): all three branches for 220 yield 1 -- confirm whether
    # the 1956-1970 period was meant to be 0.
    statea == 220 & year < 1956 ~ 1,
    statea == 220 & year >= 1956 & year <= 1970 ~ 1,
    statea == 220 & year > 1970 ~ 1,
    statea == 225 ~ 1,
    statea == 230 & year < 1980 ~ 0,
    statea == 230 & year >= 1980 ~ 1,
    statea == 235 & year < 1976 ~ 0,
    statea == 235 & year >= 1976 ~ 1,
    statea == 240 ~ 1,
    statea == 245 ~ 1,
    statea == 255 & year > 1990 ~ 1,
    statea == 255 & year <= 1990 ~ 0,
    statea == 260 ~ 1,
    statea == 265 ~ 0,
    statea == 267 ~ 1,
    statea == 269 ~ 1,
    statea == 271 ~ 1,
    statea == 273 ~ 0,
    statea == 275 ~ 0,
    statea == 280 ~ 0,
    statea == 290 & year > 1990 ~ 1,
    statea == 290 & year <= 1990 ~ 0,
    statea == 300 ~ 0,
    statea == 305 ~ 1,
    statea == 310 & year > 1990 ~ 1,
    statea == 310 & year <= 1990 ~ 0,
    statea == 315 & year <= 1990 & year > 1950 ~ 0,
    statea == 315 & year <= 1950 ~ 1,
    statea == 315 & year > 1990 ~ 1,
    statea == 316 ~ 1,
    statea == 317 ~ 1,
    statea == 325 & year < 1948 ~ 0,
    statea == 325 & year >= 1948 ~ 1,
    statea == 327 ~ 0,
    statea == 329 ~ 0,
    statea == 332 ~ 0,
    statea == 337 ~ 1,
    statea == 338 ~ 0,
    statea == 339 & year > 2000 ~ 1,
    statea == 339 & year <= 2000 ~ 0,
    statea == 343 ~ 1,
    statea == 344 & year > 2000 ~ 1,
    statea == 344 & year <= 2000 ~ 0,
    statea == 345 ~ 0,
    statea == 346 ~ 0,
    statea == 350 & year >= 1976 ~ 1,
    statea == 350 & year < 1950 ~ 1,
    statea == 350 & year >= 1950 & year < 1976 ~ 0,
    statea == 352 ~ 1,
    statea == 355 & year < 1991 ~ 0,
    statea == 355 & year >= 1991 ~ 1,
    statea == 359 & year >= 1994 ~ 1,
    statea == 359 & year < 1994 ~ 0,
    statea == 360 & year < 1991 ~ 0,
    statea == 360 & year >= 1991 ~ 1,
    statea == 365 ~ 0,
    statea == 366 ~ 1,
    statea == 367 ~ 1,
    statea == 368 ~ 1,
    statea == 369 ~ 1,
    statea == 370 ~ 0,
    statea == 371 ~ 0,
    statea == 372 & year > 2003 ~ 1,
    statea == 372 & year <= 2003 ~ 0,
    statea == 373 ~ 0,
    statea == 375 ~ 1,
    statea == 380 ~ 1,
    statea == 385 ~ 1,
    statea == 390 ~ 1,
    statea == 395 ~ 1,
    statea == 404 ~ 0,
    statea == 411 ~ 0,
    statea == 420 & year > 1994 ~ 0,
    statea == 420 & year <= 1994 ~ 1,
    statea == 432 & year < 1992 ~ 0,
    statea == 432 & year >= 1992 ~ 1,
    statea == 433 & year < 2000 ~ 0,
    statea == 433 & year >= 2000 ~ 1,
    statea == 434 & year < 1991 ~ 0,
    statea == 434 & year >= 1991 ~ 1,
    statea == 435 ~ 0,
    statea == 436 ~ 0,
    statea == 437 ~ 0,
    statea == 438 ~ 0,
    statea == 439 ~ 0,
    statea == 450 ~ 0,
    statea == 451 ~ 0,
    statea == 452 & year < 2005 ~ 0,
    statea == 452 & year >= 2005 ~ 1,
    statea == 461 ~ 0,
    statea == 471 ~ 0,
    statea == 475 ~ 0,
    statea == 481 ~ 0,
    statea == 482 ~ 0,
    statea == 483 ~ 0,
    statea == 484 ~ 0,
    statea == 490 ~ 0,
    statea == 500 ~ 0,
    statea == 501 & year <= 2002 ~ 0,
    statea == 501 & year > 2002 ~ 1,
    statea == 510 ~ 0,
    statea == 511 ~ 0,
    statea == 516 & year <= 2006 ~ 0,
    statea == 516 & year > 2006 ~ 1,
    statea == 517 ~ 0,
    statea == 520 & year <= 1966 ~ 1,
    statea == 520 & year > 1966 ~ 0,
    statea == 522 ~ 0,
    statea == 530 ~ 0,
    statea == 531 ~ 0,
    statea == 540 ~ 0,
    statea == 541 ~ 0,
    statea == 551 ~ 0,
    statea == 552 ~ 0,
    statea == 553 ~ 0,
    statea == 560 & year < 1994 ~ 0,
    statea == 560 & year >= 1994 ~ 1,
    statea == 565 ~ 1,
    statea == 570 & year < 1995 ~ 0,
    statea == 570 & year >= 1995 ~ 1,
    statea == 571 ~ 1,
    statea == 572 ~ 0,
    statea == 580 & year < 1991 ~ 0,
    statea == 580 & year >= 1991 ~ 1,
    statea == 600 ~ 0,
    statea == 615 ~ 0,
    statea == 616 ~ 0,
    statea == 620 ~ 0,
    statea == 625 ~ 0,
    statea == 626 ~ 0,
    statea == 630 ~ 0,
    # Fixed: the year list previously contained `year = 1971` (assignment),
    # which made the whole OR-chain TRUE and coded every 640 row as 0.
    # %in% tests membership in the same set of years and never returns NA.
    statea == 640 &
      year %in% c(
        1954, 1955, 1956, 1957, 1958, 1960, 1961,
        1971, 1972, 1980, 1982, 1983, 1984
      ) ~ 0,
    statea == 640 ~ 1,
    statea == 645 ~ 0,
    statea == 651 ~ 0,
    statea == 652 ~ 0,
    statea == 660 & year <= 2005 ~ 0,
    statea == 660 & year > 2005 ~ 1,
    statea == 663 ~ 0,
    statea == 666 ~ 1,
    statea == 670 ~ 0,
    statea == 678 ~ 0,
    statea == 679 ~ 0,
    statea == 680 ~ 0,
    statea == 690 ~ 0,
    statea == 692 ~ 0,
    statea == 694 ~ 0,
    statea == 696 ~ 0,
    statea == 698 ~ 0,
    statea == 700 ~ 0,
    statea == 701 ~ 0,
    statea == 702 ~ 0,
    # NOTE(review): 703 has no rule for year > 2010, so those rows fall
    # through to the -1 default -- confirm whether a second branch is
    # missing.
    statea == 703 & year <= 2010 ~ 0,
    statea == 704 ~ 0,
    statea == 705 ~ 0,
    statea == 710 ~ 0,
    statea == 713 & year <= 1991 ~ 0,
    statea == 713 & year > 1991 ~ 1,
    statea == 730 & year <= 1991 ~ 0,
    statea == 730 & year > 1991 ~ 1,
    statea == 731 ~ 0,
    statea == 732 & year <= 1991 ~ 0,
    statea == 732 & year > 1991 ~ 1,
    statea == 740 & year <= 1950 ~ 0,
    statea == 740 & year > 1950 ~ 1,
    statea == 750 ~ 1,
    statea == 770 & year < 1971 ~ 0,
    statea == 770 & year >= 1971 & year < 1977 ~ 1,
    statea == 770 & year >= 1977 & year < 1990 ~ 0,
    statea == 770 & year >= 1990 & year < 2000 ~ 1,
    statea == 770 & year >= 2000 & year <= 2009 ~ 0,
    statea == 770 & year > 2009 ~ 1,
    statea == 771 & year < 1972 ~ 1,
    statea == 771 & year >= 1972 & year <= 1990 ~ 0,
    statea == 771 & year > 1990 & year <= 2005 ~ 1,
    statea == 771 & year > 2005 ~ 0,
    statea == 775 & year <= 1962 ~ 1,
    statea == 775 & year > 1962 ~ 0,
    statea == 780 & year <= 1984 ~ 1,
    statea == 780 & year > 1984 ~ 0,
    statea == 790 ~ 0,
    statea == 800 & year < 1991 ~ 0,
    statea == 800 & year >= 1991 & year < 2005 ~ 1,
    statea == 800 & year >= 2005 ~ 0,
    statea == 811 ~ 0,
    statea == 812 & year <= 1960 ~ 1,
    statea == 812 & year > 1960 ~ 0,
    statea == 816 ~ 0,
    statea == 817 ~ 0,
    statea == 820 & year <= 1969 ~ 1,
    statea == 820 & year > 1969 ~ 0,
    statea == 830 ~ 0,
    statea == 840 & year <= 1990 ~ 0,
    statea == 840 & year > 1990 ~ 1,
    statea == 850 & year < 2000 ~ 0,
    statea == 850 & year >= 2000 ~ 1,
    statea == 900 ~ 1,
    statea == 910 ~ 0,
    statea == 920 ~ 1,
    statea == 940 ~ 1,
    statea == 986 ~ 1,
    # Default: any state code (or state/year combination) without a rule.
    TRUE ~ -1
  ))
Try to make a reproducible example so that I or someone else can help you.
Or at least specify which ID represents Russia, or include a printout of the "wrong" data frame.
Anyway, this case_when is really huge, so it is easy to make mistakes in it.
Here is one way I think you could resolve your problem; try this:
I created a fake my_list for the case where the data runs from 1980 to 2020.
Create a new, empty column; its initial value doesn't matter.
Then use a for loop that takes each country in turn, checks the year, and sets isDem according to whether or not the year is in that country's entry of my_list (which lists the years of democracy).
I hope this works.
# Lookup of democratic years: each element is named by a state code (as a
# string) and holds the years in which that state is coded as democratic.
my_list <- list(
  `51` = c(1980:1985, 2000:2020),
  `52` = 1980:2020,
  `54` = c(1980:2000, 2007:2020)
)

# Start from a *numeric* NA column. A bare NA is logical, and case_when()
# in dplyr versions before 1.1.0 requires every right-hand side to have the
# same type, so `TRUE ~ isDem` would clash with the numeric 1 / 0 below.
df$isDem <- NA_real_

# For each listed state, code its rows 1 when the year is in that state's
# democratic years and 0 otherwise; rows of other states keep their current
# isDem value (NA until their own state is processed, or forever if the
# state is not in my_list).
for (country in names(my_list)) {
  # List names are character strings; convert back to the numeric code.
  my_country <- as.numeric(country)
  df <- df %>%
    dplyr::mutate(isDem = dplyr::case_when(
      statea == my_country & year %in% my_list[[country]] ~ 1,
      statea == my_country & !(year %in% my_list[[country]]) ~ 0,
      TRUE ~ isDem
    ))
}
Unfortunately your example isn't reproducible, so I can't test whether my solution works for you.
But even if it has some mistakes, you can fix them and try the logic.
Next time, please try to make your issue reproducible.