Search code examples
rsynth

How to fix the error "unit.names.variable not found as character variable in foo" in the Synth package?


I am using the Synth package (see ftp://cran.r-project.org/pub/R/web/packages/Synth/Synth.pdf) in R.

This is a part of my data frame:

# Six-row excerpt of the data frame (Aruba, 1950-1955); the structure(list(...))
# form looks like dput() output.
# As written here, country, isocode, currency and the i_* columns are factors
# (not character vectors), year and id are integers, and every measurement
# column (rgdpe ... pl_k) as well as the i_* flags are NA in these rows.
df <- structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Aruba", 
"Angola", "Anguilla", "Albania", "United Arab Emirates", "Argentina", 
"Armenia", "Antigua and Barbuda", "Australia", "Austria", "Azerbaijan", 
"Burundi", "Belgium", "Benin", "Burkina Faso", "Bangladesh", 
"Bulgaria", "Bahrain", "Bahamas", "Bosnia and Herzegovina", "Belarus", 
"Belize", "Bermuda", "Bolivia (Plurinational State of)", "Brazil", 
"Barbados", "Brunei Darussalam", "Bhutan", "Botswana", "Central African Republic", 
"Canada", "Switzerland", "Chile", "China", "Cote d'Ivoire", "Cameroon", 
"Congo, Democratic Republic", "Congo", "Colombia", "Comoros", 
"Cabo Verde", "Costa Rica", "Curacao", "Cayman Islands", "Cyprus", 
"Czech Republic", "Germany", "Djibouti", "Dominica", "Denmark", 
"Dominican Republic", "Algeria", "Ecuador", "Egypt", "Spain", 
"Estonia", "Ethiopia", "Finland", "Fiji", "France", "Gabon", 
"United Kingdom", "Georgia", "Ghana", "Guinea", "Gambia", "Guinea-Bissau", 
"Equatorial Guinea", "Greece", "Grenada", "Guatemala", "Guyana", 
"China, Hong Kong SAR", "Honduras", "Croatia", "Haiti", "Hungary", 
"Indonesia", "India", "Ireland", "Iran (Islamic Republic of)", 
"Iraq", "Iceland", "Israel", "Italy", "Jamaica", "Jordan", "Japan", 
"Kazakhstan", "Kenya", "Kyrgyzstan", "Cambodia", "Saint Kitts and Nevis", 
"Republic of Korea", "Kuwait", "Lao People's DR", "Lebanon", 
"Liberia", "Saint Lucia", "Sri Lanka", "Lesotho", "Lithuania", 
"Luxembourg", "Latvia", "China, Macao SAR", "Morocco", "Republic of Moldova", 
"Madagascar", "Maldives", "Mexico", "North Macedonia", "Mali", 
"Malta", "Myanmar", "Montenegro", "Mongolia", "Mozambique", "Mauritania", 
"Montserrat", "Mauritius", "Malawi", "Malaysia", "Namibia", "Niger", 
"Nigeria", "Nicaragua", "Netherlands", "Norway", "Nepal", "New Zealand", 
"Oman", "Pakistan", "Panama", "Peru", "Philippines", "Poland", 
"Portugal", "Paraguay", "State of Palestine", "Qatar", "Romania", 
"Russian Federation", "Rwanda", "Saudi Arabia", "Sudan", "Senegal", 
"Singapore", "Sierra Leone", "El Salvador", "Serbia", "Sao Tome and Principe", 
"Suriname", "Slovakia", "Slovenia", "Sweden", "Eswatini", "Sint Maarten (Dutch part)", 
"Seychelles", "Syrian Arab Republic", "Turks and Caicos Islands", 
"Chad", "Togo", "Thailand", "Tajikistan", "Turkmenistan", "Trinidad and Tobago", 
"Tunisia", "Turkey", "Taiwan", "U.R. of Tanzania: Mainland", 
"Uganda", "Ukraine", "Uruguay", "United States of America", "Uzbekistan", 
"St. Vincent & Grenadines", "Venezuela (Bolivarian Republic of)", 
"British Virgin Islands", "Viet Nam", "Yemen", "South Africa", 
"Zambia", "Zimbabwe"), class = "factor"), isocode = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = c("ABW", "AGO", "AIA", "ALB", "ARE", 
"ARG", "ARM", "ATG", "AUS", "AUT", "AZE", "BDI", "BEL", "BEN", 
"BFA", "BGD", "BGR", "BHR", "BHS", "BIH", "BLR", "BLZ", "BMU", 
"BOL", "BRA", "BRB", "BRN", "BTN", "BWA", "CAF", "CAN", "CHE", 
"CHL", "CHN", "CIV", "CMR", "COD", "COG", "COL", "COM", "CPV", 
"CRI", "CUW", "CYM", "CYP", "CZE", "DEU", "DJI", "DMA", "DNK", 
"DOM", "DZA", "ECU", "EGY", "ESP", "EST", "ETH", "FIN", "FJI", 
"FRA", "GAB", "GBR", "GEO", "GHA", "GIN", "GMB", "GNB", "GNQ", 
"GRC", "GRD", "GTM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", 
"IDN", "IND", "IRL", "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", 
"JOR", "JPN", "KAZ", "KEN", "KGZ", "KHM", "KNA", "KOR", "KWT", 
"LAO", "LBN", "LBR", "LCA", "LKA", "LSO", "LTU", "LUX", "LVA", 
"MAC", "MAR", "MDA", "MDG", "MDV", "MEX", "MKD", "MLI", "MLT", 
"MMR", "MNE", "MNG", "MOZ", "MRT", "MSR", "MUS", "MWI", "MYS", 
"NAM", "NER", "NGA", "NIC", "NLD", "NOR", "NPL", "NZL", "OMN", 
"PAK", "PAN", "PER", "PHL", "POL", "PRT", "PRY", "PSE", "QAT", 
"ROU", "RUS", "RWA", "SAU", "SDN", "SEN", "SGP", "SLE", "SLV", 
"SRB", "STP", "SUR", "SVK", "SVN", "SWE", "SWZ", "SXM", "SYC", 
"SYR", "TCA", "TCD", "TGO", "THA", "TJK", "TKM", "TTO", "TUN", 
"TUR", "TWN", "TZA", "UGA", "UKR", "URY", "USA", "UZB", "VCT", 
"VEN", "VGB", "VNM", "YEM", "ZAF", "ZMB", "ZWE"), class = "factor"), 
    year = 1950:1955, currency = structure(c(4L, 4L, 4L, 4L, 
    4L, 4L), .Label = c("Algerian Dinar", "Argentine Peso", "Armenian Dram", 
    "Aruban Guilder", "Australian Dollar", "Azerbaijanian Manat", 
    "Bahamian Dollar", "Bahraini Dinar", "Baht", "Balboa", "Barbados Dollar", 
    "Belarussian Ruble", "Belize Dollar", "Bermudian Dollar", 
    "Bolivar Fuerte", "Boliviano", "Brazilian Real", "Brunei Dollar", 
    "Bulgarian Lev", "Burundi Franc", "CFA Franc BCEAO", "CFA Franc BEAC", 
    "Cabo Verde Escudo", "Canadian Dollar", "Cayman Islands Dollar", 
    "Cedi", "Chilean Peso", "Colombian Peso", "Comoro Franc", 
    "Convertible Marks", "Cordoba Oro", "Costa Rican Colon", 
    "Croatian Kuna", "Czech Koruna", "Dalasi", "Danish Krone", 
    "Denar", "Djibouti Franc", "Dobra", "Dominican Peso", "Dong", 
    "East Caribbean Dollar", "Egyptian Pound", "Ethiopian Birr", 
    "Euro", "Fiji Dollar", "Forint", "Franc Congolais", "Gourde", 
    "Guarani", "Guinea Franc", "Guyana Dollar", "Hong Kong Dollar", 
    "Hryvnia", "Iceland Krona", "Indian Rupee", "Iranian Rial", 
    "Iraqi Dinar", "Jamaican Dollar", "Jordanian Dinar", "Kenyan Shilling", 
    "Kip", "Kuwaiti Dinar", "Kwacha", "Kwanza", "Kyat", "Lari", 
    "Lebanese Pound", "Lek", "Lempira", "Leone", "Lilangeni", 
    "Loti", "Malagasy Ariary", "Malaysian Ringgit", "Manat", 
    "Mauritius Rupee", "Metical", "Mexican Peso", "Moldovan Leu", 
    "Moroccan Dirham", "Naira", "Namibian Dollar", "Nepalese Rupee", 
    "Netherlands Antillian Guilder", "New Israeli Sheqel", "New Leu", 
    "New Taiwan Dollar", "New Turkish Lira", "New Zealand Dollar", 
    "Ngultrum", "Norwegian Krone", "Nuevo Sol", "Ouguiya", "Pakistan Rupee", 
    "Pataca", "Peso Uruguayo", "Philippine Peso", "Pound Sterling", 
    "Pula", "Qatari Rial", "Quetzal", "Rand", "Rial Omani", "Riel", 
    "Rufiyaa", "Rupiah", "Russian Ruble", "Rwanda Franc", "Saudi Riyal", 
    "Serbian Dinar", "Seychelles Rupee", "Singapore Dollar", 
    "Som", "Somoni", "Sri Lanka Rupee", "Sudanese Pound", "Surinam Dollar", 
    "Swedish Krona", "Swiss Franc", "Syrian Pound", "Taka", "Tanzanian Shilling", 
    "Tenge", "Trinidad and Tobago Dollar", "Tugrik", "Tunisian Dinar", 
    "UAE Dirham", "US Dollar", "Uganda Shilling", "Uzbekistan Sum", 
    "Won", "Yemeni Rial", "Yen", "Yuan Renminbi", "Zloty"), class = "factor"), 
    rgdpe = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), rgdpo = c(NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_), pop = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), emp = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), avh = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), hc = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ccon = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), cda = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), cgdpe = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), cgdpo = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), cn = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ck = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ctfp = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), cwtfp = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rgdpna = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rconna = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rdana = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rnna = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rkna = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rtfpna = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), rwtfpna = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), labsh = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), irr = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), delta = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), xr = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_con = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_da = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_gdpo = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), i_cig = structure(c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), .Label = c("extrapolated", "benchmark", "interpolated", 
    "ICPPPP-benchmark+interpolated", "ICPPPP-extrapolated"), class = "factor"), 
    i_xm = structure(c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), .Label = c("extrapolated", 
    "benchmark", "interpolated"), class = "factor"), i_xr = structure(c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), .Label = c("market", "estimated"), class = "factor"), 
    i_outlier = structure(c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), .Label = c("no", 
    "yes"), class = "factor"), i_irr = structure(c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
    ), .Label = c("regular", "lowcapital", "lowerbound", "outlier"
    ), class = "factor"), cor_exp = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), statcap = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), csh_c = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), csh_i = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), csh_g = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), csh_x = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), csh_m = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), csh_r = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_c = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_i = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_g = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_x = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_m = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_n = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), pl_k = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), id = c(1L, 
    1L, 1L, 1L, 1L, 1L)), row.names = c("ABW-1950", "ABW-1951", 
"ABW-1952", "ABW-1953", "ABW-1954", "ABW-1955"), class = "data.frame")

Now I want to run the following code:

library(dplyr)
library(ggplot2)
library(Synth)
library(pwt10)
#Experimental design

# Add an integer unit id (one per isocode) for use as dataprep()'s
# unit.variable. factor() keeps the existing level order and drops unused
# levels, so the numbering matches what group_indices_() produced, without
# calling that deprecated underscore-suffixed dplyr function or referring back
# to pwt10.0 inside the pipe. The result stays a plain data.frame.
df <- pwt10.0 %>%
  mutate(id = as.integer(factor(isocode)))


# ISO codes of the countries used as comparison (control) units.
comparison_states <- c(
  "ARM", "AUS", "CAN", "CHN", "GBR",
  "ITA", "JPN", "LUX", "NOR", "NZL",
  "SGP", "SWE", "THA", "TWN", "USA"
)


# Numeric ids of the comparison countries: keep their rows, collapse to one
# (isocode, id) pair per country, and return the ids as a plain vector.
control_ids <- df %>%
  filter(isocode %in% comparison_states) %>%
  distinct(isocode, id) %>%
  pull(id)
  
# Prepare the inputs for Synth: unit id 94 is the treated unit and control_ids
# are the comparison units; labsh is the outcome, year the time index.
# NOTE(review): isocode is passed as unit.names.variable. In the sample data it
# is a factor rather than a character vector, which is presumably why
# dataprep() rejects it -- confirm against the Synth documentation.
dataprep.out <- dataprep(
  foo = as.data.frame(df),
  predictors = c("rgdpe", "avh", "rconna", "rtfpna", "rkna", "emp"),
  predictors.op = "mean",
  dependent = "labsh",
  unit.variable = "id",
  time.variable = "year",
  treatment.identifier = 94,
  controls.identifier = control_ids,
  time.predictors.prior = 1991:1997,
  time.optimize.ssr = 1995:1997,
  special.predictors = list(list("labsh", 1992:1997, "mean")),
  unit.names.variable = "isocode",
  time.plot = 1992:2005
)

It keeps generating the following error message: Error in dataprep(foo = as.data.frame(df), predictors = c("rgdpe", "avh", : unit.names.variable not found as character variable in foo.

I think it should work because isocode contains string values, so I don't understand why the error occurs. How can I fix it?


Solution

  • I cannot tell you what's going on behind the scenes, but I think that Synth wants a few things:

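    First, turn factor variables into character vectors -- isocode is stored as a factor, and the error message says dataprep() needs unit.names.variable to be a character column;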

    # Convert every factor column (isocode among them) to character.
    # across() + where() is the current dplyr form of the superseded mutate_if().
    df <- df %>% mutate(across(where(is.factor), as.character))
    

    Second, make sure you don't have too many NA values -- I'm replacing your NAs with 1s just to get the code to run;

    # Placeholder only: overwrite every NA in df with 1 so that dataprep() runs.
    df <- replace(df, is.na(df), 1)
    

    Third, make sure your predictors are numeric.

    # Coerce each predictor column to numeric. lapply() always yields one
    # vector per column, whereas sapply() may simplify to a matrix, a bare
    # vector or a list depending on how many rows df has.
    predictors <- c("rgdpe", "avh", "rconna", "rtfpna", "rkna", "emp")
    df[predictors] <- lapply(df[predictors], as.numeric)
    

    That is sufficient for me to be able to generate dataprep.out. Does that help?