Search code examples
r  fixed-width  readr

R: "Error: unexpected string constant in" with read_fwf()


I am trying to read a fixed width file from U.S. Census Bureau into R using read_fwf(). I keep getting an error in the same place in the list of column names. I have tried to change the particular column name at the location in multiple attempts and R keeps throwing the error. I restarted R to a new session and I keep getting the error. In the list of column names, it's the 39th item that seems to have the problem. I've changed the name of the 39th, and sometimes the 38th, position in one of the attempts I've included in code. The first line of code in the code block has the original column name values. In that line, the 39th name is "cbsac", but the error prints that as "... "" ". It's close to the name in the 38th position, "cbsa", but a lot of the names in succession in other parts of the list are very similar and they don't cause an error. I don't know what that is supposed to indicate. Does "cbsac" mean something in R that I'm not aware of?

library(readr)

> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8,  8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub",  "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "cbsa", "cbsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "cbsa", ""
> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8,  8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub",  "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "BCas", "CBsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "BCas", ""

> sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17763)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] readr_1.3.1

loaded via a namespace (and not attached):
[1] compiler_3.6.1  backports_1.1.5 R6_2.4.0        hms_0.5.1      
[5] pillar_1.4.2    tibble_2.1.3    Rcpp_1.0.2      crayon_1.3.4   
[9] vctrs_0.2.0     zeallot_0.1.0   pkgconfig_2.0.3 rlang_0.4.0    

This links to a zip that has the source file. The file is "orgeo2010.sf1". I should have said, the zip is kind of big. Sorry about that.


Solution

  • Does this fix your issue?

    # Read the 2010 Census SF1 geographic header file (fixed-width text) into a
    # tibble. The layout is kept in two parallel vectors so that a typo such as
    # a missing comma is easy to spot, instead of one very long inline call.

    # Width, in characters, of each field; one entry per column, in file order.
    widths <- c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5,
    2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5,
    2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5,
    2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18)

    # Column names, in the same order as `widths` (both vectors have 101 entries).
    vars <- c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno",
    "region", "division", "state", "county", "countycc", "countysc", "cousub",
    "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp", "block",
    "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli",
    "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "cbsa", "cbsac",
    "metdiv", "csa", "necta", "nectasc", "nectadiv", "cnecta", "cbsapci", "nectapci",
    "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2",
    "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr",
    "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc",
    "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens",
    "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115",
    "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc",
    "cnectasc", "memi", "nmemi", "puma", "reserved")

    # Passing `vars` as `col_names` names the columns at read time, so no
    # separate `names(td) <- vars` step is needed afterwards.
    td <- read_fwf(
      "D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1",
      fwf_widths(widths, col_names = vars)
    )
    

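    The "unexpected string constant" error is caused by a malformed character vector: a comma is missing between "nectadiv" and "cnecta". R therefore sees two string constants in a row, which is a syntax error. The names "cbsa" and "cbsac" are not the problem, which is why renaming them had no effect.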