Search code examples
rstatisticsspss

weight data with R Part II


Given is the following data frame:

structure(list(UH6401 = c(1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 
1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 
0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 
1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 
1, 0, 1, 1), UH6402 = c(1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 
0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 
1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 
0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 
1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 
0, 1, 1), UH6403 = c(1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 
1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 
1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 
0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 
1, 1), UH6404 = c(0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 
0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 
1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 
1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 
0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 
1), UH6409 = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 
1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 
1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0
), UH6410 = c(1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 
1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 
1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 
1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 
0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0
), UH6411 = c(0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 
1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 
0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 
1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1
), UH6412 = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 
1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
), UH6503 = c(1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 
1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 
1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1
), UH66 = c(1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 
    UH68 = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 
    0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), UH6501a = c(1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), UH6405a = c(1, 
    0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 
    0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 
    0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 
    1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 
    1, 0, 1, 1), UH6407a = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
    1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 
    1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 
    1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 
    0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1), weight = c(405.002592353822, 
    479.360356183825, 526.548105855472, 810.005184707644, 312.321528531308, 
    930.961115757095, 567.383058387095, 475.323944260643, 1226.91439266118, 
    517.086839792615, 1200.2669656949, 810.005184707644, 656.723784884795, 
    605.370463928298, 668.467435759576, 558.112457492436, 793.751055244424, 
    479.360356183825, 1226.91439266118, 1606.54816212786, 1657.48609449633, 
    300.803580980276, 605.370463928298, 1140.55078447979, 669.102760422943, 
    810.005184707644, 1657.48609449633, 305.569853371963, 2994.30343152033, 
    762.922030382216, 479.360356183825, 1147.36030437824, 668.467435759576, 
    517.086839792615, 479.360356183825, 399.141865860217, 656.723784884795, 
    913.364738988386, 312.321528531308, 569.10576379231, 775.630259688922, 
    1207.22952429547, 1053.09621171094, 1140.55078447979, 314.857225320909, 
    668.467435759576, 2416.57081451012, 573.680152189121, 396.875527622212, 
    605.370463928298, 1036.3159447043, 3088.62283807823, 569.10576379231, 
    1140.55078447979, 2416.57081451012, 1147.36030437824, 762.922030382216, 
    702.064141140629, 351.032070570315, 629.714450641817, 517.086839792615, 
    1996.20228768022, 828.743047248167, 475.323944260643, 920.185794495882, 
    793.751055244424, 796.08788273764, 1197.42559758065, 405.002592353822, 
    418.584343119327, 300.803580980276, 654.76828203733, 2740.09421696516, 
    351.032070570315, 1069.6202614693, 2094.91447516374, 399.141865860217, 
    654.76828203733, 1003.65414063441, 573.680152189121, 851.074587580641, 
    913.364738988386, 762.922030382216, 1034.17367958523, 573.680152189121, 
    479.360356183825, 3208.8607844079, 654.76828203733, 908.055695892447, 
    328.361892442398, 1036.3159447043, 702.064141140629, 613.457196330588, 
    601.607161960551, 567.383058387095, 479.360356183825, 306.261087672466, 
    920.185794495882, 654.76828203733, 828.743047248167)), .Names = c("UH6401", 
"UH6402", "UH6403", "UH6404", "UH6409", "UH6410", "UH6411", "UH6412", 
"UH6503", "UH66", "UH68", "UH6501a", "UH6405a", "UH6407a", "weight"
), row.names = c(NA, 100L), class = "data.frame")

In social science we often have a weight variable to weight a case (row) by the factor of that variable to correct the sample to fit e.g. the population by age classes. If the weight variable of a row is "1.6" it means that this row need do be observed 1.6 times to fit the basis population.

In SPSS I would write

WEIGHT BY weight. 

and all procedures after that command will weight the data accordingly.

In R I can do that with stabs with the command

xtabs(weight ~ UH6401, data=df)

But what if I want to do a SVD or PCA analysis? Here there is no function to weight data like it is in xtabs.

So the question is, is there a method to weight data in R like it is possible in SPSS? The point with whole numbers would be easy, with the factor "2" we would just double the line, but what is with all the factors that are decimal?


UPDATE:

The SVD or PCA was just an example! Take any other statistical procedure. In social science the samples are never perfect, but to do an statistical analysis with sample data, the sample needs to represent the basic population, but a sample mostly doesn't. So we try to fix that deficit with weights, so the sample represent the basic population!


Solution

  • You probably need to get acquainted with the search engines for R. Baron's RSiteSearch and Rseek: This is one of the first hits on "weighted PCA" at Baron's site:

    http://finzi.psych.upenn.edu/R/library/aroma.light/html/wpca.matrix.html

    With the clarification in the comment to Joris Meys response, the answer is often that one needs to be clear that one is desires sample weights versus other types of weighting. Regression weighting is done with the survey package. Lumley's book on survey methods distinguishes among three types of weights. (The "weights" in the lm function are variance weights, NOT sample weights.)

    Note: Both PCA and factor analysis (experimental) are included in the survey package. So maybe Dominick's question requestiong a unified approach to weighting in regression methods has a single "answer".