I need some help with statistical testing code. Basically, I'm trying to study the relationship between age, political standing and views on cannabis legalisation. The dataset is the British Social Attitudes survey 2010.
# Load the British Social Attitudes 2010 extract.
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 reads text columns
# as character by default, whereas the dput() output and the binomial models
# further down rely on these columns being factors.
bsa_2010 <- read.csv(
  "https://dl.dropboxusercontent.com/s/ubl9huokroj9jw8/bsa%202010.csv",
  stringsAsFactors = TRUE
)
> dput(head(bsa_2010))
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("England",
"Scotland", "Wales"), class = "factor"), RSex = structure(c(1L,
1L, 2L, 2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"),
RAge = c(75L, 34L, 81L, 25L, 33L, 76L), MarStat = structure(c(4L,
4L, 2L, 3L, 3L, 5L), .Label = c("Living as married", "Married",
"Not married", "Separated or divorced after marrying", "Widowed"
), class = "factor"), ChildHh = structure(c(1L, 1L, 1L, 1L,
1L, 1L), .Label = c("No", "Yes"), class = "factor"), WhPaper = structure(c(8L,
8L, 8L, 11L, 12L, 8L), .Label = c("(Scottish) Daily Express",
"(Scottish) Daily Mail", "Daily Mirror/ Scottish Mirror",
"Daily Record", "Daily Star", "Daily Telegraph", "Financial Times",
"Skip,not read paper normally", "The Guardian", "The Independent",
"The Sun/ Scottish Sun", "The Times"), class = "factor"),
PartyIDN = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front",
"Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat",
"None", "Other answer (WRITE IN)", "Other party (WRITE IN)",
"Plaid Cymru", "Refused to say", "Scottish National Party",
"UK Independence Party (UKIP)/Veritas"), class = "factor"),
Partyid1 = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front",
"Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat",
"None", "Other answer", "Other party", "Plaid Cymru", "Refusal",
"Scottish National Party", "UK Independence Party (UKIP)/Veritas"
), class = "factor"), PartyId2 = structure(c(1L, 5L, 1L,
4L, 1L, 4L), .Label = c("Conservative", "Green Party", "Labour",
"Liberal Democrat", "None", "Other party", "Other/DK/Ref"
), class = "factor"), Spend1 = structure(c(3L, 4L, 4L, 3L,
3L, 4L), .Label = c("(None of these)", "Defence", "Education",
"Health", "Help for industry", "Housing", "Overseas aid",
"Police and prisons", "Public transport", "Roads", "Social security benefits"
), class = "factor"), Spend2 = structure(c(6L, 3L, 2L, 4L,
9L, 10L), .Label = c("(None of these)", "Defence", "Education",
"Health", "Help for industry", "Housing", "Overseas aid",
"Police and prisons", "Public transport", "Roads", "Skip,no 1st priority",
"Social security benefits"), class = "factor"), RClassGp = structure(c(4L,
6L, 1L, 2L, 4L, 6L), .Label = c("Employers in small org; own account workers",
"Intermediate occupations", "Lower supervisory & technical occupations",
"Managerial & professional occups", "Not classifiable", "Semi-routine & routine occupations",
"Skip, never had a job+DK+NA last job"), class = "factor"),
RNSSECG = structure(c(4L, 8L, 9L, 3L, 4L, 8L), .Label = c("1.1",
"1.2", "Intermediate occupations", "Lower managerial and professional occupations",
"Lower supervisory & technical occupations", "Not classified",
"Routine occupations", "Semi-routine Occupations", "Small employers and own account workers"
), class = "factor"), CanLegal = structure(c(1L, 1L, 1L,
2L, 2L, 1L), .Label = c("Taking cannabis should remain illegal",
"should be legal, only licenced shops"), class = "factor"),
RaceOri3 = structure(c(10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASIAN: of Bangladeshi origin",
"ASIAN: of Chinese origin", "ASIAN: of Indian origin", "ASIAN: of Pakistani origin",
"ASIAN: of other origin (WRITE IN)", "BLACK: of African origin",
"BLACK: of Caribbean origin", "MIXED ORIGIN (WRITE IN)",
"OTHER (WRITE IN)", "WHITE: of any origin"), class = "factor"),
Agecat1 = structure(c(6L, 2L, 7L, 1L, 2L, 6L), .Label = c("(18,28]",
"(28,38]", "(38,48]", "(48,58]", "(58,68]", "(68,78]", "(78,88]",
"(88,98]"), class = "factor"), Agecat2 = structure(c(3L,
1L, 4L, 1L, 1L, 3L), .Label = c("(18,38]", "(38,58]", "(58,78]",
"(78,98]"), class = "factor")), .Names = c("Country", "RSex",
"RAge", "MarStat", "ChildHh", "WhPaper", "PartyIDN", "Partyid1",
"PartyId2", "Spend1", "Spend2", "RClassGp", "RNSSECG", "CanLegal",
"RaceOri3", "Agecat1", "Agecat2"), row.names = c(NA, 6L), class = "data.frame")
The variables used are: RAge (age), PartyIDN (political party identified with) and CanLegal (view on cannabis legalisation).
To simplify it, I've put ages into categories and kept only two of the biggest political parties.
# Bin age into 10-year bands. include.lowest = TRUE closes the first interval
# on the left ([18,28]); with the default, respondents aged exactly 18 fall
# outside (18,28] and are silently turned into NA.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = seq(18, 98, by = 10),
  include.lowest = TRUE
)

# Keep only respondents who identify with one of the two largest parties.
# %in% never yields NA, so rows with a missing PartyIDN are simply excluded.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))

# Rebuild the factor so the parties that were filtered out are no longer levels.
Parties$PartyIDN <- factor(Parties$PartyIDN)
How would I carry out statistical testing to get P values on the relationship between age, politics and how they view cannabis legalisation?
Any help would be welcome, thank you!
It looks like you also need some help with the basics, so for the statistics part I would start with https://stats.idre.ucla.edu/r/dae/logit-regression/ . The R part is easy (I'm going to treat your new age categories as a factor, not a continuous variable):
# Bin age into 10-year bands. include.lowest = TRUE closes the first interval
# on the left ([18,28]); with the default, respondents aged exactly 18 fall
# outside (18,28] and are silently turned into NA.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = seq(18, 98, by = 10),
  include.lowest = TRUE
)

# Keep only respondents who identify with one of the two largest parties,
# then rebuild the factor so the filtered-out parties are no longer levels.
Parties <- subset(bsa_2010, PartyIDN == "Conservative" | PartyIDN == "Labour")
Parties$PartyIDN <- factor(Parties$PartyIDN)
str(Parties)

# Drop any age bands that have no respondents left after subsetting.
Parties$Agecat1 <- factor(Parties$Agecat1)

# The binomial family expects a factor (or 0/1) response and treats the first
# level as "failure". read.csv() no longer creates factors by default since
# R 4.0.0, and alphabetical level order depends on the locale, so fix the
# order explicitly: the model then always estimates the odds of answering
# "should be legal".
Parties$CanLegal <- factor(
  Parties$CanLegal,
  levels = c(
    "Taking cannabis should remain illegal",
    "should be legal, only licenced shops"
  )
)

# Logistic regression of cannabis view on age band and party identification.
firstattempt <- glm(
  CanLegal ~ Agecat1 + PartyIDN,
  data = Parties,
  family = "binomial"
)
summary(firstattempt)
For what it's worth, your cuts on age don't make the R
commands or the statistics any easier. You could just as easily have done:
# Same logistic model, but with age entered as the numeric RAge column
# instead of the binned Agecat1 factor.
secondattempt <- glm(
  formula = CanLegal ~ RAge + PartyIDN,
  family = "binomial",
  data = Parties
)
summary(secondattempt)
Using RAge directly actually makes the results easier to interpret, and they support the hypothesis that age matters to the outcome but political party doesn't. You can see that with these very simple graphs:
# Mosaic plot of the CanLegal-by-PartyIDN cross-tabulation of Parties.
mosaicplot(xtabs(~CanLegal + PartyIDN, data = Parties))
# Same view with the Agecat1 age bands in place of party.
mosaicplot(xtabs(~CanLegal + Agecat1, data = Parties))