############################## # # Script: Solutions 9 (assignment09a.R) # ############################## ssm <- read.csv("http://courses.kvasaheim.com/stat40x3/data/ssm.csv", header=TRUE) names(ssm) summary(ssm) #################### # Problem 09.1a shapiro.test(ssm$religiosity[ssm$south==0]) # Whoops! shapiro.test(ssm$religiosity[ssm$south==1]) # Whoops! with(ssm, ks.test(religiosity[south==1],religiosity[south==0]) ) # But wait! We must take the location shift into consideration! grp1 <- ssm$religiosity[ssm$south==1] grp0 <- ssm$religiosity[ssm$south==0] grp1 <- grp1-mean(grp1) grp0 <- grp0-mean(grp0) ks.test(grp0, grp1) # No significant difference, so we can use out non-parametric tests m091.1a <- kruskal.test(religiosity ~ south, data=ssm) print(m091.1a) m091.1b <- wilcox.test(religiosity ~ south, data=ssm) m091.1b # The p-value is not close to our alpha, so we can make a safe conclusion # that the South is different. BUT, which way? Neither test tells us that. # So, we have to compare means (or medians) to get the right direction. mean(ssm$religiosity[ssm$south==1]) mean(ssm$religiosity[ssm$south==0]) # Thus, the South is higher than the non-South #################### # Problem 09.1b shapiro.test(ssm$pctfavor[ssm$south==0]) shapiro.test(ssm$pctfavor[ssm$south==1]) # Whoops! bartlett.test(pctfavor ~ south, data=ssm) fligner.test(pctfavor ~ south, data=ssm) # You can argue that you are allowed to use AOV, but you # must make the argument. # Similarly, you must argue that you should use the # non-parametric test # Check the assumption of the Kruskal-Wallis test grp1 <- ssm$pctfavor[ssm$south==1] grp0 <- ssm$pctfavor[ssm$south==0] grp1 <- grp1-mean(grp1) grp0 <- grp0-mean(grp0) ks.test(grp0, grp1) # So, the assumption of the Kruskal-Wallis test is met m092.1 <- aov(pctfavor ~ south, data=ssm) summary(m092.1) m092.2 <- kruskal.test(pctfavor ~ south, data=ssm) print(m092.2) # or m092.1b <- t.test(pctfavor ~ south, data=ssm) m092.1b m092.2b <- wilcox.test(pctfavor ~ south, data=ssm) m092.2b # Conclusion: different support for ssm ballot measures in the # South than elsewhere in the United States. #################### # Problem 09.1c # Same analysis as above, but with civilunion, not south shapiro.test(ssm$pctfavor[ssm$civilunion==0]) shapiro.test(ssm$pctfavor[ssm$civilunion==1]) bartlett.test(pctfavor ~ civilunion, data=ssm) fligner.test(pctfavor ~ civilunion, data=ssm) # You can argue that you are allowed to use AOV, but you # must make the argument. m092.2 <- aov(pctfavor ~ civilunion, data=ssm) summary(m092.2) # or m092.2b <- t.test(pctfavor ~ civilunion, data=ssm) m092.2b # So, the conclusion is that there is no statistically # significant difference between the civilunion groups # in terms of vote support. #################### # Problem 09.2 ssm$propfavor <- ssm$pctfavor/100 model094.1 <- glm( propfavor~year*civilunion*religiosity, data=ssm, family=binomial(link=logit)) summary(model094.1) state <- data.frame(year=2011, civilunion=1, religiosity=3) pred.vote <- predict(model094.1, newdata=state) pred.vote <- logit.inv(pred.vote) pred.vote # = 55.16% plot(pctfavor ~ year, data=ssm, xlab="Year", ylab="Percent Voting for Bill", las=1, ylim=c(0,100), xlim=c(1995,2015) ) newyear <- 1995:2015 pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=1, religiosity=3)) pred.vote <- logit.inv(pred.vote)*100 lines(newyear, pred.vote, lwd=2, col=2) # Extra: pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=0, religiosity=3)) pred.vote <- logit.inv(pred.vote)*100 lines(newyear, pred.vote, lwd=2, col=4) legend("bottomleft", c("Restricting Civil Unions","Not Restricting Civil Unions"), col=c(2,4), lwd=2, bty="n") ##### # Alternatively, if you decided to plot the interaction model: model094.1 <- glm( propfavor~year*civilunion*religiosity, data=ssm, family=binomial(link=logit)) summary(model094.1) state <- data.frame(year=2011, civilunion=1, religiosity=3) pred.vote <- predict(model094.1, newdata=state) pred.vote <- logit.inv(pred.vote) pred.vote # = 55.16% plot(pctfavor ~ year, data=ssm, xlab="Year", ylab="Percent Voting for Bill", las=1, ylim=c(0,100), xlim=c(1995,2015) ) newyear <- 1995:2015 pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=1, religiosity=3)) pred.vote <- logit.inv(pred.vote)*100 lines(newyear, pred.vote, lwd=2, col=2) pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=0, religiosity=3)) pred.vote <- logit.inv(pred.vote)*100 lines(newyear, pred.vote, lwd=2, col=4) legend("bottomleft", c("Restricting Civil Unions","Not Restricting Civil Unions"), col=c(2,4), lwd=2, bty="n")