##############################
#
# Script: Solutions 9 (assignment09a.R)
#
##############################


ssm <- read.csv("http://courses.kvasaheim.com/stat40x3/data/ssm.csv", header=TRUE)
names(ssm)
summary(ssm)


####################
# Problem 09.1a

shapiro.test(ssm$religiosity[ssm$south==0])  # Whoops!
shapiro.test(ssm$religiosity[ssm$south==1])  # Whoops!

with(ssm, ks.test(religiosity[south==1],religiosity[south==0]) )

# But wait! We must take the location shift into consideration!

grp1 <- ssm$religiosity[ssm$south==1]
grp0 <- ssm$religiosity[ssm$south==0]

grp1 <- grp1-mean(grp1)
grp0 <- grp0-mean(grp0)

ks.test(grp0, grp1)

# No significant difference, so we can use out non-parametric tests

m091.1a <- kruskal.test(religiosity ~ south, data=ssm)
print(m091.1a)
m091.1b <- wilcox.test(religiosity ~ south, data=ssm)
m091.1b

# The p-value is not close to our alpha, so we can make a safe conclusion
# that the South is different. BUT, which way? Neither test tells us that.
# So, we have to compare means (or medians) to get the right direction.

mean(ssm$religiosity[ssm$south==1])  
mean(ssm$religiosity[ssm$south==0])

# Thus, the South is higher than the non-South




####################
# Problem 09.1b

shapiro.test(ssm$pctfavor[ssm$south==0])
shapiro.test(ssm$pctfavor[ssm$south==1])  # Whoops!

bartlett.test(pctfavor ~ south, data=ssm)
fligner.test(pctfavor ~ south, data=ssm)

# You can argue that you are allowed to use AOV, but you
# must make the argument.

# Similarly, you must argue that you should use the
# non-parametric test


# Check the assumption of the Kruskal-Wallis test

grp1 <- ssm$pctfavor[ssm$south==1]
grp0 <- ssm$pctfavor[ssm$south==0]

grp1 <- grp1-mean(grp1)
grp0 <- grp0-mean(grp0)

ks.test(grp0, grp1)

# So, the assumption of the Kruskal-Wallis test is met

m092.1 <- aov(pctfavor ~ south, data=ssm)
summary(m092.1)
m092.2 <- kruskal.test(pctfavor ~ south, data=ssm)
print(m092.2)

# or

m092.1b <- t.test(pctfavor ~ south, data=ssm)
m092.1b
m092.2b <- wilcox.test(pctfavor ~ south, data=ssm)
m092.2b


# Conclusion: different support for ssm ballot measures in the
# South than elsewhere in the United States.





####################
# Problem 09.1c

# Same analysis as above, but with civilunion, not south

shapiro.test(ssm$pctfavor[ssm$civilunion==0])
shapiro.test(ssm$pctfavor[ssm$civilunion==1])

bartlett.test(pctfavor ~ civilunion, data=ssm)
fligner.test(pctfavor ~ civilunion, data=ssm)

# You can argue that you are allowed to use AOV, but you
# must make the argument.

m092.2 <- aov(pctfavor ~ civilunion, data=ssm)
summary(m092.2)

# or

m092.2b <- t.test(pctfavor ~ civilunion, data=ssm)
m092.2b


# So, the conclusion is that there is no statistically
# significant difference between the civilunion groups
# in terms of vote support.






####################
# Problem 09.2

ssm$propfavor <- ssm$pctfavor/100
model094.1 <- glm( propfavor~year*civilunion*religiosity, data=ssm, family=binomial(link=logit))
summary(model094.1)



state <- data.frame(year=2011, civilunion=1, religiosity=3)
pred.vote <- predict(model094.1, newdata=state)
pred.vote <- logit.inv(pred.vote)
pred.vote     # = 55.16%



plot(pctfavor ~ year, data=ssm, xlab="Year", ylab="Percent Voting for Bill", 
  las=1, ylim=c(0,100), xlim=c(1995,2015) )

newyear <- 1995:2015
pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=1, religiosity=3))
pred.vote <- logit.inv(pred.vote)*100

lines(newyear, pred.vote, lwd=2, col=2)

# Extra:
pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=0, religiosity=3))
pred.vote <- logit.inv(pred.vote)*100
lines(newyear, pred.vote, lwd=2, col=4)

legend("bottomleft", c("Restricting Civil Unions","Not Restricting Civil Unions"),
  col=c(2,4), lwd=2, bty="n")




#####
# Alternatively, if you decided to plot the interaction model:


model094.1 <- glm( propfavor~year*civilunion*religiosity, data=ssm, family=binomial(link=logit))
summary(model094.1)


state <- data.frame(year=2011, civilunion=1, religiosity=3)
pred.vote <- predict(model094.1, newdata=state)
pred.vote <- logit.inv(pred.vote)
pred.vote  # = 55.16%


plot(pctfavor ~ year, data=ssm, xlab="Year", ylab="Percent Voting for Bill", 
  las=1, ylim=c(0,100), xlim=c(1995,2015) )

newyear <- 1995:2015

pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=1, religiosity=3))
pred.vote <- logit.inv(pred.vote)*100
lines(newyear, pred.vote, lwd=2, col=2)

pred.vote <- predict(model094.1, newdata=data.frame(year=newyear, civilunion=0, religiosity=3))
pred.vote <- logit.inv(pred.vote)*100
lines(newyear, pred.vote, lwd=2, col=4)

legend("bottomleft", c("Restricting Civil Unions","Not Restricting Civil Unions"),
  col=c(2,4), lwd=2, bty="n")