##############################
#
# Script: assignment12a.R
#   Answers to the twelfth (and final) homework assignment
#
##############################


# Preamble (as usual)
library(RFS)    # usual
library(nnet)   # for multinomial regression
library(MASS)   # for ordinal regression
library(lmtest) # for likelihood ratio tests

# get data 
gss <- read.csv("http://courses.kvasaheim.com/pols6123/data/gss2.csv")
attach(gss)
summary(gss)

# get complete data (on our independent variables)

cc <- complete.cases(spkcom,afterlif,attend,male,colmslm,pres04,rincome)


####################
# Problem 1

summary(spkcom) # What is: NA's?

attend <- set.base(attend, "never")

m1a <- glm(spkcom ~ afterlif+attend+male, family=binomial(link=logit), subset=cc)
summary(m1a)

m1b <- glm(spkcom ~ afterlif+attend+male, family=quasibinomial(link=logit), subset=cc)
summary(m1b)

m1c <- multinom(spkcom ~ afterlif+attend+male, subset=cc)
summary(m1c)

BOB <- data.frame(afterlif="yes, definitely", male=1, attend="every week")

logistic( predict(m1a, newdata=BOB) )
logistic( predict(m1b, newdata=BOB) )
predict(m1c, newdata=BOB, type="prob")


# Check for the variable statistical significance

m1a1 <- glm(spkcom ~ attend+male, family=binomial(link=logit), subset=cc)
m1a2 <- glm(spkcom ~ afterlif+male, family=binomial(link=logit), subset=cc)
m1a3 <- glm(spkcom ~ afterlif+attend, family=binomial(link=logit), subset=cc)

lrtest(m1a,m1a1)
lrtest(m1a,m1a2)
lrtest(m1a,m1a3)




####################
# Problem 2

summary(colmslm)   # What is: NA's?


m2a <- glm(colmslm ~ afterlif+attend+male, family=binomial(link=logit), subset=cc)
summary(m2a)

m2b <- multinom(colmslm ~ afterlif+attend+male, subset=cc)
summary(m2b)

BOB <- data.frame(afterlif="yes, definitely", male=1, attend="every week")

logistic( predict(m2a, newdata=BOB) )
predict(m2b, newdata=BOB, type="prob")


# Check for the variable statistical significance

m2a1 <- glm(colmslm ~ attend+male, family=binomial(link=logit), subset=cc)
m2a2 <- glm(colmslm ~ afterlif+male, family=binomial(link=logit), subset=cc)
m2a3 <- glm(colmslm ~ afterlif+attend, family=binomial(link=logit), subset=cc)

lrtest(m2a,m2a1)
lrtest(m2a,m2a2)
lrtest(m2a,m2a3)






####################
# Problem 3

pres04  <- set.base(pres04,"didnt vote")
rincome <- set.base(rincome,"refused")

m3a <- glm(colmslm ~ pres04+rincome+male, family=binomial(link=logit), subset=cc)
summary(m3a)

m3b <- multinom(colmslm ~ pres04+rincome+male, subset=cc)
summary(m3b)

BOB <- data.frame(afterlif="yes, definitely", male=1, attend="every week", 
	pres04="kerry", rincome="$10000 - 14999")

logistic( predict(m3a, newdata=BOB) )
predict(m3b, newdata=BOB, type="prob")
predict(m2b, newdata=BOB, type="prob")



# Check for the variable statistical significance

m3a1 <- glm(colmslm ~ rincome+male, family=binomial(link=logit), subset=cc)
m3a2 <- glm(colmslm ~ pres04+male, family=binomial(link=logit), subset=cc)
m3a3 <- glm(colmslm ~ pres04+rincome, family=binomial(link=logit), subset=cc)

lrtest(m3a,m3a1)
lrtest(m3a,m3a2)
lrtest(m3a,m3a3)




####################
# Problem 4

summary(harmgood)

harmgood <- factor(harmgood, c("strongly disagree", "disagree", "neither agree nor disagree", "agree", "strongly agree") )
pray <- set.base(pray,"never")
heaven <- set.base(heaven, "no, definitely not")
cappun <- set.base(cappun,"oppose")

m4a <- polr(harmgood ~ pray+heaven+colsci+cappun, subset=cc)
summary(m4a)




m4a1 <- polr(harmgood ~ heaven+colsci+cappun, subset=cc)
summary(m4a1)
# Compare AICs to see if you can use the reduced model, m4a1. As the
# difference in AICs is greater than 5, there is a statistical
# difference between these two models, so one cannot just use the smaller
# of the two (m4a1).

# BUT: As social scientists, if the pray variable was important to 
#      your theory, it must stay.


BOB <- data.frame(afterlif="yes, definitely", male=1, attend="every week", 
	pres04="kerry", rincome="$10000 - 14999",
	pray="once a day", heaven="yes, definitely", colsci="no", cappun="oppose")

predict(m4a, newdata=BOB)
predict(m4a, newdata=BOB, type="prob")


# Check for the variable statistical significance

m4a1 <- polr(harmgood ~ heaven+colsci+cappun, subset=cc)
m4a2 <- polr(harmgood ~ pray+colsci+cappun, subset=cc)
m4a3 <- polr(harmgood ~ pray+heaven+cappun, subset=cc)
m4a4 <- polr(harmgood ~ pray+heaven+colsci, subset=cc)

lrtest(m4a,m4a1)
lrtest(m4a,m4a2)
lrtest(m4a,m4a3)
lrtest(m4a,m4a4)





####################
# Problem 5


m5a <- multinom(harmgood ~ pray+heaven+colsci+cappun, subset=cc)
ss <- summary(m5a)

names(ss)
x <- ss$coefficients[4,]
s <- ss$standard.errors[4,]
z <- x/s
p <- 2*(1-pnorm(abs(z)))

cbind(x,s,z,p)


# predictions

predict(m5a, newdata=BOB)
predict(m5a, newdata=BOB, type="prob")
predict(m4a, newdata=BOB, type="prob")