#########################
#
# File: 20111108.R
#       Nominal Regression
#       Ordinal Regression
#
#########################


# Preamble

library(RFS)    # Needed for set.base
library(nnet)   # Needed for multinom
library(MASS)   # Needed for polr


# Let's read in some pseudo data

mrk <- read.csv("http://courses.kvasaheim.com/pols6123/data/murakami.csv")
names(mrk)
attach(mrk)
summary(mrk)


# book is "favorite book of the three"


# Let's get a look at the data

plot(mrk)   # Not too helpful
cor(mrk)    # Why only a single value???



# It's modeling time!

m1 <- multinom(book ~ gender)   # New function, same formula options
summary(m1)


m2 <- multinom(book ~ gender + age + vote)
# Did not converge. So, increase the maximum number of 
# iterations. This allows more models to be fit. STILL,
# some models may not be fittable. These are just bad
# models.


m2 <- multinom(book ~ gender + age + vote, maxit=500)
summary(m2)


# Define a person named Bob:
BOB <- data.frame(gender="Male", age=37, vote="Republican")

# Predict Bob's favorite book
predict(m1, newdata=BOB)             # Single prediction
predict(m1, newdata=BOB, type="p")   # All probabilities (usually better)


# Prediction plot (as usual)

newAge <- seq(1,70, length=500)
pr <- predict(m2, newdata=data.frame(age=newAge, gender="Male", vote="Democrat"), type="p")
plot(  newAge, pr[,"1Q84"], type='l', ylim=c(0,1), las=1, ylab="Probability", xlab="Age", lwd=2 )
lines( newAge, pr[,"Kafka on the Shore"], col=2, lwd=2)
lines( newAge, pr[,"Wind-Up Bird"], col=4, lwd=2)




# Need a different base? Nope; you never NEED one. However, 
# here is how you get one if you want

book <- set.base(book, base="Wind-Up Bird", data=mrk)

# See the effects:
m1 <- multinom(book ~ gender)
summary(m1)



# Done for this part. Let's clean up our memory so we do not have
# to worry.

detach(mrk)
rm(list=ls())







##################################################
#
# Ordinal Regression


ord <- read.csv("http://courses.kvasaheim.com/pols6123/data/ordwarm.csv")
names(ord)
attach(ord)
summary(ord)

# Dependent variable is warmth towards President Obama


# Modeling time!

m1 <- polr(warm ~ male + age)
summary(m1)

# Note how to read the tables! The coefficients are in the top table,
# and the intercepts (divisions between the categories) is in the
# bottom table.


# Oops! We forgot to order the ordinal dependent variable

warm <- factor(warm, c("SA","A","D","SD") )    #### Specify the ordering!



# Now, it's modeling time, part deux!

m1 <- polr(warm ~ male + age)
summary(m1)


# Again, predict for Bob
BOB <- data.frame(male="Men", age=37)
pr  <- predict(m1, newdata=BOB)              # Best predicted category
pr  <- predict(m1, newdata=BOB, type="p")    # All probabilities (usually better)


# Prediction plots (as usual)


# For men (additive model):
newAge <- seq(1,100, length=1000)
pr <- predict(m1, newdata=data.frame(age=newAge, male="Men"), type="p") 
plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,0.6), ylab="Probability", xlab="Age", lwd=2)
lines(newAge, pr[,"A"], lwd=2, col=2)
lines(newAge, pr[,"D"], lwd=2, col=3)
lines(newAge, pr[,"SD"], lwd=2, col=4)
legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n")


# For women (additive model):
newAge <- seq(1,100, length=1000)
pr <- predict(m1, newdata=data.frame(age=newAge, male="Women"), type="p")
plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,1), 
		ylab="Probability", xlab="Age", lwd=2)
lines(newAge, pr[,"A"], lwd=2, col=2)
lines(newAge, pr[,"D"], lwd=2, col=3)
lines(newAge, pr[,"SD"], lwd=2, col=4)
legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n")



# Interaction model

m2 <- polr(warm ~ male * age)
summary(m2)


# Predict for Bob (again)

BOB <- data.frame(male="Men", age=37)
predict(m1, newdata=BOB)
predict(m1, newdata=BOB, type="p")


# Prediction plots (again)


# Males (interaction model):
newAge <- seq(1,100, length=1000)
pr <- predict(m2, newdata=data.frame(age=newAge, male="Men"), type="p")
plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,0.6), ylab="Probability", xlab="Age", lwd=2)
lines(newAge, pr[,"A"], lwd=2, col=2)
lines(newAge, pr[,"D"], lwd=2, col=3)
lines(newAge, pr[,"SD"], lwd=2, col=4)
legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n")



# Women (interaction model):
newAge <- seq(1,100, length=1000)
pr <- predict(m2, newdata=data.frame(age=newAge, male="Women"), type="p")
plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,1), ylab="Probability", xlab="Age", lwd=2)
lines(newAge, pr[,"A"], lwd=2, col=2)
lines(newAge, pr[,"D"], lwd=2, col=3)
lines(newAge, pr[,"SD"], lwd=2, col=4)
legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n")



# Done!


# Keep in mind the structures for the tables (and why they are different from usual)
#
# Keep in mind the two new regression functions:
#      multinom
#      polr
#
# Keep in mind that there is NO NEED to back transform; the predictions are probabilities