######################### # # File: 20111108.R # Nominal Regression # Ordinal Regression # ######################### # Preamble library(RFS) # Needed for set.base library(nnet) # Needed for multinom library(MASS) # Needed for polr # Let's read in some pseudo data mrk <- read.csv("http://courses.kvasaheim.com/pols6123/data/murakami.csv") names(mrk) attach(mrk) summary(mrk) # book is "favorite book of the three" # Let's get a look at the data plot(mrk) # Not too helpful cor(mrk) # Why only a single value??? # It's modeling time! m1 <- multinom(book ~ gender) # New function, same formula options summary(m1) m2 <- multinom(book ~ gender + age + vote) # Did not converge. So, increase the maximum number of # iterations. This allows more models to be fit. STILL, # some models may not be fittable. These are just bad # models. m2 <- multinom(book ~ gender + age + vote, maxit=500) summary(m2) # Define a person named Bob: BOB <- data.frame(gender="Male", age=37, vote="Republican") # Predict Bob's favorite book predict(m1, newdata=BOB) # Single prediction predict(m1, newdata=BOB, type="p") # All probabilities (usually better) # Prediction plot (as usual) newAge <- seq(1,70, length=500) pr <- predict(m2, newdata=data.frame(age=newAge, gender="Male", vote="Democrat"), type="p") plot( newAge, pr[,"1Q84"], type='l', ylim=c(0,1), las=1, ylab="Probability", xlab="Age", lwd=2 ) lines( newAge, pr[,"Kafka on the Shore"], col=2, lwd=2) lines( newAge, pr[,"Wind-Up Bird"], col=4, lwd=2) # Need a different base? Nope; you never NEED one. However, # here is how you get one if you want book <- set.base(book, base="Wind-Up Bird", data=mrk) # See the effects: m1 <- multinom(book ~ gender) summary(m1) # Done for this part. Let's clean up our memory so we do not have # to worry. detach(mrk) rm(list=ls()) ################################################## # # Ordinal Regression ord <- read.csv("http://courses.kvasaheim.com/pols6123/data/ordwarm.csv") names(ord) attach(ord) summary(ord) # Dependent variable is warmth towards President Obama # Modeling time! m1 <- polr(warm ~ male + age) summary(m1) # Note how to read the tables! The coefficients are in the top table, # and the intercepts (divisions between the categories) is in the # bottom table. # Oops! We forgot to order the ordinal dependent variable warm <- factor(warm, c("SA","A","D","SD") ) #### Specify the ordering! # Now, it's modeling time, part deux! m1 <- polr(warm ~ male + age) summary(m1) # Again, predict for Bob BOB <- data.frame(male="Men", age=37) pr <- predict(m1, newdata=BOB) # Best predicted category pr <- predict(m1, newdata=BOB, type="p") # All probabilities (usually better) # Prediction plots (as usual) # For men (additive model): newAge <- seq(1,100, length=1000) pr <- predict(m1, newdata=data.frame(age=newAge, male="Men"), type="p") plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,0.6), ylab="Probability", xlab="Age", lwd=2) lines(newAge, pr[,"A"], lwd=2, col=2) lines(newAge, pr[,"D"], lwd=2, col=3) lines(newAge, pr[,"SD"], lwd=2, col=4) legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n") # For women (additive model): newAge <- seq(1,100, length=1000) pr <- predict(m1, newdata=data.frame(age=newAge, male="Women"), type="p") plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,1), ylab="Probability", xlab="Age", lwd=2) lines(newAge, pr[,"A"], lwd=2, col=2) lines(newAge, pr[,"D"], lwd=2, col=3) lines(newAge, pr[,"SD"], lwd=2, col=4) legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n") # Interaction model m2 <- polr(warm ~ male * age) summary(m2) # Predict for Bob (again) BOB <- data.frame(male="Men", age=37) predict(m1, newdata=BOB) predict(m1, newdata=BOB, type="p") # Prediction plots (again) # Males (interaction model): newAge <- seq(1,100, length=1000) pr <- predict(m2, newdata=data.frame(age=newAge, male="Men"), type="p") plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,0.6), ylab="Probability", xlab="Age", lwd=2) lines(newAge, pr[,"A"], lwd=2, col=2) lines(newAge, pr[,"D"], lwd=2, col=3) lines(newAge, pr[,"SD"], lwd=2, col=4) legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n") # Women (interaction model): newAge <- seq(1,100, length=1000) pr <- predict(m2, newdata=data.frame(age=newAge, male="Women"), type="p") plot( newAge, pr[,"SA"], type="l", las=1, ylim=c(0,1), ylab="Probability", xlab="Age", lwd=2) lines(newAge, pr[,"A"], lwd=2, col=2) lines(newAge, pr[,"D"], lwd=2, col=3) lines(newAge, pr[,"SD"], lwd=2, col=4) legend("topleft", c("SA","A","D","SD"), lwd=2, col=c(1,2,3,4), bty="n") # Done! # Keep in mind the structures for the tables (and why they are different from usual) # # Keep in mind the two new regression functions: # multinom # polr # # Keep in mind that there is NO NEED to back transform; the predictions are probabilities