############################## # # Script: assignment05a.R # Answers to the fifth # homework assignment # ############################## # Preamble library(RFS) # Read in data ssm <- read.csv("http://courses.kvasaheim.com/pols6123/data/ssm2.csv") names(ssm) summary(ssm) attach(ssm) # A slew of models lglPF <- logit(propFavor) summary( lm(lglPF ~ civilunion * myear ) )$adj.r.squared summary( lm(lglPF ~ civilunion * religiosity ) )$adj.r.squared summary( lm(lglPF ~ civilunion * gspcap ) )$adj.r.squared summary( lm(lglPF ~ civilunion * popdensity ) )$adj.r.squared summary( lm(lglPF ~ civilunion * povertyRank ) )$adj.r.squared summary( lm(lglPF ~ civilunion * povertyRate ) )$adj.r.squared summary( lm(lglPF ~ civilunion * hdi ) )$adj.r.squared summary( lm(lglPF ~ civilunion + myear ) )$adj.r.squared summary( lm(lglPF ~ civilunion + religiosity ) )$adj.r.squared summary( lm(lglPF ~ civilunion + gspcap ) )$adj.r.squared summary( lm(lglPF ~ civilunion + popdensity ) )$adj.r.squared summary( lm(lglPF ~ civilunion + povertyRank ) )$adj.r.squared summary( lm(lglPF ~ civilunion + povertyRate ) )$adj.r.squared summary( lm(lglPF ~ civilunion + hdi ) )$adj.r.squared summary( lm(lglPF ~ south * myear ) )$adj.r.squared summary( lm(lglPF ~ south * religiosity ) )$adj.r.squared summary( lm(lglPF ~ south * gspcap ) )$adj.r.squared summary( lm(lglPF ~ south * popdensity ) )$adj.r.squared summary( lm(lglPF ~ south * povertyRank ) )$adj.r.squared summary( lm(lglPF ~ south * povertyRate ) )$adj.r.squared # 0.7247 summary( lm(lglPF ~ south * hdi ) )$adj.r.squared summary( lm(lglPF ~ south + myear ) )$adj.r.squared summary( lm(lglPF ~ south + religiosity ) )$adj.r.squared summary( lm(lglPF ~ south + gspcap ) )$adj.r.squared summary( lm(lglPF ~ south + popdensity ) )$adj.r.squared summary( lm(lglPF ~ south + povertyRank ) )$adj.r.squared summary( lm(lglPF ~ south + povertyRate ) )$adj.r.squared summary( lm(lglPF ~ south + hdi ) )$adj.r.squared # The one with povertyRank was a teensiest bit better, # but it is generally better to avoid using ranks. Wait, # by 'better' I mean higher adjusted R-squared. model <- lm(lglPF ~ south * povertyRate ) summary(model) # Test assumptions cor(south,povertyRate) # I need to fix this =( stats::cor(south,povertyRate) cor( cbind(south,povertyRate)) # pretty high. This may dilute the effects res <- residuals(model) mean(res) shapiro.test(res) plot(lglPF,res, pch=16) plot(model) # Everything looks good to me # Predict for Washington WA <- data.frame(povertyRate=10.2, south=0) logistic( predict(model, newdata=WA) ) # 0.6511 # Probability of it passing: set.seed(577) t <- 1e6 e.constant <- rnorm(t, m= 1.53015, s=0.35778) e.povertyR <- rnorm(t, m=-0.08886, s=0.03035) prWA <- e.constant + 10.2*e.povertyR prd<- logistic(prWA) png("histogram.png",height=4,width=6,units="in",res=600) par(mar=c(4.3,1,1,1)) hist(prd, breaks=-11:111/100, main="", yaxt="n", ylab="", xlim=c(0,1), xaxs="i", xlab="Vote in Favor of SSM", yaxs="i") hist(prd[prd>0.500], breaks=-11:111/100, main="", yaxt="n", ylab="", xlim=c(0,1), xaxs="i", xlab="", add=TRUE, col=2) # What proportion are wins? length(which(prd>0.500))/t # 0.906 # What is the 95% confidence interval? Q13 <- quantile(prd, c(0.025,0.975)) # (0.424, 0.825) segments(Q13[1],0,Q13[2],0, col=3, lwd=5) segments(Q13[1],0,0,0, col=2, lwd=5) segments(1,0,Q13[2],0, col=2, lwd=5) text(0.3,15000, "p = 0.906") dev.off()