##### Script for Slidedeck eA ### Multiple Linear Regression ### Preamble source("http://rfs.kvasaheim.com/stat200.R") dt = read.csv("http://rfs.kvasaheim.com/data/crime.csv") attach(dt) ito=numeric() ito["Midwest"] = "#1b9e77" ito["Northeast"] = "#d95f02" ito["South"] = "#7570b3" ito["West"] = "#e7298a" ##### ##### ##### ### Example 1 mod1 = lm(vcrime00 ~ urbanp1990*census4) summary.aov(mod1) summary(mod1) predict(mod1, newdata=data.frame(urbanp1990=90, census4="South"), interval="confidence") predict(mod1,data.frame(urbanp1990=90,census4="South"),interval="predict") # Graphic 1: urban xx=seq(30,100,0.01) yx1n = predict(mod1, newdata=data.frame(urbanp1990=xx,census4="Northeast")) yx1m = predict(mod1, newdata=data.frame(urbanp1990=xx,census4="Midwest")) yx1s = predict(mod1, newdata=data.frame(urbanp1990=xx,census4="South")) yx1w = predict(mod1, newdata=data.frame(urbanp1990=xx,census4="West")) par(bg="transparent") par(xaxs="i", yaxs="i", xpd=NA) par(family="serif",las=1) par(mar=c(3.5,4,1,0.5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(0,110), ylim=c(0,1600)) axis(1) axis(2, at=0:8*250) title(xlab="Urban Percentage (1990)", line=2.25) title(ylab="Violent Crime Rate (2000)", line=3) lines(xx,yx1n, lwd=2, col=ito["Northeast"]) lines(xx,yx1m, lwd=2, col=ito["Midwest"]) lines(xx,yx1s, lwd=2, col=ito["South"]) lines(xx,yx1w, lwd=2, col=ito["West"]) points(urbanp1990,vcrime00, pch=16, col=ito[census4]) legend("topleft", bty="n", lwd=1, pch=21, pt.bg=ito, col=ito, legend=c("Midwest","Northeast","South","West"), inset=c(0.05,0)) ### Example 2 # Violent Crime using Property Crime and Region mod2 = lm(vcrime00 ~ pcrime90*census4) summary.aov(mod2) mod2a = lm(vcrime00 ~ pcrime90 + census4) summary.aov(mod2a) summary(mod2a) # Graphic 2: Data w. prediction lines for each region xx=seq(0,10000,10) yx2n = predict(mod2a, newdata=data.frame(pcrime90=xx,census4="Northeast")) yx2m = predict(mod2a, newdata=data.frame(pcrime90=xx,census4="Midwest")) yx2s = predict(mod2a, newdata=data.frame(pcrime90=xx,census4="South")) yx2w = predict(mod2a, newdata=data.frame(pcrime90=xx,census4="West")) # graphic par(bg="transparent") par(xaxs="i", yaxs="i") par(family="serif",las=1) par(mar=c(3.5,4,1,0.5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(0,10000), ylim=c(0,1600)) axis(1); axis(2, at=0:8*250) title(xlab="Property Crime Rate (1990)", line=2.25) title(ylab="Violent Crime Rate (2000)", line=3.00) lines(xx,yx2n, lwd=2, col=ito["Northeast"]) lines(xx,yx2m, lwd=2, col=ito["Midwest"]) lines(xx,yx2s, lwd=2, col=ito["South"]) lines(xx,yx2w, lwd=2, col=ito["West"]) points(pcrime90,vcrime00, pch=16, col=ito[census4]) legend("topleft", bty="n", lwd=1, pch=21, pt.bg=ito, col=ito, legend=c("Midwest","Northeast","South","West"), inset=c(0.05,0)) ### Example 3 # Violent Crime using Property Crime and Education Level ito2 = c("green4","orange2","red4") waeaRegion = as.numeric( cut(waea90, c(0, as.numeric(quantile(waea90,0.33)), as.numeric(quantile(waea90,0.66)),100 )) ) mod3 = lm(vcrime00 ~ pcrime90 * waea90) summary.aov(mod3) mod3a = lm(vcrime00 ~ pcrime90 + waea90) summary.aov(mod3a) summary(mod3a) # Graphic 3: xx=seq(50,70,1) ## WAEA Based yx3q1 = predict(mod3a, newdata=data.frame(waea90=xx, pcrime90=as.numeric(quantile(pcrime90,0.25)) )) yx3q2 = predict(mod3a, newdata=data.frame(waea90=xx, pcrime90=as.numeric(quantile(pcrime90,0.50)) )) yx3q3 = predict(mod3a, newdata=data.frame(waea90=xx, pcrime90=as.numeric(quantile(pcrime90,0.75)) )) # par(bg="transparent") par(xaxs="i", yaxs="i") par(family="serif",las=1) par(mar=c(3.5,4,0.5,1)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(50,73), ylim=c(0,1750)) axis(1) axis(2, at=0:8*250) title(xlab="Weighted Average Educational Attainment (1990)", line=2.25) title(ylab="Violent Crime Rate (2000)", line=3.00) lines(xx,yx3q1, lwd=2, col=ito2[1]) lines(xx,yx3q2, lwd=2, col=ito2[2]) lines(xx,yx3q3, lwd=2, col=ito2[3]) points(waea90, vcrime00, pch=16, col=ito2[waeaRegion]) legend("topleft", bty="n", lwd=1, pch=16, col=ito2, legend=c("Quartile 1","Median","Quartile 3"), title="Property Crime Rate", inset=c(0.05,0), title.font=2) ##### Beyond STAT 200 Examples ### Bernoulli: # create data xx = seq(21,100,1) p1 = logit.inv( -0.9 + xx*0.05 ) yy = rbinom(xx, size=1, prob=p1) mod4a = glm(yy~xx, family=binomial) # graphic par(bg="transparent") par(xaxs="i", yaxs="i") par(xpd=NA) par(family="serif",las=1) par(mar=c(3.5,5,0.5,5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(0,100), ylim=c(0,1)) axis(1) axis(2, at=0:1, label=c("Uninsured","Insured") ) mtext(side=4, at=0.5, text="Probability of Being Insured", line=3, las=3, font=2, cex=1.2) axis(4) title(xlab="Homeowner Age", line=2.25) title(ylab="Insured for Floods?", line=3.00) points(xx,yy, pch=21, bg="lightblue") lines(xx,logit.inv(predict(mod4a) ), lwd=2, col="black") ### Binomial Count: detach(dt) dt = read.csv("http://rfs.kvasaheim.com/data/uga2011pres.csv") names(dt) total = dt$TOTAL valid = dt$VALID invalid = dt$INVALID pCnd = dt$MUSEVENI/valid pInv = invalid/total depVar = cbind(invalid,valid) newX = seq(0.20,1.00,0.01) mod5a = glm(depVar~pCnd, family=binomial) # graphic par(bg="transparent") par(xaxs="i", yaxs="i") par(xpd=NA) par(family="serif",las=1) par(mar=c(3.5,5,0.5,0.5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(0.20,1.00), ylim=c(0,0.2)) axis(1) axis(2) title(xlab="Support for Yoweri Museveni", line=2.25) title(ylab="Invalidation Rate", line=3.25) points(pCnd,pInv, pch=21, bg="red") lines(newX,logit.inv(predict(mod5a, newdata=data.frame(pCnd=newX)) ), lwd=2, col="black") ### Nominal modeling: ### Poisson count: detach(dt) # get data dt = read.csv("http://rfs.kvasaheim.com/data/crime.csv") names(dt) drop = which(dt$inituse<1) dt = dt[-drop,] attach(dt) pop = pop00/1e6 mod6a = glm(inituse~pop * census4, family=poisson) summary(mod6a) newX = seq(0,20,0.01) newM = exp( predict(mod6a, newdata=data.frame(pop=newX, census4="Midwest") ) ) newN = exp( predict(mod6a, newdata=data.frame(pop=newX, census4="Northeast") ) ) newS = exp( predict(mod6a, newdata=data.frame(pop=newX, census4="South") ) ) newW = exp( predict(mod6a, newdata=data.frame(pop=newX, census4="West") ) ) # graphic par(bg="transparent") par(xaxs="i", yaxs="i") par(xpd=NA) par(family="serif",las=1) par(mar=c(3.5,5,0.5,0.5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(0,20), ylim=c(0,60)) axis(1) axis(2) title(xlab="State Population (2000) [millions]", line=2.25) title(ylab="Citizen's Initiative Use", line=3.25) lines(newX,newM, lwd=2, col=ito["Midwest"]) lines(newX,newN, lwd=2, col=ito["Northeast"]) lines(newX,newS, lwd=2, col=ito["South"]) lines(newX,newW, lwd=2, col=ito["West"]) points(pop,dt$inituse, pch=21, bg=ito[census4]) legend("topright", bty="n", lwd=1, pch=21, pt.bg=ito, col=ito, legend=c("Midwest","Northeast","South","West"), inset=c(0.05,0)) ### End of File