##############################
#
# December 6, 2011
# Topics:
#   Importing data from other formats
#   Mid-Semester Review III
#
##############################

# Preamble
# We will need these packages today
library(RFS)
library(foreign)


# Small practice with the foreign package
# There are several file formats R can access, beyond the
# non-proprietary, standard csv format.

# Stata file format (.dta)
# read.dta() returns a data frame, so summary() reports each variable.
data1 <- read.dta("http://courses.kvasaheim.com/pols6123/data/test.dta")
summary(data1)

# SPSS (PASW) file format (.sav)
# read.spss() returns a plain list by default; to.data.frame = TRUE is
# needed so that summary() reports each variable, as it does for data1.
data2 <- read.spss(
  "http://courses.kvasaheim.com/pols6123/data/test.sav",
  to.data.frame = TRUE
)
summary(data2)

# SAS is a more complex file system, but it can also be read by R:
#   read.xport
#   read.ssd

# Weka file
#   read.arff

# Several different database programs like dBase and FoxPro
#   read.dbf

# etc ...

# The documentation for the package is located at:
#   http://cran.r-project.org/web/packages/foreign/foreign.pdf

# Remove every object from the global workspace (including data1 and data2)
# before starting the next activity.
rm(list=ls())


##################################################
# Activity 3.1: Patrick Henry

# Preamble
library(RFS)

# Read data
ph <- read.csv("http://courses.kvasaheim.com/pols6123/data/patrickHenry.csv")

# attach() places the columns of ph on the search path so they can be
# referenced by bare name. Objects of the same name in the global workspace
# take precedence over attached columns.
attach(ph)

# Get to know the data
names(ph)     # column names
summary(ph)   # per-column summaries
head(ph)      # first rows
str(ph)       # column types and structure


# Preliminary modeling ... what works?
# Candidate models for GPA. None of the glm() calls passes a `data` argument,
# so gpa, composite, highschool and gender are resolved by bare name from the
# workspace / search path.

# Gaussian model with identity link on the logit-transformed value of gpa/4
m1 <- glm(logit(gpa / 4) ~ composite + highschool + gender,
          family = gaussian(link = identity))
summary(m1)

# Gamma model with inverse link on gpa/4
# NOTE: m10 is reassigned a few lines below, so this fit is only available
# through the summary printed here.
m10 <- glm(gpa / 4 ~ composite + highschool + gender,
           family = Gamma(link = inverse))
summary(m10)

# Re-level the school-type variable with "Public High School" as the base
# (set.base() comes from the RFS package). This creates a workspace copy of
# highschool, which is what every model fitted from here on picks up.
highschool <- set.base(highschool, "Public High School")

# Gamma model on the untransformed gpa, using the family's default link
m10 <- glm(gpa ~ composite + highschool + gender, family = Gamma)
summary(m10)

# Gaussian model with logit link on gpa/4 (additive terms)
m3 <- glm(gpa / 4 ~ composite + highschool + gender,
          family = gaussian(link = make.link("logit")))
summary(m3)

# Same family and link, with all interactions among the three predictors
m4 <- glm(gpa / 4 ~ composite * highschool * gender,
          family = gaussian(link = make.link("logit")))
summary(m4)


# Helper: predicted GPA from m3 for the rows of `newdata`.
# predict() is called with its default settings, the result is passed through
# logistic() (RFS package), and the value is multiplied by 4 to undo the
# gpa/4 scaling used when fitting m3.
predictGpa <- function(newdata) {
  4 * logistic(predict(m3, newdata = newdata))
}


# Predictions for two students
Bud <- data.frame(highschool = "Public High School", gender = "Male",
                  composite = 1600)
Kelly <- data.frame(highschool = "Public High School", gender = "Female",
                    composite = 400)

predictGpa(Bud)     # Bud's predicted GPA
predictGpa(Kelly)   # Kelly's predicted GPA


# Prediction graph
# Grid of composite scores over which the six curves are evaluated.
newComposite <- seq(400, 1600, length.out = 1000)

# Helper: predicted-GPA curve over newComposite for one gender / school type.
gpaCurve <- function(gender, highschool) {
  predictGpa(data.frame(composite = newComposite,
                        gender = gender,
                        highschool = highschool))
}

# Naming: pr + gender (M/F) + school type (V = private, U = public, H = home)
prMV <- gpaCurve("Male",   "Private High School")
prFV <- gpaCurve("Female", "Private High School")

prMU <- gpaCurve("Male",   "Public High School")
prFU <- gpaCurve("Female", "Public High School")

prMH <- gpaCurve("Male",   "Home School")
prFH <- gpaCurve("Female", "Home School")

par(mar = c(4, 4, 0, 0) + 0.35)
plot(composite, gpa, las = 1, ylim = c(0, 4), xlim = c(300, 1700),
     xlab = "SAT Composite Score", ylab = "Student GPA")

# Colour encodes gender (4 = male curves, 2 = female curves);
# line type encodes school type (1 = private, 2 = public, 3 = home).
lines(newComposite, prMV, lwd = 2, col = 4, lty = 1)
lines(newComposite, prMU, lwd = 2, col = 4, lty = 2)
lines(newComposite, prMH, lwd = 2, col = 4, lty = 3)

lines(newComposite, prFV, lwd = 2, col = 2, lty = 1)
lines(newComposite, prFU, lwd = 2, col = 2, lty = 2)
lines(newComposite, prFH, lwd = 2, col = 2, lty = 3)

# Legend entries are listed in the same order the curves are drawn
# (private, public, home) so that each label is paired with the line type
# actually used for that school type.
legend("topleft",
       legend = c("Male, Private", "Male, Public", "Male, Home",
                  "Female, Private", "Female, Public", "Female, Home"),
       lwd = 2,
       col = c(4, 4, 4, 2, 2, 2),
       lty = rep(1:3, times = 2),
       bty = "n")