######################### # # File: 20110927.R # # Lab: African Corruption # ######################### # Preamble library(RFS) # Read in data gdp <- read.csv("http://courses.kvasaheim.com/pols6123/data/gdpcap.csv") # Feel data names(gdp) summary(gdp) attach(gdp) plot(gdp) # Create helpful variables and test for Normality AFR <- gdpcap[region=="Africa"] LAT <- gdpcap[region=="Latin America"] shapiro.test(AFR) shapiro.test(LAT) lAFR <- log(AFR) lLAT <- log(LAT) shapiro.test(lAFR) shapiro.test(lLAT) llAFR <- log(lAFR) llLAT <- log(lLAT) shapiro.test(llAFR) shapiro.test(llLAT) # It took two loggings to get Normality. Now, we can use ANOVA # So, (1) t.test(llAFR, mu=5000) # No surprise. (Why?) par(mar=c(1,5,1,1)+0.1) boxplot(AFR, las=1, ylab="GDP per Capita ($)\n", log="y") # And, (2) t.test(llAFR,llLAT) # No surprise. (Why?) par(mar=c(3,5,1,1)+0.1) boxplot(cbind(AFR,LAT), las=1, ylab="GDP per Capita ($)\n", log="y", xaxt="n") axis(1, at=1:2, label=c("Africa","Latin America")) ##### # Now for (3) model <- lm(log(gdpcap) ~ hig + OPEC) summary(model) # Test assumptions res <- residuals(model) mean(res) shapiro.test(res) # Note that if you hadn't transformed the gdpcap, the # Shapiro-Wilks test was reject par(mar=c(5,5,1,1)+0.1) plot(res~log(gdpcap)) # Oy! There's a prominent funnel shape. And, we need to # get rid of it. What to do? Another variable? Add an # interaction? Add a higher power? Transform gdpcap again? # For tonight, let's ignore it. And make our conclusions # based on the summary table summary(model) ################################################## # Additional: Plotting prediction lines # We will do this on Week 7, but you # may want a head start on us. # Let's plot the data and some prediction lines # The data plot(gdpcap~hig, las=1, ylab="GDP per capita (US$)\n", xlab="Honesty in Government", ylim=c(0,150000), xlim=c(1,10) ) # Part of the data in a different color points(gdpcap[OPEC=="Non-Member"]~hig[OPEC=="Non-Member"], pch=16, col="#6666ff") # The other part of the data in a different color points(gdpcap[OPEC=="Member"]~hig[OPEC=="Member"], pch=16, col="#00ff00") # Prediction curves: newHIG <- seq(1,10,length=100) pr1 <- predict(model, newdata=data.frame(hig=newHIG, OPEC="Member")) pr1 <- exp(pr1) lines(newHIG,pr1, col=3, lwd=2) pr0 <- predict(model, newdata=data.frame(hig=newHIG, OPEC="Non-Member")) pr0 <- exp(pr0) lines(newHIG,pr0, col=4, lwd=2) # Ask yourself: What did each part do?