##### # Script for Assignment 13 ##### ### Standard preamble ed <- read.csv("sri2010.csv", header=TRUE) names(ed) attach(ed) ### # Problem 13.1 # Method A ### # This method creates two new variables to act upon vote.north <- pVoting[province=="Northern Province"] vote.sabar <- pVoting[province=="Sabaragamuwa Province"] mean(vote.north) mean(vote.sabar) var(vote.north) var(vote.sabar) var.test(vote.north, vote.sabar) t.test(vote.north, vote.sabar) # Method B ### # This method does not create the two new variables mean(pVoting[province=="Northern Province"]) mean(pVoting[province=="Sabaragamuwa Province"]) var(pVoting[province=="Northern Province"]) var(pVoting[province=="Sabaragamuwa Province"]) var.test(pVoting[province=="Northern Province"], pVoting[province=="Sabaragamuwa Province"]) t.test(pVoting[province=="Northern Province"], pVoting[province=="Sabaragamuwa Province"]) # When doing several actions, it is usually better to # create the new variables. When doing only one or two # actions, it will usually be better to not create the # new variables. # Either way is correct. It is a question of readability # and speed. Note that I had to do more typing in Method B # and that I had to split a couple lines to print out this # script in Method B. ### # Problem 13.2 rej.north <- pRejected[province=="Northern Province"] rej.sabar <- pRejected[province=="Sabaragamuwa Province"] mean(rej.north) mean(rej.sabar) var(rej.north) var(rej.sabar) var.test(rej.north, rej.sabar) t.test(rej.north, rej.sabar) # Note here that there was no variation in the proportion of # votes rejected in Sabaragamuwa Province. This tells us that # our sample mean is VERY representative of the population # mean, which indicates that we are very sure about the real # population mean (although not certain). ### # Problem 13.3 # We can either go through our data and create a variable telling # us that Rajapaksa won the province (easiest), or we can do it # programmatically (much more difficult). # I will be lazy and do it the easy way. So, I am creating a new # dataset (sri2010b.csv) with that additional variable. # Make sure to 'unattach' (a.k.a. detach) the previous dataset, or # else confusion will reign like a Mississippi polecat! detach(ed) ed2 <- read.csv("sri2010b.csv", header=TRUE) names(ed2) attach(ed2) # Now, continuing as before... rej.raja <- pRejected[rajawon==1] rej.fons <- pRejected[rajawon==0] mean(rej.raja) mean(rej.fons) var(rej.raja) var(rej.fons) var.test(rej.raja, rej.fons) t.test(rej.raja, rej.fons) # Note that it was easiest to step out of our statistical package # and modify the original data (while saving in a new file). This # is a lesson we should take everywhere: Use the tool that gets you # the best information the easiest. ### # Problem 13.4 # Plotting Method A plot(pRejected ~ pRajapaksa) # Plotting Method B plot(pRajapaksa, pRejected) # Your choice, but the first method is like the modeling we have done in # the past (y ~ x), whereas the second is like the typical x,y plots # Now, to make it look acceptable plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa", ylab="Proportion of Vote Rejected" ) # We could even make it look better: plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa", ylab="Proportion of Vote Rejected", xlim=c(0,1), ylim=c(0,0.05), las=1, pch=16 ) # Or better plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa", ylab="Proportion of Vote Rejected", xlim=c(0,1), ylim=c(0,0.05), las=1, pch=16 ) points(pRejected ~ pRajapaksa, subset=rajawon==0, pch=16, col=4) points(pRejected ~ pRajapaksa, subset=rajawon==1, pch=16, col=2) # This last plot gives different colors for the provinces won by # Rajapaksa (red) and the provinces won by Fonseka (blue). It may # make the point better. # So, now that we have our plot, how do we get it saved? We have two # options (the second is best). # Method 1: Right-click on the plot and select "Save as metafile", then # save it as whatever file you want, then normally import it to your # homework. # Method 2: use the png() function to save it as a png file. This latter # method is preferred, since changes to the plot will be automatically # saved in this method, but will require you to re-click/etc. in the first. png("plot.png", width=600, height=600) plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa", ylab="Proportion of Vote Rejected", xlim=c(0,1), ylim=c(0,0.05), las=1, pch=16 ) points(pRejected ~ pRajapaksa, subset=rajawon==0, pch=15, col=4) points(pRejected ~ pRajapaksa, subset=rajawon==1, pch=16, col=2) dev.off() # The png() function requires you to specify the filename. The width # and the height parameters are in pixels and are optional. The dev.off() # command closes and saves the file. You need both to make it work. Why? var(pRejected) var(pRajapaksa) cov(pRejected, pRajapaksa) cor(pRejected, pRajapaksa) cor.test(pRejected, pRajapaksa) # Note that this correlation test differs from the one in the text (p513). # To perform the text's test, we merely calculate the correct test statistic # and compare it to the appropriate distribution, as such: r <- cor(pRejected, pRajapaksa) n <- length(pRejected) W <- 0.5 * log( (1+r)/(1-r) ) Ws <- 1 / (n-3) z <- W/sqrt(Ws/n) # Thus, our p-value is pnorm(z)*2 # The conclusions are the same, and usually are.