#####

# Script for Assignment 13

#####


### Standard preamble

ed <- read.csv("sri2010.csv", header=TRUE)
names(ed)
attach(ed)


###
# Problem 13.1

# Method A	###
# This method creates two new variables to act upon

vote.north <- pVoting[province=="Northern Province"]
vote.sabar <- pVoting[province=="Sabaragamuwa Province"]

mean(vote.north)
mean(vote.sabar)

var(vote.north)
var(vote.sabar)

var.test(vote.north, vote.sabar)

t.test(vote.north, vote.sabar)


# Method B	###
# This method does not create the two new variables

mean(pVoting[province=="Northern Province"])
mean(pVoting[province=="Sabaragamuwa Province"])

var(pVoting[province=="Northern Province"])
var(pVoting[province=="Sabaragamuwa Province"])

var.test(pVoting[province=="Northern Province"], 
		pVoting[province=="Sabaragamuwa Province"])

t.test(pVoting[province=="Northern Province"], 
		pVoting[province=="Sabaragamuwa Province"])


# When doing several actions, it is usually better to 
# create the new variables. When doing only one or two
# actions, it will usually be better to not create the 
# new variables.

# Either way is correct. It is a question of readability 
# and speed. Note that I had to do more typing in Method B
# and that I had to split a couple lines to print out this
# script in Method B. 



###
# Problem 13.2

rej.north <- pRejected[province=="Northern Province"]
rej.sabar <- pRejected[province=="Sabaragamuwa Province"]

mean(rej.north)
mean(rej.sabar)
var(rej.north)
var(rej.sabar)
var.test(rej.north, rej.sabar)
t.test(rej.north, rej.sabar)


# Note here that there was no variation in the proportion of 
# votes rejected in Sabaragamuwa Province. This tells us that
# our sample mean is VERY representative of the population
# mean, which indicates that we are very sure about the real
# population mean (although not certain).



###
# Problem 13.3

# We can either go through our data and create a variable telling
# us that Rajapaksa won the province (easiest), or we can do it 
# programmatically (much more difficult). 

# I will be lazy and do it the easy way. So, I am creating a new
# dataset (sri2010b.csv) with that additional variable.

# Make sure to 'unattach' (a.k.a. detach) the previous dataset, or 
# else confusion will reign like a Mississippi polecat!

detach(ed)
ed2 <- read.csv("sri2010b.csv", header=TRUE)
names(ed2)
attach(ed2)

# Now, continuing as before...

rej.raja <- pRejected[rajawon==1]
rej.fons <- pRejected[rajawon==0]

mean(rej.raja)
mean(rej.fons)
var(rej.raja)
var(rej.fons)
var.test(rej.raja, rej.fons)
t.test(rej.raja, rej.fons)


# Note that it was easiest to step out of our statistical package
# and modify the original data (while saving in a new file). This
# is a lesson we should take everywhere: Use the tool that gets you
# the best information the easiest.



###
# Problem 13.4

# Plotting Method A
plot(pRejected ~ pRajapaksa)

# Plotting Method B
plot(pRajapaksa, pRejected)

# Your choice, but the first method is like the modeling we have done in 
# the past (y ~ x), whereas the second is like the typical x,y plots

# Now, to make it look acceptable
plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa",
	ylab="Proportion of Vote Rejected" )

# We could even make it look better:
plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa",
	ylab="Proportion of Vote Rejected", xlim=c(0,1), ylim=c(0,0.05), 
	las=1, pch=16 )

# Or better
plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa",
	ylab="Proportion of Vote Rejected", xlim=c(0,1), ylim=c(0,0.05), 
	las=1, pch=16 )
points(pRejected ~ pRajapaksa, subset=rajawon==0, pch=16, col=4)
points(pRejected ~ pRajapaksa, subset=rajawon==1, pch=16, col=2)

# This last plot gives different colors for the provinces won by 
# Rajapaksa (red) and the provinces won by Fonseka (blue). It may
# make the point better.


# So, now that we have our plot, how do we get it saved? We have two 
# options (the second is best).

# Method 1: Right-click on the plot and select "Save as metafile", then 
# save it as whatever file you want, then normally import it to your
# homework.

# Method 2: use the png() function to save it as a png file. This latter
# method is preferred, since changes to the plot will be automatically
# saved in this method, but will require you to re-click/etc. in the first.

png("plot.png", width=600, height=600)
plot(pRejected ~ pRajapaksa, xlab="Proportion of Vote for Rajapaksa",
	ylab="Proportion of Vote Rejected", xlim=c(0,1), ylim=c(0,0.05), 
	las=1, pch=16 )
points(pRejected ~ pRajapaksa, subset=rajawon==0, pch=15, col=4)
points(pRejected ~ pRajapaksa, subset=rajawon==1, pch=16, col=2)
dev.off()

# The png() function requires you to specify the filename. The width
# and the height parameters are in pixels and are optional. The dev.off() 
# command closes and saves the file. You need both to make it work. Why?

var(pRejected)
var(pRajapaksa)
cov(pRejected, pRajapaksa)
cor(pRejected, pRajapaksa)
cor.test(pRejected, pRajapaksa)

# Note that this correlation test differs from the one in the text (p513).

# To perform the text's test, we merely calculate the correct test statistic
# and compare it to the appropriate distribution, as such: 

r  <- cor(pRejected, pRajapaksa)
n  <- length(pRejected)
W  <- 0.5 * log( (1+r)/(1-r) )
Ws <- 1 / (n-3)
z  <- W/sqrt(Ws/n)

# Thus, our p-value is
pnorm(z)*2

# The conclusions are the same, and usually are.