##### Demonstration Script #4b
##### MATH322
#####
##### The Chi-Square test in action
#####


### Preamble
# Read the data set straight from the web into a data frame
dt <- read.csv(
  file = "http://www.electoralforensics.org/datasets/mac2011parl.csv"
)

# Put the columns of dt on the search path so that later code can refer
# to them by bare name (VOTE is used that way below)
attach(dt)

# Quick overview of every column
summary(dt)


# Focus on the variable of interest and drop the zero values.
#
# A logical mask is used instead of VOTE[-which(VOTE == 0)]: when the data
# contain no zeros, which() returns integer(0), and a negative index built
# from it selects nothing, so the whole vector would silently be emptied.
# NA entries are kept, just as the which() form kept them.
votes <- VOTE[is.na(VOTE) | VOTE != 0]

# Check that we got them all (the minimum should no longer be 0)
summary(votes)



### The Benford distribution:
### probability log10(1 + 1/d) for each leading digit d = 1, ..., 9
d <- seq_len(9)
log10(1 + 1/d)


### Leading digit of every value in votes, converted to a number
leadingDigit <- as.numeric(substring(votes, first = 1, last = 1))
summary(leadingDigit)

# Number of leading digits extracted
n <- length(leadingDigit)

### Do the test
# Tabulate the leading digits over the fixed categories 1..9. Pinning the
# levels means a digit that never occurs is counted as 0 instead of being
# left out of the table, so the counts always line up with the nine
# Benford probabilities (chisq.test stops when x and p differ in length).
digitCounts <- table(leadingDigit = factor(leadingDigit, levels = 1:9))
digitCounts

# Goodness-of-fit test against Benford, with the counts typed in by hand
chisq.test(c(452, 257, 175, 145, 109, 94, 90, 84, 61), p = log10(1 + 1/d))

# or #
# the same test with the counts taken directly from the data
chisq.test(digitCounts, p = log10(1 + 1/d))

# The second method keeps typing errors from creeping in. =)




### Conclusion: 
# Since p-value 0.9199 > alpha=0.05, we fail to reject H0.
# This means we did not detect unfairness in this election.
# It does not mean the election was free and fair, nor does
# it mean that the election was fair in the aspect tested. It
# only means we did not detect any unfairness.




### Graphics
# This graphic plots the observed distribution and compares it
# to the hypothesized distribution:


# A basic graphic
# Red dots: observed count of each leading digit.
# Blue dots: count expected under the Benford distribution.

# Count over the fixed categories 1..9 so that a digit which never occurs
# contributes a 0 and the counts always pair up with the x positions 1:9.
obsCounts <- table(factor(leadingDigit, levels = 1:9))

# Expected counts are scaled by the number of tabulated digits, which is
# the total chisq.test uses; it equals length(votes) whenever every value
# has a leading digit in 1..9.
expCounts <- log10(1 + 1/d) * sum(obsCounts)

# Keep the 0-500 range, but let it grow if a count would fall off the top
yTop <- max(500, 1.05 * max(obsCounts, expCounts))

plot.new()
plot.window(xlim = c(1, 9), ylim = c(0, yTop))

points(1:9, obsCounts, col = "red", pch = 16)
points(1:9, expCounts, col = "blue", pch = 16)





# A better graphic
# Same comparison as the basic graphic, now with axes, a baseline, a
# legend, and the two series nudged apart so the markers do not overlap.

# "i" axis style: the axes span exactly xlim/ylim with no padding;
# las = 1 draws the tick labels horizontally
par(xaxs = "i", yaxs = "i", las = 1)

# Count over the fixed categories 1..9 so that a digit which never occurs
# contributes a 0 and the counts always pair up with the x positions 1:9.
obsCounts <- table(factor(leadingDigit, levels = 1:9))

# Expected counts are scaled by the number of tabulated digits, which is
# the total chisq.test uses; it equals n whenever every value has a
# leading digit in 1..9.
expCounts <- log10(1 + 1/d) * sum(obsCounts)

# Keep the 0-500 range, but let it grow if a count would fall off the top
yTop <- max(500, 1.05 * max(obsCounts, expCounts))

plot.new()
plot.window(xlim = c(0.5, 9.5), ylim = c(0, yTop))

# Observed slightly left of each digit, hypothesized slightly right
points(1:9 - 0.1, obsCounts, col = "red", pch = "\u23f5")
points(1:9 + 0.1, expCounts, col = "blue", pch = "\u23f4")

axis(1, at = 1:9)
axis(2)
abline(h = 0)


legend("topright", col = c("red", "blue"), pch = c("\u23f5", "\u23f4"),
  legend = c("Observed", "Hypothesized"), bty = "n")