##############################
#
# Script: Solutions 10 (assignment10xa.R)
#
##############################

# In case we need these functions:
  logit.inv <- function(x) exp(x)/(1+exp(x))
  logit     <- function(p) log( p/(1-p) )


# Read in the data
  egy <- read.csv("http://www.electoralforensics.org/datasets/egy2011referendum.csv", header=TRUE)
  names(egy)

# Variables of interest:
#    Votes for:
#    . YES
#    . INVALID
#    . VALID
#    . TOTAL
#    . GOV

  v.yes <- egy$YES
  v.inv <- egy$INVALID
  v.val <- egy$VALID
  v.tot <- egy$TOTAL
  gov   <- egy$GOV


# Now, two variables of interest
  p.yes <- v.yes/v.tot
  p.inv <- v.inv/v.tot

# Create this if you are using a Binomial
  y.inv <- cbind(v.inv,v.val)


# Initial model:
  model.1 <- glm(y.inv~p.yes, family=binomial(link=logit) )
  summary(model.1)

  model.2 <- glm(y.inv~p.yes, family=quasibinomial(link=logit) )
  summary(model.2)



# Steal the graph stuff from Sri Lanka (March 22):

  par(mar=c(5,6,2,2)+0.1)
  plot(p.inv~p.yes, las=1, xlim=c(0,1),ylim=c(0,0.02), 
      xlab="Proportion of Vote in Favor",
      ylab="Proportion of Vote Rejected\n")

  newyes <- seq( 0,1,length=100 )                                         # a sequence of 100 numbers b/t 0 and 1
  pr <- predict(model.2, newdata=data.frame(p.yes=newyes), se.fit=TRUE)   # predict
  pest <- logit.inv(pr$fit)                                               # convert back to level units
  ucl  <- pr$fit + 1.96*pr$se.fit                                         # calculate the Upper Confidence Limit
  lcl  <- pr$fit - 1.96*pr$se.fit                                         # calculate the Lower Confidence Limit
  ucl <- logit.inv(ucl)                                                   # convert to level units
  lcl <- logit.inv(lcl)                                                   # convert to level units
  lines(newyes,pest)                                                      # draw the lines on the plot (above)
  lines(newyes,ucl, lty=2, col=2)                                         # draw the upper confidence band
  lines(newyes,lcl, lty=2, col=2)                                         # draw the lower confidence band

  abline(h=logit.inv(mean(logit(p.inv))), col=4,lty=3)                    # draw the mean horizontal line

  legend("bottomright", c("Prediction","95% Confidence Band","Mean Invalid Vote"), col=c(1,2,4), lty=c(1,2,3), bty="n" )
                                                                          # create the legend

# The end