########################################
#
# Script: 24 February 2011 (preliminary)
#
########################################

# Today:
#
# ** Testing tests
#    . How good is a test?
#
# But first:
# Review questions:
#   . What are odds?  How are they related to probabilities?
#     How does one calculate odds?
#
#   . What are odds ratios?  How are they calculated?  What
#     do they tell us about relationships?
#
# Practice problem:
#   I recently tabulated the results from my years of teaching
#   high school in Portland, OR.  From the table, answer the
#   questions below.
#
# Table:
#   Sport:          Soccer   XC   Basketball   Softball
#   Grade:  A           15   30            8          6
#           B           21   16           12          8
#           C           10    6           30         25
#           Other       10    8           45         18
#
# Q1: What are the odds that a Soccer player got an A?
# Q2: What are the odds that an XC runner got an A?
# Q3: Which sport's players had higher odds of getting an A?
# Q4: Is this a statistically significant result?

# Remember from last time:
#   The OR is not distributed Normally, but the log of the OR
#   is approximately Normal, with (asymptotic) variance
#     1/n11 + 1/n12 + 1/n21 + 1/n22
#
# Thus we can get a 95% confidence interval this way.
# (obs must hold the 2x2 table of counts first; for Q3/Q4 that is
#  Soccer vs. XC in the rows and A vs. not-A in the columns, with
#  the counts taken from the table above.)

obs <- matrix(c(15,41, 30,30), byrow=TRUE, nrow=2,
  dimnames=list(
    Sport=c("Soccer", "XC"),
    Grade=c("A", "Not A")
  )
)
obs

OR  <- ( obs[1,1]*obs[2,2] )/( obs[1,2]*obs[2,1] )
lOR <- log(OR)
vOR <- sum(1/obs)

lnUCL <- lOR + 1.96*sqrt(vOR)   # This should look VERY familiar!
lnLCL <- lOR - 1.96*sqrt(vOR)   # This should look VERY familiar, too!

UCL <- exp(lnUCL)
LCL <- exp(lnLCL)

LCL
UCL

# You should be able to follow the logic of this

# Alternatively, we can calculate the p-value:

OR  <- ( obs[1,1]*obs[2,2] )/( obs[1,2]*obs[2,1] )
lOR <- log(OR)
vOR <- sum(1/obs)

testStatistic <- lOR / sqrt(vOR)

if(testStatistic<0) {             # This picks the correct tail area
  p <- pnorm(testStatistic)
} else {
  p <- 1 - pnorm(testStatistic)
}
p*2                               # As this is a two-tailed test

# You should be able to follow the logic of this, too.

# Conclusion?

##################################################
# For testing tests
#   How good is the prediction from the test?
#
# The table looks like:
#
#                           |        Actual
#                           |      P         N
#   --------------------------------------------
#   P                       |
#   r            P          |     TP        FP
#   e                       |
#   d            N          |     FN        TN
#   n                       |
#
#
# Some measures of test goodness:
#
#   FPR = FP/(FP+TN)
#   FNR = FN/(FN+TP)
#
#   Accuracy = (TP+TN)/(P+N)
#
#   Specificity = TN/(FP+TN)
#   Sensitivity = TP/(FN+TP)
#
# Example:
#   I devised a model that predicted which politically active
#   nationalist groups would resort to terrorism to achieve
#   their goals.  The model used 10 variables, including group
#   cohesion, level of democracy in the state, and average change
#   in citizen wealth.  The prediction table for my groups was:
#
#                           |             Actual
#                           |   Terrorist    Non-Terrorist
#   -------------------------------------------------------
#   P                       |
#   r   Terrorist           |       15               2
#   e                       |
#   d   Non-Terrorist       |        3              17
#   n                       |
#
#
# How good was my model?

terror <- matrix(c(15,2, 3,17), byrow=TRUE, nrow=2,
  dimnames=list(
    Predicted=c("Terrorist", "Non-Terrorist"),
    Actual=c("Terrorist", "Non-Terrorist")
  )
)
terror

FPR <- terror[1,2]/(terror[1,2]+terror[2,2])   # FPR
FNR <- terror[2,1]/(terror[1,1]+terror[2,1])   # FNR
ACC <- (terror[1,1]+terror[2,2])/sum(terror)   # Accuracy

FPR
FNR
ACC

1-FPR    # Specificity
1-FNR    # Sensitivity

# So, how good is my model?
# Ans: Compared to what?
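# To see why that question matters, here is a quick illustrative sketch
# (the "lazy" table below is my own addition, built from the same 37
# groups as the example above): a do-nothing model that predicts
# Non-Terrorist for every group.

lazy <- matrix(c(0,0, 18,19), byrow=TRUE, nrow=2,
  dimnames=list(
    Predicted=c("Terrorist", "Non-Terrorist"),
    Actual=c("Terrorist", "Non-Terrorist")
  )
)
lazy
(lazy[1,1]+lazy[2,2])/sum(lazy)   # Accuracy of the do-nothing model

# A useful model needs to beat that number, which is the idea behind the
# base accuracy calculated next.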
# Base accuracy:
#   (the accuracy you would get by always predicting the larger
#    actual class; here, Non-Terrorist)

BAC <- colSums(terror)[2]/sum(terror)               # Base accuracy
BAC
BAC <- as.integer(colSums(terror)[2])/sum(terror)   # Same value, without the column name attached
BAC

RAC <- ACC/BAC                                      # Relative accuracy
RAC

# Examples for you to do here:

# Example 1:
terror2 <- matrix(c(45,6, 9,54), byrow=TRUE, nrow=2,
  dimnames=list(
    Predicted=c("Terrorist", "Non-Terrorist"),
    Actual=c("Terrorist", "Non-Terrorist")
  )
)
terror2

# Example 2:
elisa <- matrix(c(50,10, 20,120), byrow=TRUE, nrow=2,
  dimnames=list(
    Predicted=c("HIV Positive", "HIV Negative"),
    Actual=c("HIV Positive", "HIV Negative")
  )
)
elisa

sr71 <- matrix(c(60,40, 10,90), byrow=TRUE, nrow=2,
  dimnames=list(
    Predicted=c("HIV Positive", "HIV Negative"),
    Actual=c("HIV Positive", "HIV Negative")
  )
)
sr71
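# For the exercises above, it may help to wrap the measures in one small
# convenience function.  This is a sketch of one possible helper (the
# function name is my own); it assumes the table is laid out like the
# ones above, with Predicted in the rows, Actual in the columns, and the
# "positive" category listed first.

testGoodness <- function(tab) {
  TP <- tab[1,1]; FP <- tab[1,2]
  FN <- tab[2,1]; TN <- tab[2,2]
  c(FPR         = FP/(FP+TN),
    FNR         = FN/(FN+TP),
    Accuracy    = (TP+TN)/sum(tab),
    Specificity = TN/(FP+TN),
    Sensitivity = TP/(FN+TP))
}

# Check it against the worked example, then apply it to terror2, elisa,
# and sr71:
testGoodness(terror)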