########################################
#
# Script: 22 February 2011 (20110222.R)
#
########################################
# Today:
#
# ** Statistical effect
#  . Level of effect
#
# ** Testing tests
#  . How good is a test?

# Practice question:
# You count 14 students at Eskimo Joe's drinking at noon and
# discover that 3 are female. Can we conclude that the number
# of males drinking at Eskimo Joe's is statistically
# different than the number of females?

observed <- c(3, 11)

chisq.test(observed)   # goodness-of-fit against a 50/50 split
binom.test(observed)   # exact binomial test, p0 = 0.5
prop.test(3, 14)       # normal approximation to the binomial

# Which test is most appropriate?
# What is the null hypothesis of each test?
# What is our conclusion?

# Practice question:
# I measured the height of 556 students. Is there
# a statistical difference in height between the
# two genders?

# 2x2 contingency table: height category (rows) by gender (columns).
obs <- matrix(c(315, 108,
                101,  32), byrow = TRUE, nrow = 2)
colnames(obs) <- c("Male", "Female")
rownames(obs) <- c("Over 6'", "Under 6'")
obs

fisher.test(obs)   # exact test of independence
chisq.test(obs)    # chi-squared test of independence

# Which test is most appropriate?
# What is the null hypothesis of each test?
# What is our conclusion?

##### Effect size

# Let's extend this a bit:
# What are the odds that a male is over 6'?
#
# Def: Odds are the ratio of the probability of
# a success to the probability of a failure.

# Males: 315 of the 416 males are over 6'.
# (Note: `<-` is the idiomatic R assignment operator; the original
# used `=` here.)
Odds <- (315/416) / (101/416)
Odds

# Thus, the odds that a male is over 6'
# tall is 3.12-to-1.

# What is it for females in this group?
# (The 1/140 normalizing factors cancel, so we can use raw counts.)
Odds <- 108/32
Odds

# How do we compare males and females here? By
# using the Odds Ratio.
#
# OR = Odds(male) / Odds(female)
#    = 3.12 / 3.38
#    = 0.923
#
# This tells us that the odds of a male being over
# 6' tall is less than the odds of a female being
# over 6' tall by a factor of 0.923.
#
# Is this statistically different from 1?
# Well, ...
# ORs are not distributed Normally, but the log of the OR
# is approximately Normal, with (asymptotic) variance
#   1/n11 + 1/n12 + 1/n21 + 1/n22
# Thus we can get a 95% confidence interval this way
# (obs is the 2x2 height-by-gender table built earlier in
# this script):

OR  <- (obs[1, 1] * obs[2, 2]) / (obs[1, 2] * obs[2, 1])  # cross-product ratio
lOR <- log(OR)
vOR <- sum(1/obs)                                         # asymptotic variance of log(OR)

lnUCL <- lOR + 1.96 * sqrt(vOR)   # This should look VERY familiar!
lnLCL <- lOR - 1.96 * sqrt(vOR)   # This should look VERY familiar, too!

# Back-transform from the log scale to the OR scale.
UCL <- exp(lnUCL)
LCL <- exp(lnLCL)

LCL
UCL

# You should be able to follow the logic of this.

# Alternatively, we can calculate the p-value
# (reusing OR, lOR, and vOR from above):
testStatistic <- lOR / sqrt(vOR)

# pnorm(-abs(z)) is the area in the tail beyond |z|, which is
# exactly the "pick the correct tail" if/else from the original.
p <- pnorm(-abs(testStatistic))

p * 2   # As this is a two-tailed test

# You should be able to follow the logic of this, too.
# Conclusion?

# UCB Admissions data
#
# Does UC Berkeley discriminate against
# females for graduate admission? (1972)
#
# Here, we have male/female and admitted/
# not admitted for each of six graduate
# departments at UCB. Is there evidence of
# discrimination?

data(UCBAdmissions)
UCBAdmissions
ftable(UCBAdmissions)

dataset <- ftable(UCBAdmissions)

# Collapse over the six departments: row sums of the flat table
# give Admitted/Rejected counts by gender.
# (The original called rowSums(dataset, 3); the stray 3 was
# silently taken as na.rm and coerced to TRUE.)
marginal <- matrix(rowSums(dataset), byrow = TRUE, nrow = 2)
rownames(marginal) <- c("Admitted", "Rejected")
colnames(marginal) <- c("Male", "Female")
marginal

chisq.test(marginal)
fisher.test(marginal)

# Which is better? Why?
# What is our null hypothesis?
# What is our conclusion?
# Which direction?
# Simpson's paradox check: repeat the test within each department.

ftable(UCBAdmissions[, , "A"])

# Helper: the 2x2 Admit-by-Gender table for one department.
# (Replaces six copy-pasted ftable(UCBAdmissions[, , "X"]) calls.)
deptTable <- function(dept) ftable(UCBAdmissions[, , dept])

dA <- deptTable("A")
dB <- deptTable("B")
dC <- deptTable("C")
dD <- deptTable("D")
dE <- deptTable("E")
dF <- deptTable("F")

# Chi-squared test of independence within each department.
chisq.test(dA)
chisq.test(dB)
chisq.test(dC)
chisq.test(dD)
chisq.test(dE)
chisq.test(dF)

# Fisher's exact test within each department.
fisher.test(dA)
fisher.test(dB)
fisher.test(dC)
fisher.test(dD)
fisher.test(dE)
fisher.test(dF)

# To see this visually:
fourfoldplot(marginal)                       # pooled table (built above)
fourfoldplot(UCBAdmissions)                  # one panel per department
fourfoldplot(UCBAdmissions, mfcol = c(2, 3)) # arrange panels 2 rows x 3 cols

fourfoldplot(UCBAdmissions[, , "F"])
fourfoldplot(UCBAdmissions[, , "E"])
fourfoldplot(UCBAdmissions[, , "D"])
fourfoldplot(UCBAdmissions[, , "C"])
fourfoldplot(UCBAdmissions[, , "B"])
fourfoldplot(UCBAdmissions[, , "A"])

##################################################
# For testing tests
#
# How good is the prediction from the test?
# The table looks like:
#
#                |      Actual
#                |    P        N
#          -----------------
#        P |
#      r P |    TP       FP
#      e   |
#      d N |    FN       TN
#      n   |
#
# Some measures of test goodness:
#
#   FPR = FP/(FP+TN)
#   FNR = FN/(FN+TP)
#
#   Accuracy    = (TP+TN)/(P+N)
#
#   Specificity = TN/(FP+TN)
#   Sensitivity = TP/(FN+TP)
#
# Example:
# I devised a model that predicted which politically active
# nationalist groups would resort to terrorism to achieve
# their goals. The model used 10 variables, including group
# cohesion, level of democracy in the state, and average citizen
# wealth change. The prediction table for my groups was:
#
#                   |          Actual
#                   |  Terrorist   Non-Terrorist
#          --------------------------------------------------
#        P           |
#      r Terrorist   |     15             2
#      e             |
#      d Non-Terrorist |    3            17
#      n             |
#
# How good was my model?
# Confusion matrix for the terrorism-prediction model.
# Cell layout: [1,1]=TP, [1,2]=FP, [2,1]=FN, [2,2]=TN.
terror <- matrix(c(15,  2,
                    3, 17),
                 byrow = TRUE, nrow = 2,
                 dimnames = list(
                   Predicted = c("Terrorist", "Non-Terrorist"),
                   Actual    = c("Terrorist", "Non-Terrorist")
                 )
)
terror

FPR <- terror[1, 2] / (terror[1, 2] + terror[2, 2])   # FPR = FP/(FP+TN)
FNR <- terror[2, 1] / (terror[1, 1] + terror[2, 1])   # FNR = FN/(TP+FN)
ACC <- (terror[1, 1] + terror[2, 2]) / sum(terror)    # Accuracy = (TP+TN)/total

FPR
FNR
ACC

# Base accuracy: accuracy of the trivial model that always predicts
# the larger actual class (Non-Terrorist: 19 of 37 groups).
# unname() strips the column label that colSums() attaches; the
# original computed BAC twice and used as.integer() for this, which
# would silently truncate non-integer counts.
BAC <- unname(colSums(terror)[2]) / sum(terror)
BAC

RAC <- ACC / BAC   # Relative accuracy: how much better than the trivial model?

1 - FPR   # Specificity
1 - FNR   # Sensitivity

# So, how good is my model?

##################################################
# Suggested homework:
# Write a function that produces the Accuracy, when given a 2x2 table.