########################################
#
# Script: 8 February 2011 (20110208.R)
#
########################################

# Today:
#
# ** Post-hoc testing

# Let us suppose the research hypothesis is that Africa has a lower
# level of honesty in government than any other region.

gdp <- read.csv("http://courses.kvasaheim.com/stat40x3/data/gdpcap.csv", header=TRUE)

par(mar=c(5,7,2,2)+0.1)
boxplot(hig~region, data=gdp, horizontal=TRUE, las=1,
        xlab="Honesty in Government index")

bartlett.test(hig~region, data=gdp)

# As the test for equal variances fails so badly here, I'll skip the
# Normality test and move straight to the distribution-free test.

kruskal.test(hig~region, data=gdp)

# For the fun of it, let us do an ANalysis Of VAriance (aov) test on the data

model <- aov(hig~region, data=gdp)
summary(model)

# So, a statistically significant difference at the alpha = 0.05 level, BUT:
# which region is statistically different from which?

# The key is that there are two types of Type I error rate:
#   . Comparison-wise Type I error rate
#   . Experiment-wise Type I error rate
# The former is the alpha value for each individual test. The latter is
# the 'alpha value' for the entire gamut of tests. When performing but
# one test, there is no difference between the two. When performing
# multiple tests, the latter is always greater than the former.

# Post hoc tests need to control the experiment-wise Type I error rate.
# There are several ways of doing this:

# The Bonferroni Method
#   . Multiply each p-value by the number of tests you are performing.
#     This is rather conservative, but easily calculated.

t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Other"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Latin America"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Islamic"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Latin America"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Islamic"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Latin America"], gdp$hig[gdp$region=="Islamic"])
t.test(gdp$hig[gdp$region=="Latin America"], gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Latin America"], gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Islamic"],       gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Islamic"],       gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Eastern"],       gdp$hig[gdp$region=="Africa"])

# There are 15 tests, so we multiply each p-value by 15. Alternatively,
# we could divide our alpha by 15 and use that as our new critical value:
#   alpha* = 0.05/15 = 0.00333
# With this as our new critical value, we know the following pairs are
# statistically different:
#   . Western and Other
#   . Western and Latin America
#   . Western and Islamic
#   . Western and Eastern
#   . Western and Africa
# Thus, there appear to be two groups of regions: 'Western' and
# 'everything else'. As such, we may be able to simplify our model by
# using only these two groups (see the sketch at the end of this script).

pairwise.t.test(gdp$hig, gdp$region, p.adjust.method="none")        # This does not adjust
pairwise.t.test(gdp$hig, gdp$region, p.adjust.method="bonferroni")  # This uses the Bonferroni method
with(gdp, pairwise.t.test(hig, region, p.adjust.method="bonferroni"))  # This may be easier to read

# The with() function allows one to specify a base dataset for those
# functions that do not have a "data=" parameter.
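# To see what the Bonferroni adjustment is doing, we can redo it by hand.
# (A sketch; note that pairwise.t.test() pools the standard deviation
# across all groups by default, so its unadjusted p-values differ
# slightly from the Welch t.test() results above.)

raw   <- with(gdp, pairwise.t.test(hig, region, p.adjust.method="none"))$p.value
p.raw <- raw[!is.na(raw)]             # the 15 unadjusted p-values
pmin(1, p.raw * length(p.raw))        # multiply each by 15, capping at 1
p.adjust(p.raw, method="bonferroni")  # the same adjustment, via R's built-in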
# Fisher's LSD procedure
#   . Done via multiple t-tests, for balanced data
# Calculate one "Least Significant Difference" for the several tests and
# declare two groups significantly different if their means are farther
# apart than the calculated LSD:
#   LSD = t[alpha/2] * sqrt(2*MSE/n)
# Note what this value depends upon.
# Also note that we cannot use this procedure here, as the regions are
# not balanced. We can, however, use an alternative version of it, which
# allows for unbalanced groups:
#   LSD = t[alpha/2] * sqrt(MSE * (1/n1 + 1/n2))

# Here is a function that will calculate the LSD for each pair of groups
# and determine if they are statistically different at the
# alpha = 0.05 level.

fisher.LSD <- function(model, conf.level=0.95) {
  # A function to perform pairwise comparisons using Fisher's LSD procedure

  # Initialize variables
  MSE  <- sum((model$residuals)^2)/model$df.residual  # mean squared error
  lvls <- model$xlevels[[1]]                          # the group labels
  k    <- length(lvls)                                # the number of groups

  # Define matrices to hold the results
  diff  <- matrix(nrow=k, ncol=k)
  LSD   <- matrix(nrow=k, ncol=k)
  means <- matrix(nrow=k, ncol=k)

  # Loop through all pairs of groups
  for( i in 1:(k-1) ) {
    for( j in (i+1):k ) {
      sset1 <- lvls[i]  # Defines the first group
      sset2 <- lvls[j]  # Defines the second group
      s1 <- model$model[[1]][ model$model[[2]]==sset1 ]  # The data in Group 1
      s2 <- model$model[[1]][ model$model[[2]]==sset2 ]  # The data in Group 2

      # Calculate the LSD statistic
      t <- qt(p=1-(1-conf.level)/2, df=model$df.residual)
      LSD[i,j] <- t*sqrt(MSE * (1/length(s1) + 1/length(s2)))
      LSD[j,i] <- LSD[i,j]

      # Calculate the difference in means between the two groups
      means[i,j] <- mean(s1)-mean(s2)
      means[j,i] <- mean(s2)-mean(s1)

      # Determine if the difference in means is statistically significant
      diff[i,j] <- abs( mean(s1)-mean(s2) ) > LSD[i,j]
      diff[j,i] <- abs( mean(s2)-mean(s1) ) > LSD[j,i]
    }  # End j loop
  }  # End i loop

  # Tidy up the output a bit
  rownames(diff)  <- colnames(diff)  <- lvls
  rownames(LSD)   <- colnames(LSD)   <- lvls
  rownames(means) <- colnames(means) <- lvls

  # Define the variable we will return from this function
  item <- list(diff=diff, means=means, LSD=LSD)

  # Return the variable
  return(item)
}  # End function

fisher.LSD(model)  # Here is how you call this function

# Note that the output consists of three tables. Not very pretty, but
# prettying it up would take another function.
# Also note that you can change the alpha value:

fisher.LSD(model, conf.level=0.90)   # alpha = 0.10
fisher.LSD(model, conf.level=0.95)   # alpha = 0.05 (the default)
fisher.LSD(model, conf.level=0.975)  # alpha = 0.025

# Note that these results differ from those of the Bonferroni method.
# This tells us that Africa and Eastern may or may not be significantly
# different; it depends on how we do our adjusting. However, we are VERY
# confident that Western is different from all of the others, and has a
# higher level of honesty in government than any other region.

# Note that there is a pre-packaged Fisher's LSD function (LSD.test(),
# below), but it is important that you start to see the connections
# between what we want to compute and how to compute it by hand.
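# As a worked example of the unbalanced-groups formula (a sketch; the
# group sizes are taken from the model frame so that any rows dropped
# from the fit are excluded):

MSE    <- sum(model$residuals^2)/model$df.residual
n      <- table(model$model[[2]])          # the group sizes
t.crit <- qt(0.975, df=model$df.residual)  # two-sided, alpha = 0.05
t.crit * sqrt(MSE * (1/n["Western"] + 1/n["Africa"]))  # the LSD for Western vs Africa

# If the observed |difference in means| for Western and Africa exceeds
# this value, the two regions are declared significantly different.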
# Tukey's HSD procedure
# Fisher's procedure does not set the experiment-wise alpha value
# perfectly (none of these procedures do). Tukey improved upon it and
# called his procedure the "Honestly Significant Difference", hence HSD.
# It does a better job than Fisher's LSD procedure of keeping the
# experiment-wise Type I error rate close to what you expect. It is not
# perfect, but it is better.

TukeyHSD(model)  # default alpha = 0.05

# Note the output. The 'diff' column is the difference in the means. The
# 'lwr' and 'upr' columns are the lower and upper confidence bounds
# (based on your alpha). The final column is the p-value associated with
# the difference in means.
# Note that these results are slightly different from those of the LSD.

# Duncan's Multiple Range Test
# This test recognizes that it is easier for means to be significantly
# different if there are multiple groups between them. That is, it is
# easier for Africa and Western to be significantly different, since
# there are 4 other regions between them (in terms of ordered means).
# Duncan's MRT adjusts for this. It is very popular in certain
# disciplines and unused in others.

# In R, to use this test, you will have to download a new package,
# install it, and attach it. As there are other important tests in that
# package, let us install it. The package's name is 'agricolae':

install.packages("agricolae")  # download and install (needed only once)
library(agricolae)             # attach the package to this session

duncan.test(model, "region")              # Standard usage (alpha = 0.05)
duncan.test(model, "region", alpha=0.10)  # Set alpha to 0.10

# Note the outputs.

# Scheffe's test
# The Scheffe test squares the differences between the means and divides
# by the typical denominator. This test statistic is then compared to
# (k-1) times the appropriate F value. It was originally designed to
# deal with unbalanced designs, but it works well with balanced designs,
# too. This test is popular in Education research.

# This test also requires the 'agricolae' package:

scheffe.test(model, "region")

# Again, note the output.

# This package also performs the LSD test (as above), the HSD test (as
# above), and the SNK test:

LSD.test(model, "region")
HSD.test(model, "region")
SNK.test(model, "region")

# The nice thing about using a special package is that the output format
# of the tests is very similar.

# The Student-Newman-Keuls test
# The SNK test is usually less conservative than Tukey's HSD test. Where
# HSD controls the error rate for all possible comparisons, SNK only
# controls it for those comparisons actually made. If you make all
# comparisons, then the two tests are equivalent.

# This test also requires the 'agricolae' package:

SNK.test(model, "region")

# Again, note the output.

# The key
# The key is that you should use multiple methods for multiple
# comparisons to get a better feel for how different the groups really
# are. From using all of the above tests, we know that Western is
# different from the others, but Africa and Eastern may not be
# significantly different from each other.
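# Finally, a sketch of the simplification suggested back in the
# Bonferroni section: collapse 'region' into two groups, 'Western' and
# everything else, and re-test. (The 'west' variable is created here for
# illustration; it is not in the original data set.)

gdp$west <- ifelse(gdp$region=="Western", "Western", "Non-Western")
boxplot(hig~west, data=gdp, horizontal=TRUE, las=1,
        xlab="Honesty in Government index")
kruskal.test(hig~west, data=gdp)  # the distribution-free test, as above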