########################################
#
# Script: 8 February 2011 (20110208.R)
#
########################################

# Today:
#
# ** Post-hoc testing

# Let us suppose the research hypothesis is that Africa has a lower
# level of honesty in government than any other region.

gdp <- read.csv("http://courses.kvasaheim.com/stat40x3/data/gdpcap.csv", header=TRUE)

par(mar=c(5,7,2,2)+0.1)
boxplot(hig~region, data=gdp, horizontal=TRUE, las=1,
        xlab="Honesty in Government index")

bartlett.test(hig~region, data=gdp)

# As the test for equal variances fails so badly here, I'll skip the
# Normality test and move straight to the distribution-free test.

kruskal.test(hig~region, data=gdp)

# For the fun of it, let us do an ANalysis Of VAriance (aov) test on the data

model <- aov(hig~region, data=gdp)
summary(model)

# So, a statistically significant difference at the alpha = 0.05 level, BUT:
# which region is statistically different from which?

# The key is that there are two types of Type I error rate:
#   . Comparison-wise Type I error rate
#   . Experiment-wise Type I error rate
# The former is the alpha value for each individual test. The latter is
# the 'alpha value' for the entire gamut of tests. When performing but
# one test, there is no difference between the two. When performing
# multiple tests, the latter is always greater than the former.

# Post hoc tests need to control the experiment-wise Type I error rate.
# There are several ways of doing this:

# The Bonferroni Method
#   . Multiply each p-value by the number of tests you are performing.
#     This is rather conservative, but easily calculated.

t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Other"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Latin America"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Islamic"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Western"],       gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Latin America"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Islamic"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Other"],         gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Latin America"], gdp$hig[gdp$region=="Islamic"])
t.test(gdp$hig[gdp$region=="Latin America"], gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Latin America"], gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Islamic"],       gdp$hig[gdp$region=="Eastern"])
t.test(gdp$hig[gdp$region=="Islamic"],       gdp$hig[gdp$region=="Africa"])
t.test(gdp$hig[gdp$region=="Eastern"],       gdp$hig[gdp$region=="Africa"])

# There are 15 tests, so we multiply each p-value by 15. Alternatively,
# we could divide our alpha by 15 and use that as our new critical value:
#   alpha* = 0.05/15 = 0.00333
# With this as our new critical value, we know the following pairs are
# statistically different:
#   . Western and Other
#   . Western and Latin America
#   . Western and Islamic
#   . Western and Eastern
#   . Western and Africa
# Thus, there appear to be two groups of regions: 'Western' and
# 'everything else'. As such, we may be able to simplify our model by
# using only these two groups (see the sketch at the end of this script).

pairwise.t.test(gdp$hig, gdp$region, p.adjust.method="none")        # This does not adjust
pairwise.t.test(gdp$hig, gdp$region, p.adjust.method="bonferroni")  # This uses the Bonferroni method
with(gdp, pairwise.t.test(hig, region, p.adjust.method="bonferroni"))  # This may be easier to read

# The with() function allows one to specify a base dataset for those
# functions that do not have a "data=" parameter.
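# To see what the Bonferroni adjustment is doing, we can redo it by hand.
# (A sketch; note that pairwise.t.test() pools the standard deviation
# across all groups by default, so its unadjusted p-values differ
# slightly from the Welch t.test() results above.)

raw   <- with(gdp, pairwise.t.test(hig, region, p.adjust.method="none"))$p.value
p.raw <- raw[!is.na(raw)]             # the 15 unadjusted p-values
pmin(1, p.raw * length(p.raw))        # multiply each by 15, capping at 1
p.adjust(p.raw, method="bonferroni")  # the same adjustment, via R's built-in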
# Fisher's LSD procedure
#   . Done via multiple t-tests, for balanced data
# Calculate one "Least Significant Difference" for the several tests and
# declare two groups significantly different if their means are farther
# apart than the calculated LSD:
#   LSD = t[alpha/2] * sqrt(2*MSE/n)
# Note what this value depends upon.
# Also note that we cannot use this procedure here, as the regions are
# not balanced. We can, however, use an alternative version of it, which
# allows for unbalanced groups:
#   LSD = t[alpha/2] * sqrt(MSE * (1/n1 + 1/n2))

# Here is a function that will calculate the LSD for each pair of groups
# and determine if they are statistically different at the
# alpha = 0.05 level.

fisher.LSD <- function(model, conf.level=0.95) {
  # A function to perform pairwise comparisons using Fisher's LSD procedure

  # Initialize variables
  MSE  <- sum((model$residuals)^2)/model$df.residual  # mean squared error
  lvls <- model$xlevels[[1]]                          # the group labels
  k    <- length(lvls)                                # the number of groups

  # Define matrices to hold the results
  diff  <- matrix(nrow=k, ncol=k)
  LSD   <- matrix(nrow=k, ncol=k)
  means <- matrix(nrow=k, ncol=k)

  # Loop through all pairs of groups
  for( i in 1:(k-1) ) {
    for( j in (i+1):k ) {
      sset1 <- lvls[i]  # Defines the first group
      sset2 <- lvls[j]  # Defines the second group
      s1 <- model$model[[1]][ model$model[[2]]==sset1 ]  # The data in Group 1
      s2 <- model$model[[1]][ model$model[[2]]==sset2 ]  # The data in Group 2

      # Calculate the LSD statistic
      t <- qt(p=1-(1-conf.level)/2, df=model$df.residual)
      LSD[i,j] <- t*sqrt(MSE * (1/length(s1) + 1/length(s2)))
      LSD[j,i] <- LSD[i,j]

      # Calculate the difference in means between the two groups
      means[i,j] <- mean(s1)-mean(s2)
      means[j,i] <- mean(s2)-mean(s1)

      # Determine if the difference in means is statistically significant
      diff[i,j] <- abs( mean(s1)-mean(s2) ) > LSD[i,j]
      diff[j,i] <- abs( mean(s2)-mean(s1) ) > LSD[j,i]
    }  # End j loop
  }  # End i loop

  # Tidy up the output a bit
  rownames(diff)  <- colnames(diff)  <- lvls
  rownames(LSD)   <- colnames(LSD)   <- lvls
  rownames(means) <- colnames(means) <- lvls

  # Define the variable we will return from this function
  item <- list(diff=diff, means=means, LSD=LSD)

  # Return the variable
  return(item)
}  # End function

fisher.LSD(model)  # Here is how you call this function

# Note that the output consists of three tables. Not very pretty, but
# prettying it up would take another function.
# Also note that you can change the alpha value:

fisher.LSD(model, conf.level=0.90)   # alpha = 0.10
fisher.LSD(model, conf.level=0.95)   # alpha = 0.05 (the default)
fisher.LSD(model, conf.level=0.975)  # alpha = 0.025

# Note that these results differ from those of the Bonferroni method.
# This tells us that Africa and Eastern may or may not be significantly
# different; it depends on how we do our adjusting. However, we are VERY
# confident that Western is different from all of the others, and has a
# higher level of honesty in government than any other region.

# Note that there is a pre-packaged Fisher's LSD function (LSD.test(),
# below), but it is important that you start to see the connections
# between what we want to compute and how to compute it by hand.
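# As a worked example of the unbalanced-groups formula (a sketch; the
# group sizes are taken from the model frame so that any rows dropped
# from the fit are excluded):

MSE    <- sum(model$residuals^2)/model$df.residual
n      <- table(model$model[[2]])          # the group sizes
t.crit <- qt(0.975, df=model$df.residual)  # two-sided, alpha = 0.05
t.crit * sqrt(MSE * (1/n["Western"] + 1/n["Africa"]))  # the LSD for Western vs Africa

# If the observed |difference in means| for Western and Africa exceeds
# this value, the two regions are declared significantly different.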
# Tukey's HSD procedure
# Fisher's procedure does not set the experiment-wise alpha value
# perfectly (none of these procedures do). Tukey improved upon it and
# called his procedure the "Honestly Significant Difference", hence HSD.
# It does a better job than Fisher's LSD procedure of keeping the
# experiment-wise Type I error rate close to what you expect. It is not
# perfect, but it is better.

TukeyHSD(model)  # default alpha = 0.05

# Note the output. The 'diff' column is the difference in the means. The
# 'lwr' and 'upr' columns are the lower and upper confidence bounds
# (based on your alpha). The final column is the p-value associated with
# the difference in means.
# Note that these results are slightly different from those of the LSD.

# Duncan's Multiple Range Test
# This test recognizes that it is easier for means to be significantly
# different if there are multiple groups between them. That is, it is
# easier for Africa and Western to be significantly different, since
# there are 4 other regions between them (in terms of ordered means).
# Duncan's MRT adjusts for this. It is very popular in certain
# disciplines and unused in others.

# In R, to use this test, you will have to download a new package,
# install it, and attach it. As there are other important tests in that
# package, let us install it. The package's name is 'agricolae':

install.packages("agricolae")  # download and install (needed only once)
library(agricolae)             # attach the package to this session

duncan.test(model, "region")              # Standard usage (alpha = 0.05)
duncan.test(model, "region", alpha=0.10)  # Set alpha to 0.10

# Note the outputs.

# Scheffe's test
# The Scheffe test squares the differences between the means and divides
# by the typical denominator. This test statistic is then compared to
# (k-1) times the appropriate F value. It was originally designed to
# deal with unbalanced designs, but it works well with balanced designs,
# too. This test is popular in Education research.

# This test also requires the 'agricolae' package:

scheffe.test(model, "region")

# Again, note the output.

# This package also performs the LSD test (as above), the HSD test (as
# above), and the SNK test:

LSD.test(model, "region")
HSD.test(model, "region")
SNK.test(model, "region")

# The nice thing about using a special package is that the output format
# of the tests is very similar.

# The Student-Newman-Keuls test
# The SNK test is usually less conservative than Tukey's HSD test. Where
# HSD controls the error rate for all possible comparisons, SNK only
# controls it for those comparisons actually made. If you make all
# comparisons, then the two tests are equivalent.

# This test also requires the 'agricolae' package:

SNK.test(model, "region")

# Again, note the output.

# The key
# The key is that you should use multiple methods for multiple
# comparisons to get a better feel for how different the groups really
# are. From using all of the above tests, we know that Western is
# different from the others, but Africa and Eastern may not be
# significantly different from each other.
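# Finally, a sketch of the simplification suggested back in the
# Bonferroni section: collapse 'region' into two groups, 'Western' and
# everything else, and re-test. (The 'west' variable is created here for
# illustration; it is not in the original data set.)

gdp$west <- ifelse(gdp$region=="Western", "Western", "Non-Western")
boxplot(hig~west, data=gdp, horizontal=TRUE, las=1,
        xlab="Honesty in Government index")
kruskal.test(hig~west, data=gdp)  # the distribution-free test, as above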